# CORRELATION IN PYTHON: NUMPY (CORRCOEF)

In [None]:
import numpy as np

# Seed NumPy's global generator so every draw below is reproducible.
np.random.seed(1)

# 1000 integers drawn uniformly from 0..49 (the upper bound 50 is exclusive).
x = np.random.randint(low=0, high=50, size=1000)

# y follows x plus Gaussian noise (mean 0, std 10), giving a positive correlation.
y = x + np.random.normal(loc=0, scale=10, size=1000)

# 2x2 correlation matrix; the off-diagonal entries are Pearson's r for (x, y).
np.corrcoef(x, y)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
# Apply the ggplot look to every figure drawn from here on.
matplotlib.style.use('ggplot')

# Scatter of the current x/y sample. The title and axis labels let the figure
# be understood on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set(title='Positive correlation', xlabel='x', ylabel='y')
plt.show()

# Negative Correlation

In [None]:
# 1000 integers drawn uniformly from 0..49 (the upper bound 50 is exclusive).
x = np.random.randint(low=0, high=50, size=1000)

# y falls as x rises (100 - x), blurred by Gaussian noise with std 5.
y = (100 - x) + np.random.normal(loc=0, scale=5, size=1000)

# Correlation matrix; the off-diagonal entries are Pearson's r for (x, y).
np.corrcoef(x, y)

In [None]:
# Scatter of the current x/y sample, labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set(title='Negative correlation', xlabel='x', ylabel='y')
plt.show()

# No Correlation (Neutral Correlation)

In [None]:
# Two independent draws of 1000 integers from 0..49: no relationship is built in.
x = np.random.randint(low=0, high=50, size=1000)
y = np.random.randint(low=0, high=50, size=1000)

# Correlation matrix; the off-diagonal entries are Pearson's r for (x, y).
np.corrcoef(x, y)

In [None]:
# Scatter of the current x/y sample, labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set(title='No correlation', xlabel='x', ylabel='y')
plt.show()

# CORRELATION

In [None]:
import numpy as np
import scipy.stats
# Small hand-written sample: x runs 10..19, y grows with x apart from one spike (96).
x = np.arange(10, 20)
y = np.array((2, 1, 4, 5, 8, 12, 18, 25, 96, 48))

# Pearson correlation coefficient together with its p-value.
scipy.stats.pearsonr(x, y)


In [None]:
# Spearman rank correlation of the same x/y sample (coefficient and p-value).
scipy.stats.spearmanr(x, y)


In [None]:
# Kendall's tau rank correlation of the same x/y sample (coefficient and p-value).
scipy.stats.kendalltau(x, y)

# PANDAS IMPLEMENTATION: CORRELATION

In [None]:
import pandas as pd
# Integer Series holding 10..19; ending the cell on the name displays it.
x = pd.Series(data=range(10, 20))
x

In [None]:
# Response values paired with x; ending the cell on the name displays it.
y = pd.Series(data=[2, 1, 4, 5, 8, 12, 18, 25, 96, 48])
y

In [None]:
# Pearson's r computed in both call orders, to show that the measure is symmetric.
# The two results are returned as one tuple because a notebook cell only
# renders its final expression; as two separate statements the first value
# would never be displayed.
x.corr(y), y.corr(x)

In [None]:
# Same Series pair, rank-based: method='spearman' selects Spearman's rho.
x.corr(y, method='spearman')  # Spearman's rho

In [None]:
# Same Series pair, rank-based: method='kendall' selects Kendall's tau.
x.corr(y, method='kendall')   # Kendall's tau

# LINEAR REGRESSION (SCIPY)

In [None]:
import numpy as np
import scipy.stats
# Regression inputs: x runs 10..19, y is the hand-written response sample.
x = np.arange(10, 20)
y = np.array((2, 1, 4, 5, 8, 12, 18, 25, 96, 48))

Here, you import numpy and scipy.stats and define the variables x and y.

In [None]:
# Fit a straight line to (x, y). The returned object is read attribute by
# attribute in the following cells (slope, intercept, rvalue, pvalue, stderr).
result = scipy.stats.linregress(x, y)
# Slope of the fitted line.
result.slope


In [None]:
# Intercept of the fitted regression line.
result.intercept



In [None]:
# Correlation coefficient between x and y reported by linregress.
result.rvalue


In [None]:
# p-value of the fit; per the SciPy docs the null hypothesis is a slope of zero.
result.pvalue


In [None]:
# Standard error of the estimated slope (per the SciPy linregress docs).
result.stderr

# PEARSON CORRELATION: PANDAS

In [None]:
import pandas as pd
# Integer Series holding 10..19; ending the cell on the name displays it.
x = pd.Series(range(10, 20))
x

In [None]:
# Response values paired with x; ending the cell on the name displays it.
y = pd.Series([2, 1, 4, 5, 8, 12, 18, 25, 96, 48])
y

In [None]:
# Third variable, monotonically decreasing; ending the cell on the name displays it.
z = pd.Series([5, 3, 2, 1, 0, -2, -8, -11, -15, -16])
z

In [None]:
# Two-column frame pairing the x and y Series under descriptive column names.
xy = pd.DataFrame({'x-values': x, 'y-values': y})
xy

In [None]:
# Three-column frame combining the x, y and z Series.
xyz = pd.DataFrame({'x-values': x, 'y-values': y, 'z-values': z})
xyz

In [None]:
# Pairwise correlation matrix of the columns of xy (DataFrame.corr defaults to Pearson).
corr_matrix = xy.corr()
corr_matrix

# VISUALIZATION OF CORRELATION

In [None]:
import matplotlib.pyplot as plt
# Apply the ggplot look to the figures drawn in this section.
plt.style.use('ggplot')

In [None]:
import numpy as np
import scipy.stats
# Three equal-length variables: x rises steadily, y rises with a spike, z falls.
x = np.arange(10, 20)
y = np.array((2, 1, 4, 5, 8, 12, 18, 25, 96, 48))
z = np.array((5, 3, 2, 1, 0, -2, -8, -11, -15, -16))

# Stack the variables as rows of a (3, 10) array: one row per variable.
xyz = np.array([x, y, z])

# X-Y Plots With a Regression Line

In [None]:
# Fit y on x and unpack the result into its five leading fields:
# slope, intercept, correlation coefficient, p-value and standard error.
slope, intercept, r, p, stderr = scipy.stats.linregress(x, y)

In [None]:
# Legend text for the fitted line: the equation and r, each rounded to 2 decimals.
line = f'Regression line: y={intercept:.2f}+{slope:.2f}x, r={r:.2f}'
line

In [None]:
# Observations as unconnected square markers, with the fitted line drawn on top.
fig, ax = plt.subplots()
ax.plot(x, y, marker='s', linewidth=0, label='Data points')
ax.plot(x, intercept + slope * x, label=line)
ax.set(xlabel='x', ylabel='y')
# White legend box so the text stays readable on the styled background.
ax.legend(facecolor='white')
plt.show()

# HEATMAPS OF CORRELATION

In [None]:
# Correlation matrix of the rows of xyz, rounded to 2 decimals so the values
# fit inside the heatmap cells.
corr_matrix = np.round(np.corrcoef(xyz), decimals=2)
corr_matrix

In [None]:
# Render corr_matrix as a colour-coded grid with each value printed in its cell.
fig, ax = plt.subplots()
im = ax.imshow(corr_matrix)
# Pin the colour scale to the full range a correlation coefficient can take.
im.set_clim(-1, 1)
ax.grid(False)
# Label the three rows/columns with the variable names instead of 0, 1, 2.
ax.xaxis.set(ticks=(0, 1, 2), ticklabels=('x', 'y', 'z'))
ax.yaxis.set(ticks=(0, 1, 2), ticklabels=('x', 'y', 'z'))
# Explicit y-limits with row 0 at the top.
# NOTE(review): presumably guards against cropped top/bottom rows on some
# matplotlib versions — confirm whether it is still needed.
ax.set_ylim(2.5, -0.5)
# Write each coefficient at the centre of its cell; text takes (x, y), i.e. (column, row).
for i in range(3):
    for j in range(3):
        ax.text(j, i, corr_matrix[i, j], ha='center', va='center',
                color='r')
# Colour bar with two-decimal tick labels; the space flag in '% .2f' reserves
# a sign column so positive and negative labels line up.
cbar = ax.figure.colorbar(im, ax=ax, format='% .2f')
plt.show()

# LINEAR REGRESSION (SCIKIT-LEARN)

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [None]:
# Inputs as a single-feature column, shape (6, 1): reshape(-1, 1) turns the
# flat vector into one row per observation.
x = np.array([5, 15, 25, 35, 45, 55]).reshape(-1, 1)
# Responses stay one-dimensional, shape (6,).
y = np.array((5, 20, 14, 32, 22, 38))

In [None]:
# Show the inputs; x was reshaped to a single column above.
print(x)

In [None]:
# Show the responses; y is a flat one-dimensional array.
print(y)

In [None]:
# Create an unfitted linear-regression estimator with default settings.
model = LinearRegression()

In [None]:
# Estimate the intercept and slope from the (x, y) sample.
model.fit(x, y)

In [None]:
# Same as the two previous cells in one statement: create the estimator and
# fit it, rebinding `model` to the fitted instance.
model = LinearRegression().fit(x, y)

In [None]:
# Coefficient of determination (R^2) of the fitted model, evaluated on the same
# data it was trained on.
r_sq = model.score(x, y)
print('coefficient of determination:', r_sq)

In [None]:
# Fitted parameters of the model trained on the one-dimensional y.
print('intercept:', model.intercept_)

# coef_ holds one slope per input feature; x has a single feature here.
print('slope:', model.coef_)

In [None]:
# Fit a second model with y reshaped to a column (one row per observation) to
# compare the shape of the fitted parameters with the 1-D-target model above.
new_model = LinearRegression().fit(x, y.reshape((-1, 1)))
# With a 2-D target scikit-learn reports intercept_ as a one-element array.
print('intercept:', new_model.intercept_)

# ...and coef_ as a two-dimensional array.
print('slope:', new_model.coef_)


In [None]:
# Predicted response for each training input, printed under its label
# (sep='\n' puts the array on its own line).
y_pred = model.predict(x)
print('predicted response:', y_pred, sep='\n')

In [None]:
# Fresh inputs 0..4 arranged as a single-feature column, shape (5, 1).
x_new = np.arange(5).reshape(-1, 1)
print(x_new)

# MULTIPLE LINEAR REGRESSION

In [None]:
import pandas as pd

# Monthly observations listed newest first: December 2017 back to January 2016.
Stock_Market = {
    'Year': [2017] * 12 + [2016] * 12,
    'Month': list(range(12, 0, -1)) * 2,
    'Interest_Rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'Unemployment_Rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'Stock_Index_Price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}

# Column order follows the dict's insertion order:
# Year, Month, Interest_Rate, Unemployment_Rate, Stock_Index_Price.
df = pd.DataFrame(Stock_Market, columns=list(Stock_Market))

print(df)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
  
# Reuses `df`, the DataFrame built from Stock_Market in the first cell of this
# section, rather than re-declaring an identical copy of the 24-row dataset.
# A single definition means the data cannot drift apart between cells.

# Stock index price against interest rate, one red point per month.
plt.scatter(df['Interest_Rate'], df['Stock_Index_Price'], color='red')
plt.title('Stock Index Price Vs Interest Rate', fontsize=14)
plt.xlabel('Interest Rate', fontsize=14)
plt.ylabel('Stock Index Price', fontsize=14)
plt.grid(True)
plt.show()

The scatter plot shows a positive linear relationship between Interest_Rate and Stock_Index_Price: when interest rates go up, the stock index price also goes up.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
  
# Reuses `df`, the DataFrame built from Stock_Market in the first cell of this
# section, rather than re-declaring an identical copy of the 24-row dataset.
# A single definition means the data cannot drift apart between cells.

# Stock index price against unemployment rate, one green point per month.
plt.scatter(df['Unemployment_Rate'], df['Stock_Index_Price'], color='green')
plt.title('Stock Index Price Vs Unemployment Rate', fontsize=14)
plt.xlabel('Unemployment Rate', fontsize=14)
plt.ylabel('Stock Index Price', fontsize=14)
plt.grid(True)
plt.show()

A linear relationship also exists between Stock_Index_Price and Unemployment_Rate: when the unemployment rate goes up, the stock index price goes down. Here we still have a linear relationship, but with a negative slope.