### Multiple linear Regression

In [1]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np 
%matplotlib inline

In [2]:
df_index=pd.read_csv('economic_index.csv')

In [3]:
df_index.head()

Unnamed: 0.1,Unnamed: 0,year,month,interest_rate,unemployment_rate,index_price
0,0,2017,12,2.75,5.3,1464
1,1,2017,11,2.5,5.3,1394
2,2,2017,10,2.5,5.3,1357
3,3,2017,9,2.5,5.3,1293
4,4,2017,8,2.5,5.4,1256


In [4]:
# drop unnecessary columns
df_index.drop(columns=["Unnamed: 0","year","month"],axis=1,inplace=True)

In [5]:
df_index

Unnamed: 0,interest_rate,unemployment_rate,index_price
0,2.75,5.3,1464
1,2.5,5.3,1394
2,2.5,5.3,1357
3,2.5,5.3,1293
4,2.5,5.4,1256
5,2.5,5.6,1254
6,2.5,5.5,1234
7,2.25,5.5,1195
8,2.25,5.5,1159
9,2.25,5.6,1167


In [6]:
## to check null value
df_index.isnull().sum()


interest_rate        0
unemployment_rate    0
index_price          0
dtype: int64

In [7]:
## Visualization
import seaborn as sns
sns.pairplot(df_index)

<seaborn.axisgrid.PairGrid at 0x2013e6eb7d0>

In [8]:
#correlation
df_index.corr()

Unnamed: 0,interest_rate,unemployment_rate,index_price
interest_rate,1.0,-0.925814,0.935793
unemployment_rate,-0.925814,1.0,-0.922338
index_price,0.935793,-0.922338,1.0


In [9]:
#Visualize more colsely
plt.scatter(df_index['interest_rate'],df_index['unemployment_rate'],color="r")
plt.xlabel('Interest')
plt.ylabel('Unemployment')

Text(0, 0.5, 'Unemployment')

In [10]:
## Independent and dependent features
X=df_index.iloc[:,:-1]
Y=df_index.iloc[:,-1]



In [11]:
X.head()

Unnamed: 0,interest_rate,unemployment_rate
0,2.75,5.3
1,2.5,5.3
2,2.5,5.3
3,2.5,5.3
4,2.5,5.4


In [12]:
Y.head()

0    1464
1    1394
2    1357
3    1293
4    1256
Name: index_price, dtype: int64

In [13]:
## Train Test Split
from sklearn.model_selection import train_test_split


In [14]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,random_state=42)

In [15]:
X_train.shape

(18, 2)

In [16]:
import seaborn as sns

In [17]:
sns.regplot(df_index['interest_rate'],df_index['index_price']) # not working

TypeError: regplot() takes from 0 to 1 positional arguments but 2 were given

In [None]:
sns.regplot(x='interest_rate', y='index_price', data=df_index)

In [None]:
sns.regplot(x='interest_rate', y='unemployment_rate', data=df_index)

In [None]:
sns.regplot(x='index_price', y='unemployment_rate', data=df_index)

In [None]:
## Standardisation
from sklearn.preprocessing import StandardScaler

In [None]:
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [None]:
X_train

In [None]:
## apply linear regression

from sklearn.linear_model import LinearRegression 
regression= LinearRegression()

In [None]:
regression.fit(X_train,Y_train)

In [None]:
# Cross validation
from sklearn.model_selection import cross_val_score
validation_score=cross_val_score(regression,X_train,Y_train,scoring='neg_mean_squared_error',cv=3)

In [None]:
np.mean(validation_score)

In [None]:
## prediction test
Y_pred=regression.predict(X_test)

In [None]:
Y_pred

In [None]:
# Performance metrices
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [None]:
mse=mean_squared_error(Y_test,Y_pred)
mae=mean_absolute_error(Y_test,Y_pred)
rmse=np.sqrt(mse)
print(mse)
print(mae)
print(rmse)

In [None]:
from sklearn.metrics import r2_score

In [None]:
score=r2_score(Y_test,Y_pred)
print(score)

In [None]:
1 -(1-score)*(len(Y_test-1))/(len(Y_test)-X_test.shape[1]-1)

## Assumptions

In [None]:
plt.scatter(Y_test,Y_pred)

In [None]:
#Errors
Residual=Y_test-Y_pred
print(Residual)

In [None]:
## plot this residuals
sns.displot(Residual,kind='kde')

In [None]:
## scatter plot with respect to prediction and residuals
plt.scatter(Y_pred,Residual)

## OLS

In [None]:
## ols linear regression
import statsmodels.api as sm
model=sm.OLS(Y_train,X_train).fit()

In [None]:
model.summary()

In [None]:
print(regression.coef_)