### Multiple Linear Regression Project
Economic Index Dataset

In [2]:
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np 
import seaborn as sns 
%matplotlib inline 

In [5]:
df = pd.read_csv("datasets/economic_index.csv")

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,year,month,interest_rate,unemployment_rate,index_price
0,0,2017,12,2.75,5.3,1464
1,1,2017,11,2.5,5.3,1394
2,2,2017,10,2.5,5.3,1357
3,3,2017,9,2.5,5.3,1293
4,4,2017,8,2.5,5.4,1256


In [5]:
#Drop unnecessary columns
df.drop(columns = ["Unnamed: 0","year","month"],axis=1,inplace=True)

In [6]:
df.head()

Unnamed: 0,interest_rate,unemployment_rate,index_price
0,2.75,5.3,1464
1,2.5,5.3,1394
2,2.5,5.3,1357
3,2.5,5.3,1293
4,2.5,5.4,1256


In [7]:
df.isnull().sum()

interest_rate        0
unemployment_rate    0
index_price          0
dtype: int64

In [8]:
sns.pairplot(df)

<seaborn.axisgrid.PairGrid at 0x1f2ae260800>

In [9]:
df.corr()

Unnamed: 0,interest_rate,unemployment_rate,index_price
interest_rate,1.0,-0.925814,0.935793
unemployment_rate,-0.925814,1.0,-0.922338
index_price,0.935793,-0.922338,1.0


In [10]:
plt.scatter(df['interest_rate'],df['unemployment_rate'], color='b')
plt.xlabel("Interest Rate")
plt.ylabel("Unemployment Rate")

Text(0, 0.5, 'Unemployment Rate')

In [11]:
df.head()

Unnamed: 0,interest_rate,unemployment_rate,index_price
0,2.75,5.3,1464
1,2.5,5.3,1394
2,2.5,5.3,1357
3,2.5,5.3,1293
4,2.5,5.4,1256


In [12]:
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

In [13]:
X.head()

Unnamed: 0,interest_rate,unemployment_rate
0,2.75,5.3
1,2.5,5.3
2,2.5,5.3
3,2.5,5.3
4,2.5,5.4


In [14]:
y.head()

0    1464
1    1394
2    1357
3    1293
4    1256
Name: index_price, dtype: int64

In [15]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=.25,random_state=42)

In [16]:
sns.regplot(x='interest_rate',y='index_price',data=df)

<Axes: xlabel='interest_rate', ylabel='index_price'>

In [17]:
sns.regplot(x='interest_rate',y='unemployment_rate',data=df)

<Axes: xlabel='interest_rate', ylabel='unemployment_rate'>

In [18]:
sns.regplot(x='index_price',y='unemployment_rate',data=df)

<Axes: xlabel='index_price', ylabel='unemployment_rate'>

In [19]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [20]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [21]:
X_train

array([[-0.90115511,  0.37908503],
       [ 1.31077107, -1.48187786],
       [-0.90115511,  1.30956648],
       [ 1.31077107, -0.55139641],
       [ 1.31077107, -1.48187786],
       [-0.16384638,  0.68924552],
       [-0.90115511,  0.999406  ],
       [ 1.31077107, -1.48187786],
       [ 1.31077107, -1.17171738],
       [-0.90115511,  1.30956648],
       [-0.90115511,  0.999406  ],
       [-0.90115511,  0.37908503],
       [-0.90115511,  0.999406  ],
       [ 0.57346234, -0.8615569 ],
       [-0.16384638, -0.24123593],
       [-0.90115511,  0.06892455],
       [-0.90115511,  0.999406  ],
       [ 1.31077107, -0.8615569 ]])

In [22]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()

In [23]:
regressor.fit(X_train,y_train)

In [24]:
y_pred = regressor.predict(X_test)

In [25]:
y_pred

array([1204.22770398,  821.65051903, 1406.51300368,  857.70889608,
        994.90992298, 1168.16932693])

In [26]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error, r2_score

In [27]:
print("MSE:", mean_squared_error(y_test,y_pred))
print("MAE:", mean_absolute_error(y_test,y_pred))
print("RMSE:", root_mean_squared_error(y_test,y_pred))
print("R-Squared:", r2_score(y_test,y_pred))
print("Adjusted R-Squared:", 1 - (1-r2_score(y_test,y_pred))*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))

MSE: 5793.76288771258
MAE: 59.93578152323556
RMSE: 76.11677139574813
R-Squared: 0.8278978091457142
Adjusted R-Squared: 0.713163015242857


In [28]:
sns.scatterplot(x=y_test,y=y_pred)

<Axes: xlabel='index_price', ylabel='unemployment_rate'>

In [29]:
residuals = y_test - y_pred
residuals

8     -45.227704
16    149.349481
0      57.486996
18     26.291104
11     80.090077
9      -1.169327
Name: index_price, dtype: float64

In [30]:
sns.displot(residuals,kind='kde')

<seaborn.axisgrid.FacetGrid at 0x1f2afca00e0>

In [31]:
plt.scatter(y_pred,residuals)

<matplotlib.collections.PathCollection at 0x1f2b0858710>

In [32]:
import statsmodels.api as sm
ols_model = sm.OLS(y_train,X_train).fit()

In [34]:
ols_model.summary()

  res = hypotest_fun_out(*samples, **kwds)


0,1,2,3
Dep. Variable:,index_price,R-squared (uncentered):,0.035
Model:,OLS,Adj. R-squared (uncentered):,-0.086
Method:,Least Squares,F-statistic:,0.288
Date:,"Fri, 07 Feb 2025",Prob (F-statistic):,0.754
Time:,00:43:21,Log-Likelihood:,-150.85
No. Observations:,18,AIC:,305.7
Df Residuals:,16,BIC:,307.5
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,88.2728,658.295,0.134,0.895,-1307.250,1483.796
x2,-116.2572,658.295,-0.177,0.862,-1511.780,1279.266

0,1,2,3
Omnibus:,0.598,Durbin-Watson:,0.007
Prob(Omnibus):,0.741,Jarque-Bera (JB):,0.567
Skew:,-0.361,Prob(JB):,0.753
Kurtosis:,2.517,Cond. No.,4.78


In [35]:
print(regressor.coef_)

[  88.27275507 -116.25716066]
