In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [9]:
# Assuming your data file is in CSV format
df = pd.read_csv('/content/Final_Dataset.csv')


In [10]:
df['DATE'] = pd.to_datetime(df['DATE'], format='%d-%m-%Y')
df.set_index('DATE', inplace=True)

In [11]:
df

Unnamed: 0_level_0,CPI,M2,UNRATE,DSPI,PCE,FEDFUNDS,INDPRO,M1SL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1990-01-01,132.100,3166.8,5.4,4208.4,3730.7,8.23,61.6440,795.4
1990-02-01,132.700,3179.2,5.3,4236.3,3728.2,8.24,62.2108,798.1
1990-03-01,133.500,3190.1,5.2,4254.0,3754.9,8.28,62.5068,801.5
1990-04-01,134.000,3201.6,5.4,4290.8,3770.0,8.26,62.3597,806.1
1990-05-01,134.400,3200.6,5.4,4296.0,3775.8,8.18,62.5474,804.2
...,...,...,...,...,...,...,...,...
2023-06-01,308.309,,,,,,,
2023-07-01,308.801,,,,,,,
2023-08-01,309.661,,,,,,,
2023-09-01,310.661,,,,,,,


In [12]:
df.isnull().sum()

CPI          0
M2          45
UNRATE      45
DSPI        45
PCE         45
FEDFUNDS    45
INDPRO      45
M1SL        45
dtype: int64

In [13]:
df = df.dropna()

In [14]:


# Split the data into independent variables (X) and the dependent variable (y)
X = df.drop('CPI', axis=1)
y = df['CPI']

In [15]:




# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a linear regression model
model = LinearRegression()

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)


In [16]:

# Evaluate the model
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, predictions))
print('Root Mean Squared Error:', metrics.mean_squared_error(y_test, predictions)**0.5)


Mean Absolute Error: 1.7431515141202742
Mean Squared Error: 4.226456549621279
Root Mean Squared Error: 2.0558347573726055


In [17]:
# Print the coefficients of the model
coefficients = pd.DataFrame({'Variable': X.columns, 'Coefficient': model.coef_})
print('\nCoefficients:')
print(coefficients)


Coefficients:
   Variable  Coefficient
0        M2    -0.005218
1    UNRATE     0.686589
2      DSPI     0.004407
3       PCE     0.007693
4  FEDFUNDS    -0.848645
5    INDPRO     0.352306
6      M1SL     0.011262


In [18]:

# Print the intercept of the model
print('\nIntercept:', model.intercept_)


Intercept: 78.48772657779315


In [19]:
import statsmodels.api as sm

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Add a constant to the independent variables for the intercept term
X_train_with_intercept = sm.add_constant(X_train)

# Create a linear regression model using statsmodels
model = sm.OLS(y_train, X_train_with_intercept).fit()

# Print the summary to see p-values (significance) for each coefficient
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                    CPI   R-squared:                       0.997
Model:                            OLS   Adj. R-squared:                  0.997
Method:                 Least Squares   F-statistic:                     7750.
Date:                Thu, 07 Dec 2023   Prob (F-statistic):          2.04e-211
Time:                        20:23:31   Log-Likelihood:                -381.81
No. Observations:                 180   AIC:                             779.6
Df Residuals:                     172   BIC:                             805.2
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         75.9788      4.600     16.518      0.0

In [20]:
#regularization

from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Assuming X_train, X_test, y_train, y_test are your training and testing data

# Lasso Regression
lasso_model = Lasso(alpha=0.01)  # You can adjust the alpha parameter
lasso_model.fit(X_train, y_train)
lasso_predictions = lasso_model.predict(X_test)

# Ridge Regression
ridge_model = Ridge(alpha=0.01)  # You can adjust the alpha parameter
ridge_model.fit(X_train, y_train)
ridge_predictions = ridge_model.predict(X_test)

# Evaluate models
lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso_predictions))
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_predictions))

print("Lasso RMSE:", lasso_rmse)
print("Ridge RMSE:", ridge_rmse)

# Print the coefficients for Lasso and Ridge
lasso_coefficients = pd.DataFrame({'Variable': X.columns, 'Coefficient': lasso_model.coef_})
ridge_coefficients = pd.DataFrame({'Variable': X.columns, 'Coefficient': ridge_model.coef_})

print('\nLasso Coefficients:')
print(lasso_coefficients)

print('\nRidge Coefficients:')
print(ridge_coefficients)


Lasso RMSE: 2.0945824965799473
Ridge RMSE: 2.0193635114247166

Lasso Coefficients:
   Variable  Coefficient
0        M2    -0.004756
1    UNRATE     0.615173
2      DSPI     0.009030
3       PCE     0.002615
4  FEDFUNDS    -0.844134
5    INDPRO     0.377284
6      M1SL     0.009073

Ridge Coefficients:
   Variable  Coefficient
0        M2    -0.005275
1    UNRATE     0.793465
2      DSPI     0.005435
3       PCE     0.006707
4  FEDFUNDS    -0.782552
5    INDPRO     0.366511
6      M1SL     0.011008


  model = cd_fast.enet_coordinate_descent(


In [21]:
#ridge lasso mae
from sklearn.metrics import mean_absolute_error

# Assuming lasso_model and ridge_model are your trained Lasso and Ridge models, and X_test, y_test are your test data

# Predictions for Lasso and Ridge models
lasso_predictions = lasso_model.predict(X_test)
ridge_predictions = ridge_model.predict(X_test)

# Calculate MAE for Lasso and Ridge
lasso_mae = mean_absolute_error(y_test, lasso_predictions)
ridge_mae = mean_absolute_error(y_test, ridge_predictions)

print("Lasso MAE:", lasso_mae)
print("Ridge MAE:", ridge_mae)


Lasso MAE: 1.7188147603531425
Ridge MAE: 1.6954749186347362


In [22]:
#ridge lasso r2
from sklearn.metrics import r2_score

# Assuming lasso_model and ridge_model are your trained Lasso and Ridge models, and X_test, y_test are your test data

# Predictions for Lasso and Ridge models
lasso_predictions = lasso_model.predict(X_test)
ridge_predictions = ridge_model.predict(X_test)

# Calculate R2 score for Lasso and Ridge
lasso_r2 = r2_score(y_test, lasso_predictions)
ridge_r2 = r2_score(y_test, ridge_predictions)

print("Lasso R-squared:", lasso_r2)
print("Ridge R-squared:", ridge_r2)


Lasso R-squared: 0.9967661023487743
Ridge R-squared: 0.9969941982011162


In [23]:
#multicollinearity = no multicollinearity
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Assuming df is your DataFrame containing the independent variables

# Add a constant term to the independent variables matrix
X = sm.add_constant(df)

# Calculate VIF for each variable
vif_data = pd.DataFrame()
vif_data["Variable"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]

# Display the VIF values
print("VIF values:")
print(vif_data)


VIF values:
   Variable          VIF
0     const  2351.201905
1       CPI   327.528764
2        M2   494.139184
3    UNRATE     5.874674
4      DSPI  1062.508114
5       PCE  1206.116766
6  FEDFUNDS     6.303832
7    INDPRO    31.180282
8      M1SL   109.721650
