# IMPORT REQUIRED LIBRARIES

In [1]:
import joblib as jb
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error , mean_absolute_error , r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# LOAD DATA

In [2]:
# Read the raw CSV from the working directory and preview the first rows.
df = pd.read_csv(
    "AirPassengers.csv",
)
df.head()

Unnamed: 0,Month,#Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [3]:
# Drop the leading '#' from the column name so it is easier to reference.
# Reassigning (instead of inplace=True) keeps the cell safe to re-run.
df = df.rename(columns={"#Passengers": "Passengers"})

In [4]:
# Confirm the rename by previewing the first five rows again.
df.head(5)

Unnamed: 0,Month,Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [5]:
# Column dtypes, non-null counts and memory footprint of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Month       144 non-null    object
 1   Passengers  144 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.4+ KB


In [6]:
# Summary statistics for the numeric column(s) of the frame.
df.describe()

Unnamed: 0,Passengers
count,144.0
mean,280.298611
std,119.966317
min,104.0
25%,180.0
50%,265.5
75%,360.5
max,622.0


# DATE TIME DATA HANDLING

In [7]:
# Parse the "Month" strings once, store the timestamps back on the frame,
# and derive the calendar year and month number as separate feature columns.
month_ts = pd.to_datetime(df["Month"])

df["Month"] = month_ts
df["Year"] = month_ts.dt.year
df["Month_num"] = month_ts.dt.month



In [8]:
# Feature matrix: the two calendar columns derived from "Month".
X = df.loc[:, ["Year", "Month_num"]]
# Target vector: the passenger count.
y = df.loc[:, "Passengers"]

# DATA SPLITTING FOR TRAINING AND TESTING

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so test months are
# interleaved with training months. If this notebook is meant to evaluate
# forecasting of future months, a chronological split (shuffle=False) would be
# the appropriate check — confirm the intent before relying on these scores.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# LINEAR REGRESSION

In [10]:
# Ordinary least-squares baseline with scikit-learn's default settings.
linear_model = LinearRegression()

In [11]:
# Fit the baseline on the training split; the fitted estimator is the cell output.
linear_model.fit(X_train, y_train)

In [12]:
# Score the linear baseline on the held-out split.
# NOTE(review): this metric/print block is repeated verbatim for every model in
# the notebook; a small helper function would remove the duplication.
y_pred = linear_model.predict(X_test)

# Error metrics (same units as the target, except MSE which is squared).
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# R² and its adjusted variant, where n is the number of test rows and
# p the number of feature columns.
r2 = r2_score(y_test, y_pred)
n, p = X_test.shape
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))

print(
    "Model Evaluation Results:",
    f"MAE  = {mae:.2f}",
    f"MSE  = {mse:.2f}",
    f"RMSE = {rmse:.2f}",
    f"R² Score = {r2:.4f}",
    f"adjusted R² Score = {adj_r2:.4f}",
    sep="\n",
)

Model Evaluation Results:
MAE  = 32.48
MSE  = 1564.44
RMSE = 39.55
R² Score = 0.8426
adjusted R² Score = 0.8305


# Lasso Regression (L1 Regularization)

In [13]:
# L1-regularised linear regression with penalty strength alpha=0.1.
# `Lasso` is imported in the notebook's import cell so that every dependency
# is declared in one place and the cell works after Restart & Run All.
model_lasso = Lasso(alpha=0.1)
model_lasso.fit(X_train, y_train)


In [14]:
# Score the Lasso model on the held-out split.
y_pred = model_lasso.predict(X_test)

# Goodness of fit: R² and the adjusted R² (n test rows, p feature columns).
r2 = r2_score(y_test, y_pred)
n, p = X_test.shape
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))

# Absolute and squared error metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(
    "\n".join(
        [
            "Model Evaluation Results:",
            f"MAE  = {mae:.2f}",
            f"MSE  = {mse:.2f}",
            f"RMSE = {rmse:.2f}",
            f"R² Score = {r2:.4f}",
            f"adjusted R² Score = {adj_r2:.4f}",
        ]
    )
)

Model Evaluation Results:
MAE  = 32.47
MSE  = 1563.48
RMSE = 39.54
R² Score = 0.8427
adjusted R² Score = 0.8306


# Ridge Regression (L2 Regularization)

In [15]:
# L2-regularised linear regression with penalty strength alpha=1.0.
# `Ridge` is imported in the notebook's import cell so that every dependency
# is declared in one place and the cell works after Restart & Run All.
model_ridge = Ridge(alpha=1.0)
model_ridge.fit(X_train, y_train)


In [16]:
# Score the Ridge model on the held-out split.
y_pred = model_ridge.predict(X_test)

# Error metrics on the test rows.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# R² and adjusted R² (n test rows, p feature columns).
r2 = r2_score(y_test, y_pred)
n, p = X_test.shape
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))

print(
    "Model Evaluation Results:",
    f"MAE  = {mae:.2f}",
    f"MSE  = {mse:.2f}",
    f"RMSE = {rmse:.2f}",
    f"R² Score = {r2:.4f}",
    f"adjusted R² Score = {adj_r2:.4f}",
    sep="\n",
)

Model Evaluation Results:
MAE  = 32.45
MSE  = 1561.99
RMSE = 39.52
R² Score = 0.8428
adjusted R² Score = 0.8307


# RANDOM FOREST

In [19]:
# Random forest of 500 trees; the fixed random_state keeps the fit repeatable.
# `RandomForestRegressor` is imported in the notebook's import cell so that
# every dependency is declared in one place.
model_random = RandomForestRegressor(n_estimators=500, random_state=42)
model_random.fit(X_train, y_train)


In [20]:
# Score the random forest on the held-out split.
y_pred = model_random.predict(X_test)

# R² and adjusted R² (n test rows, p feature columns).
r2 = r2_score(y_test, y_pred)
n, p = X_test.shape
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))

# Absolute and squared error metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(
    "\n".join(
        [
            "Model Evaluation Results:",
            f"MAE  = {mae:.2f}",
            f"MSE  = {mse:.2f}",
            f"RMSE = {rmse:.2f}",
            f"R² Score = {r2:.4f}",
            f"adjusted R² Score = {adj_r2:.4f}",
        ]
    )
)

Model Evaluation Results:
MAE  = 13.93
MSE  = 311.49
RMSE = 17.65
R² Score = 0.9687
adjusted R² Score = 0.9662


# Gradient Boosting Regression

In [17]:
# Gradient-boosted regression trees: 500 boosting stages, learning rate 0.1.
# random_state is pinned (same seed as the split and the random forest) so a
# fresh Restart & Run All reproduces the same fitted model and scores.
# `GradientBoostingRegressor` is imported in the notebook's import cell.
model_grad = GradientBoostingRegressor(
    n_estimators=500,
    learning_rate=0.1,
    random_state=42,
)
model_grad.fit(X_train, y_train)


In [18]:
# Score the gradient-boosting model on the held-out split.
y_pred = model_grad.predict(X_test)

# Error metrics on the test rows.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# R² and adjusted R² (n test rows, p feature columns).
r2 = r2_score(y_test, y_pred)
n, p = X_test.shape
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))

print(
    "Model Evaluation Results:",
    f"MAE  = {mae:.2f}",
    f"MSE  = {mse:.2f}",
    f"RMSE = {rmse:.2f}",
    f"R² Score = {r2:.4f}",
    f"adjusted R² Score = {adj_r2:.4f}",
    sep="\n",
)

Model Evaluation Results:
MAE  = 8.21
MSE  = 126.86
RMSE = 11.26
R² Score = 0.9872
adjusted R² Score = 0.9863


# SAVE MODEL

In [None]:
# Persist the fitted gradient-boosting model to the working directory.
# The file name now identifies the dataset and estimator actually being saved;
# the previous name was misspelled and referred to a student-performance
# linear-regression model, which this is not.
# `joblib` (as `jb`) is imported in the notebook's import cell.
jb.dump(model_grad, "AirPassengers_GradientBoosting.pkl")