In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.feature_selection import RFECV


In [None]:
# Load the raw student-performance table from CSV into `data`.
# NOTE(review): the path is absolute and anchored at the filesystem root;
# confirm it matches the environment this notebook is meant to run in.
data = pd.read_csv("/Student_Performance.csv")

In [None]:
# Preview the first rows of the raw frame.
data.head()

In [None]:
# Same preview as the previous cell; `data` has not been modified in between.
data.head()

In [None]:
# One-hot encode the categorical columns of `data`; drop_first=True drops the
# first level of each encoded column so the dummies are not redundant.
new_data = pd.get_dummies(data, drop_first=True)

In [None]:
# Preview the encoded frame.
new_data.head()

In [None]:
# Count missing values per column. Note this inspects the raw `data` frame,
# not the encoded `new_data` frame that is imputed in the next cell.
data.isnull().sum()

In [None]:
# Replace missing values in each column with that column's mean, in place.
new_data.fillna(new_data.mean(), inplace=True)

In [None]:
# Preview the frame again after imputation.
new_data.head()

In [None]:
# Report how many rows are exact duplicates of an earlier row.
print("Duplicate row count : ",new_data.duplicated().sum())

In [None]:
# Remove the duplicate rows in place (the first occurrence is kept by default).
new_data.drop_duplicates(inplace=True)

In [None]:
# Re-run the duplicate count to confirm the rows were removed.
print("Duplicate row count : ",new_data.duplicated().sum())

In [None]:
# Give the columns short, readable names.
# NOTE(review): this assignment is positional; it assumes `new_data` has exactly
# six columns in this order after get_dummies. Verify against new_data.head().
new_data.columns = ["Study Hours", "Previous Score", "Sleep Hours", "Practiced Sample Paper Count", "Score", "Extracurricular Activities"]

In [None]:
# Split the cleaned frame into the feature matrix and the regression target.
# Both sides are selected by the "Score" label, so the split does not depend
# on which position that column happens to occupy in the frame.
independent = new_data.drop(columns="Score")
dependent = new_data["Score"]

In [None]:
# Preview the feature matrix.
independent.head()

In [None]:
# Preview the target series.
dependent.head()

In [None]:
# Summary statistics for every column of the cleaned frame.
new_data.describe()

In [None]:
# Draw one scatter plot per feature against the target, colouring the points
# by "Previous Score". Each figure is shown before the next one is created.
for column in independent.columns:
    fig, ax = plt.subplots(figsize=(17, 10))
    sns.scatterplot(x=column, y="Score", hue="Previous Score", data=new_data, ax=ax)
    # The chart is a scatter plot, so the title says so.
    ax.set_title(f"Scatter plot of {column} vs score")
    ax.set_xlabel(column)
    ax.set_ylabel("Score")
    plt.show()

In [None]:
# Histogram of the target values using ten bins.
fig, ax = plt.subplots()
ax.hist(dependent, bins=10)
ax.set_xlabel("Score")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Hold out 20% of the rows as a test set; random_state=0 fixes the shuffle so
# the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(independent, dependent, test_size=0.2, random_state=0)

In [None]:
# Baseline model: ordinary least-squares linear regression.
model = LinearRegression()

In [None]:
# Fit the baseline on the training split only.
model.fit(x_train, y_train)

In [None]:
# Predict on both splits so train and test error can be compared.
y_pred_train = model.predict(x_train)
y_pred_test = model.predict(x_test)

In [None]:
def _regression_metrics(y_true, y_pred):
    """Return (MAE, MSE, R-squared) for one set of predictions."""
    return (
        mean_absolute_error(y_true, y_pred),
        mean_squared_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )


# Score the baseline model's predictions on the training and the test split.
mae_train, mse_train, r2_train = _regression_metrics(y_train, y_pred_train)
mae_test, mse_test, r2_test = _regression_metrics(y_test, y_pred_test)

In [None]:
# Report train and test values side by side, one metric per line.
report_lines = [
    f"Training MAE: {mae_train:.2f}, Testing MAE: {mae_test:.2f}",
    f"Training MSE: {mse_train:.2f}, Testing MSE: {mse_test:.2f}",
    f"Training R-squared: {r2_train:.2f}, Testing R-squared: {r2_test:.2f}",
]
print("\n".join(report_lines))

In [None]:
# Recursive feature elimination with 10-fold cross-validation, wrapping the
# linear `model`; at least one feature is always kept.
RFE_CV = RFECV(estimator=model, min_features_to_select=1, cv=10)

In [None]:
# Run the elimination on the full dataset (not only the training split).
results = RFE_CV.fit(independent, dependent)

In [None]:
# Per-feature selection mask produced by the fit (used below to index columns).
results.support_

In [None]:
# Names of the columns picked out by the mask.
selected_columns = independent.columns[results.support_]

In [None]:
# Display the selected column names.
selected_columns

In [None]:
# Candidate regularisation strengths, shared by the Lasso and Ridge searches.
parameters = {"alpha" : [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

In [None]:
# Lasso estimator with default settings, used as the base for the searches.
lasso_model = Lasso()

In [None]:
# Ridge estimator with default settings, used as the base for the searches.
ridge_model = Ridge()

In [None]:
# Exhaustive 10-fold search over the shared alpha grid for each regulariser.
# NOTE(review): both searches are fitted on the full dataset, which includes
# the rows held out as x_test; consider searching on the training split only
# if the chosen alpha is later judged on x_test.
grid_search_lasso = GridSearchCV(estimator=lasso_model, param_grid=parameters, cv=10)
best_grid_param_lasso = grid_search_lasso.fit(independent, dependent)
print("Best parameters for Lasso (Grid search): ", best_grid_param_lasso.best_params_)

grid_search_ridge = GridSearchCV(estimator=ridge_model, param_grid=parameters, cv=10)
best_grid_param_ridge = grid_search_ridge.fit(independent, dependent)
# This result belongs to the Ridge search, so it is labelled as Ridge.
print("Best parameters for Ridge (Grid search): ", best_grid_param_ridge.best_params_)

In [None]:
# Randomised 10-fold search over the same alpha grid for each regulariser.
# n_iter=7 equals the number of candidate alphas, so every candidate is tried;
# random_state pins the sampling order so a re-run reproduces the same result.
random_search_lasso = RandomizedSearchCV(
    estimator=lasso_model,
    param_distributions=parameters,
    cv=10,
    n_iter=7,
    random_state=0,
)
best_params_lasso = random_search_lasso.fit(independent, dependent)
print("Best parameters for Lasso (Random search): ", best_params_lasso.best_params_)

random_search_ridge = RandomizedSearchCV(
    estimator=ridge_model,
    param_distributions=parameters,
    cv=10,
    n_iter=7,
    random_state=0,
)
best_params_ridge = random_search_ridge.fit(independent, dependent)
print("Best parameters for Ridge (Random search): ", best_params_ridge.best_params_)


In [None]:
# Final Lasso model with a fixed regularisation strength.
# NOTE(review): alpha is hard-coded; presumably copied from the search output
# above. Verify it still matches best_params_ after a fresh run.
lasso = Lasso(alpha = 0.001)

In [None]:
# Final Ridge model with a fixed regularisation strength (same caveat as the
# Lasso alpha: hard-coded rather than read from the search result).
ridge = Ridge(alpha = 1)

In [None]:
# Fit the Lasso model on the training split.
lasso.fit(x_train, y_train)

In [None]:
# Fit the Ridge model on the training split.
ridge.fit(x_train, y_train)

In [None]:
# Evaluate the fitted Ridge model on the held-out test split.
y_pred_ridge = ridge.predict(x_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
print(f"Ridge Regression Mean Squared Error: {mse_ridge:.2f}")

In [None]:
# Evaluate the fitted Lasso model on the held-out test split.
y_pred_lasso = lasso.predict(x_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
print(f"Lasso Regression Mean Squared Error: {mse_lasso:.2f}")

In [None]:
# 5-fold cross-validation of the baseline linear model on the full dataset.
scores_1 = cross_val_score(
    model,
    independent,
    dependent,
    cv=5,
    scoring='neg_mean_squared_error',
)

# The scorer reports negated MSE, so flip the sign to get plain MSE values.
mse_scores = np.negative(scores_1)

print("Cross-Validation Scores (MSE):", mse_scores)
print("Average Cross-Validation MSE:", np.mean(mse_scores))

In [None]:
# 5-fold cross-validation of the Ridge model on the full dataset.
scores_1 = cross_val_score(
    ridge,
    independent,
    dependent,
    cv=5,
    scoring='neg_mean_squared_error',
)

# The scorer reports negated MSE, so flip the sign to get plain MSE values.
mse_scores = np.negative(scores_1)

print("Cross-Validation Scores (MSE):", mse_scores)
print("Average Cross-Validation MSE:", np.mean(mse_scores))

In [None]:
# 5-fold cross-validation of the Lasso model on the full dataset.
scores_lasso = cross_val_score(lasso, independent, dependent, cv=5, scoring='neg_mean_squared_error')

# The scorer reports negated MSE, so flip the sign. This uses the Lasso scores
# computed just above rather than `scores_1`, which holds another model's
# results.
mse_scores = -scores_lasso

print("Cross-Validation Scores (MSE):", mse_scores)
print("Average Cross-Validation MSE:", mse_scores.mean())

In [None]:
# Show the feature names and their order, as a reference for building the
# sample input below.
independent.columns

In [None]:
# One hypothetical student, given as a single-row frame that carries the same
# column names (and order) as the training features. The models were fitted on
# a DataFrame, so predicting from a bare array would lose the feature names.
new_student_sample_data = pd.DataFrame(
    [[2, 90, 8, 1, False]],
    columns=independent.columns,
)

In [None]:
# Predict the sample student's score with each of the three fitted models.
prediction = model.predict(new_student_sample_data)
prediction_lasso = lasso.predict(new_student_sample_data)
prediction_ridge = ridge.predict(new_student_sample_data)

In [None]:
# Print the three predictions (each is a one-element array).
print("Student score :",prediction)
print("Student score _Lasso:",prediction_lasso)
print("Student score_Ridge :",prediction_ridge)