In [38]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold, cross_val_score
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [39]:
# Load the dataset from a CSV file in the working directory and preview its first three rows.
df = pd.read_csv("StressLevelDataset.csv")
df.head(3)

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,14,20,0,11,2,1,2,4,2,3,...,2,3,2,3,3,2,3,3,2,1
1,15,8,1,15,5,3,1,4,3,1,...,2,1,4,1,5,1,4,5,5,2
2,12,18,1,14,2,1,2,2,2,2,...,2,2,3,3,2,2,3,2,2,1


In [40]:
# Count the missing values in each column.
df.isnull().sum()

anxiety_level                   0
self_esteem                     0
mental_health_history           0
depression                      0
headache                        0
blood_pressure                  0
sleep_quality                   0
breathing_problem               0
noise_level                     0
living_conditions               0
safety                          0
basic_needs                     0
academic_performance            0
study_load                      0
teacher_student_relationship    0
future_career_concerns          0
social_support                  0
peer_pressure                   0
extracurricular_activities      0
bullying                        0
stress_level                    0
dtype: int64

In [41]:
# Feature matrix: every column of df except the target, anxiety_level.
X = df.drop(columns='anxiety_level')
X.head(3)

Unnamed: 0,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,safety,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,20,0,11,2,1,2,4,2,3,3,2,3,2,3,3,2,3,3,2,1
1,8,1,15,5,3,1,4,3,1,2,2,1,4,1,5,1,4,5,5,2
2,18,1,14,2,1,2,2,2,2,3,2,2,3,3,2,2,3,2,2,1


In [42]:
# Target vector: the anxiety_level column of df.
Y = df['anxiety_level']
Y.head(3)

0    14
1    15
2    12
Name: anxiety_level, dtype: int64

In [43]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=50,
)

In [54]:
# Display the held-out feature rows produced by the train/test split.
x_test

Unnamed: 0,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,safety,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
243,21,1,14,2,1,2,2,2,3,3,3,2,3,3,3,3,2,2,3,1
850,16,0,13,2,1,2,4,3,2,2,3,2,3,2,3,2,2,3,3,1
760,29,0,7,1,2,4,2,1,3,4,5,5,2,4,1,3,2,1,1,0
445,11,1,18,3,3,1,3,4,2,2,1,2,3,2,4,1,5,5,5,2
345,8,1,17,4,3,1,5,5,1,2,1,2,5,1,5,1,4,5,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
773,1,1,20,4,3,1,5,5,2,2,1,1,5,2,5,1,4,4,5,2
691,1,0,4,2,3,5,1,4,1,3,0,5,1,0,4,0,5,1,3,2
994,25,0,2,1,2,5,1,2,4,5,5,5,2,4,1,3,1,1,1,0
278,11,1,20,5,3,2,3,4,0,3,0,5,1,3,5,0,2,2,1,1


In [44]:
# Baseline: fit ordinary least squares on the training split, then show the
# test-set score returned by .score() as a percentage.
lr = LinearRegression().fit(x_train, y_train)
100 * lr.score(x_test, y_test)

73.84812694014579

In [45]:
# Shuffled 5-fold splitter with a fixed seed, used to cross-validate the
# baseline model on the full feature matrix and target.
kf = KFold(n_splits=5, shuffle=True, random_state=50)
cv_scores = cross_val_score(estimator=lr, X=X, y=Y, cv=kf)
print("Cross-Validation Scores:", cv_scores)

Cross-Validation Scores: [0.73848127 0.61958971 0.65357287 0.59747313 0.68522607]


In [64]:
def grid_search_linear_models(X, Y, test_size=0.2, random_state=50, n_splits=5):
    """Tune several regressors with GridSearchCV and return the best one.

    The data passed in is split into a training and a held-out test part.
    Each candidate model is tuned on the training part with K-fold
    cross-validation, the candidate with the highest best cross-validated
    score (negative mean squared error) is selected, and its refitted
    estimator is evaluated on the held-out part.

    Parameters
    ----------
    X : feature matrix accepted by scikit-learn (e.g. a DataFrame).
    Y : target values aligned with the rows of X.
    test_size : fraction of rows held out for the final evaluation.
    random_state : seed used for the split, the CV shuffling and the
        decision tree, so repeated runs give the same result.
    n_splits : number of cross-validation folds.

    Returns
    -------
    The best estimator, refitted by GridSearchCV on the whole training part.
    """
    # Split the data that was actually passed in instead of relying on
    # notebook-level variables; the defaults reproduce the notebook's own
    # 80/20 split and 5-fold shuffled CV.
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Define parameter grids for each model
    param_grids = {
        'Linear Regression': {},
        'Lasso Regression': {'alpha': [0.1, 1.0, 10.0]},
        # gamma has no effect on the linear kernel, so it is only searched
        # for rbf; this avoids fitting identical linear models repeatedly.
        'Support Vector Regression': [
            {'kernel': ['linear'], 'C': [0.1, 1, 10]},
            {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1]},
        ],
        'Decision Tree': {'max_depth': [3, 5, 10, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4]}
    }

    # Initialize models
    models = {
        'Linear Regression': LinearRegression(),
        'Lasso Regression': Lasso(),
        'Support Vector Regression': SVR(),
        'Decision Tree': DecisionTreeRegressor(random_state=random_state)
    }

    best_model_name = None
    best_score = -np.inf
    best_model = None

    # Perform Grid Search for each model
    for name, model in models.items():
        grid_search = GridSearchCV(model, param_grids[name], cv=cv, scoring='neg_mean_squared_error')
        grid_search.fit(x_train, y_train)

        # Compare models by the score of their best hyperparameter setting.
        # Averaging mean_test_score over the whole grid would penalise a model
        # for every poor setting tried, even though best_estimator_ (the one
        # returned) uses only the best setting.
        score = grid_search.best_score_
        if score > best_score:
            best_score = score
            best_model_name = name
            best_model = grid_search.best_estimator_

    # Evaluate the best model on the held-out rows
    rmse, mae = evaluate_regression_model(best_model, x_test, y_test)
    print("Best Model:", best_model_name)
    print("Best Score (Neg Mean Squared Error):", best_score)
    print("RMSE:", rmse)
    print("MAE:", mae)
    print("Best Model Parameters:", best_model.get_params())

    return best_model

# Function to evaluate regression model performance
def evaluate_regression_model(model, x_test, y_test):
    """Score a fitted regressor on held-out data.

    Predicts on ``x_test`` and compares the predictions with ``y_test``.

    Returns
    -------
    (rmse, mae) : the root of the mean squared error and the mean absolute
        error of the predictions.
    """
    predictions = model.predict(x_test)
    mse = mean_squared_error(y_test, predictions)
    return np.sqrt(mse), mean_absolute_error(y_test, predictions)

# Run the grid search over all candidate models and keep the estimator it returns.
best_model = grid_search_linear_models(X, Y)

Best Model: Linear Regression
Best Score (Neg Mean Squared Error): -13.689602190841521
RMSE: 3.1136537620455007
MAE: 2.3420375938166744
Best Model Parameters: {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}


In [65]:
def predict_with_best_model(model, X_new):
    """Return the predictions ``model`` makes for the samples in ``X_new``.

    ``model`` is any object with a ``predict`` method; ``X_new`` is passed to
    it unchanged.
    """
    return model.predict(X_new)

# Two hand-written samples: one value per feature column, in the column order of X.
features1 = np.array([20, 0, 11, 2, 1, 2, 4, 2, 3, 3, 2, 3, 2, 3, 3, 2, 3, 3, 2, 1])
features2 = np.array([28, 0, 3, 1, 2, 4, 2, 1, 3, 4, 4, 4, 2, 5, 1, 3, 1, 2, 1, 0])

# Wrap each sample in a one-row DataFrame labelled with the feature columns.
# The model was fitted on a DataFrame, so predicting from a bare array makes
# scikit-learn warn about missing feature names; building the frame also
# raises immediately if a sample has the wrong number of values.
sample1 = pd.DataFrame(features1.reshape(1, -1), columns=X.columns)
sample2 = pd.DataFrame(features2.reshape(1, -1), columns=X.columns)

# Predict with the estimator selected by the grid search
prediction1 = predict_with_best_model(best_model, sample1)
prediction2 = predict_with_best_model(best_model, sample2)

print("Prediction 1:", prediction1)
print("Prediction 2:", prediction2)

Prediction 1: [11.26836878]
Prediction 2: [4.54577882]




In [66]:
import pickle

# Persist the estimator selected by the grid search (best_model), not the
# baseline `lr`, so the saved file matches the model used for the predictions.
with open('student_anxiety_level_model.pickle', 'wb') as f:
    pickle.dump(best_model, f)