# STUDENT PERFORMANCE INDICATOR

## Training And Testing Model

### Importing Libraries

In [78]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns


In [79]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

In [80]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso 
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from xgboost import XGBRegressor

In [81]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [82]:
import warnings 

### Loading Data

In [83]:
df = pd.read_csv(r'C:\Users\HP\Desktop\Project Deployment\notebook\data\raw.csv')

In [84]:
df.sample(5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
532,male,group E,associate's degree,standard,completed,62,61,58
586,female,group A,high school,standard,none,55,73,73
773,female,group C,bachelor's degree,free/reduced,none,62,78,79
938,male,group D,some college,standard,completed,85,81,85
512,male,group D,some high school,free/reduced,none,62,49,52


### Making Training and Test Features

Input Features

In [85]:
X = df.drop(columns='math score', axis=1)

In [86]:
X.sample(5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,reading score,writing score
297,male,group E,associate's degree,standard,completed,74,68
738,male,group D,associate's degree,standard,none,71,73
788,male,group C,associate's degree,free/reduced,none,65,61
915,female,group E,some college,standard,none,70,66
354,female,group C,some college,standard,none,71,70


In [87]:
num_features = X.select_dtypes(exclude='object').columns
cat_features = X.select_dtypes(include='object').columns

numeric_transformer = StandardScaler() 
ohe_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    [
        ('OneHotEncoder', ohe_transformer, cat_features),
        ('StandardScaler', numeric_transformer, num_features),
    ]
)

In [105]:
cat_features

Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course'],
      dtype='object')

In [88]:
X = preprocessor.fit_transform(X)


In [89]:
X.shape

(1000, 19)

Output Features

In [90]:
y = df['math score']

In [91]:
y[:5]

0    72
1    69
2    90
3    47
4    76
Name: math score, dtype: int64

Train Test Split

In [92]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [93]:
X_train.shape, X_test.shape

((800, 19), (200, 19))

### Training Model

Creating Function to evaluate model

In [100]:
def evaluate_model(y_true, y_predicted):
    
    mae = mean_absolute_error(y_true, y_predicted)
    mse = mean_squared_error(y_true, y_predicted)
    rmse = np.sqrt(mse)
    r2score = r2_score(y_true, y_predicted)
    
    return mae, rmse, r2score
    

Training model and Evaluation

In [95]:
models = {
    'LinearRegression': LinearRegression(),
    'Lasso' : Lasso(),
    'Ridge': Ridge(),
    'K-Neighbours Regressor' : KNeighborsRegressor(),
    'Decision Tree' : DecisionTreeRegressor(),
    'Random Forest Regressor' : RandomForestRegressor(),
    'Ada-Boost Regressor' : AdaBoostRegressor() ,
    'XGBRegressor' : XGBRegressor()
}

In [101]:
model_list = []
r2_list = []

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train)
    
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    mae_train, rmse_train, r2score_train = evaluate_model(y_train, y_train_pred)
    mae_test, rmse_test, r2score_test = evaluate_model(y_test, y_test_pred)
    
    print('********************************************')
    print(f'Model Name : {list(models.keys())[i]} --->')
    
    model_list.append(list(models.keys())[i])
    r2_list.append(r2score_test)
    
    print('-----------------------------------')
    print('Training .......')
    print(f'- Mean Absolute Error : {mae_train}')
    print(f'- Mean Square Error   : {rmse_train}')
    print(f'- R2-Score : {r2score_train}')
    print('-----------------------------------')
    print('Testing .......')
    print(f'- Mean Absolute Error : {mae_test}')
    print(f'- Mean Square Error   : {rmse_test}')
    print(f'- R2-Score : {r2score_test}')
    print('********************************************')
    

********************************************
Model Name : LinearRegression --->
-----------------------------------
Training .......
- Mean Absolute Error : 4.271484375
- Mean Square Error   : 5.329287354885032
- R2-Score : 0.874022530822051
-----------------------------------
Testing .......
- Mean Absolute Error : 4.2221875
- Mean Square Error   : 5.425185408698029
- R2-Score : 0.8790464771743793
********************************************
********************************************
Model Name : Lasso --->
-----------------------------------
Training .......
- Mean Absolute Error : 5.206302661246526
- Mean Square Error   : 6.593815587795565
- R2-Score : 0.8071462015863458
-----------------------------------
Testing .......
- Mean Absolute Error : 5.157881810347763
- Mean Square Error   : 6.519694535667419
- R2-Score : 0.8253197323627853
********************************************
********************************************
Model Name : Ridge --->
---------------------------------

Results

In [103]:
pd.DataFrame(
    list(zip(model_list, r2_list)), 
    columns = ['Model Name', 'R2 Score']
    )

Unnamed: 0,Model Name,R2 Score
0,LinearRegression,0.879046
1,Lasso,0.82532
2,Ridge,0.880593
3,K-Neighbours Regressor,0.783813
4,Decision Tree,0.735369
5,Random Forest Regressor,0.85228
6,Ada-Boost Regressor,0.848719
7,XGBRegressor,0.827797
