## LIBs

In [11]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')


from sklearn.preprocessing import LabelEncoder , OrdinalEncoder


## Models
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor , AdaBoostRegressor
from xgboost import XGBRegressor

In [13]:
df = pd.read_csv(r"D:\Work\Route\HTI\S10\KNN\StudentsPerformance.csv")

In [15]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   gender                       1000 non-null   object
 1   race/ethnicity               1000 non-null   object
 2   parental level of education  1000 non-null   object
 3   lunch                        1000 non-null   object
 4   test preparation course      1000 non-null   object
 5   math score                   1000 non-null   int64 
 6   reading score                1000 non-null   int64 
 7   writing score                1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


## Data PRE

In [20]:
x = df.drop(['math score'],axis = 1)
y = df['math score']

In [22]:
x.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,74
1,female,group C,some college,standard,completed,90,88
2,female,group B,master's degree,standard,none,95,93
3,male,group A,associate's degree,free/reduced,none,57,44
4,male,group C,some college,standard,none,78,75


In [26]:
x['gender'].unique()

array(['female', 'male'], dtype=object)

In [28]:
x['race/ethnicity'].unique()

array(['group B', 'group C', 'group A', 'group D', 'group E'],
      dtype=object)

In [30]:
x['lunch'].unique()

array(['standard', 'free/reduced'], dtype=object)

In [32]:
x['test preparation course'].unique()

array(['none', 'completed'], dtype=object)

In [34]:
x['parental level of education'].unique()

array(["bachelor's degree", 'some college', "master's degree",
       "associate's degree", 'high school', 'some high school'],
      dtype=object)

In [36]:
label_encoder = LabelEncoder()

In [38]:
x['gender'] = label_encoder.fit_transform(x['gender'] )
x['race/ethnicity'] = label_encoder.fit_transform(x['race/ethnicity'] )
x['lunch'] = label_encoder.fit_transform(x['lunch'] )
x['test preparation course'] = label_encoder.fit_transform(x['test preparation course'] )

In [42]:
x['parental level of education'].unique()

array(["bachelor's degree", 'some college', "master's degree",
       "associate's degree", 'high school', 'some high school'],
      dtype=object)

In [44]:
ord_encoder = OrdinalEncoder(categories = [["some high school",
                                            "high school",
                                            "some college",
                                            "associate's degree",
                                            "bachelor's degree",
                                            "master's degree"]])

In [46]:
x[['parental level of education']] = ord_encoder.fit_transform(x[['parental level of education']])

In [48]:
x.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,reading score,writing score
0,0,1,4.0,1,1,72,74
1,0,2,2.0,1,0,90,88
2,0,1,5.0,1,1,95,93
3,1,0,3.0,0,1,57,44
4,1,2,2.0,1,1,78,75


In [50]:
x_train , x_test, y_train,y_test = train_test_split(x,y,
                                                   test_size= 0.2,
                                                   random_state = 0)

## Modeling 

In [54]:
models= {
 'LinearRegression':LinearRegression(),
    'SVR':SVR(),
    'KNeighborsRegressor': KNeighborsRegressor(),
    'DecisionTreeRegressor' :DecisionTreeRegressor(),
    'RandomForestRegressor' : RandomForestRegressor(),
    'AdaBoostRegressor' : AdaBoostRegressor(),
    'XGBRegressor': XGBRegressor()
    
}

In [56]:
def evalute_model(act, pred,model_name):
    score = r2_score(act, pred)
    print(f'The score of [{model_name}] model is --> ({score})')
    return score

In [62]:
model_scores={}
model_pred={}

for i in range(len(list(models))):
    model = list(models.values())[i]
    model_name = list(models)[i]
    
    model.fit(x_train,y_train)
    y_pred = model.predict(x_test)
    model_pred[model_name] = y_pred
    
    m_score = evalute_model(y_test,y_pred,model_name)
    model_scores[model_name] = m_score

The score of [LinearRegression] model is --> (0.8596877833639415)
The score of [SVR] model is --> (0.6113881488448154)
The score of [KNeighborsRegressor] model is --> (0.6499336280061571)
The score of [DecisionTreeRegressor] model is --> (0.7573715278268115)
The score of [RandomForestRegressor] model is --> (0.8559246029154252)
The score of [AdaBoostRegressor] model is --> (0.8286716522483727)
The score of [XGBRegressor] model is --> (0.8235119341656882)


In [64]:
model_scores

{'LinearRegression': 0.8596877833639415,
 'SVR': 0.6113881488448154,
 'KNeighborsRegressor': 0.6499336280061571,
 'DecisionTreeRegressor': 0.7573715278268115,
 'RandomForestRegressor': 0.8559246029154252,
 'AdaBoostRegressor': 0.8286716522483727,
 'XGBRegressor': 0.8235119341656882}

In [66]:
model_pred

{'LinearRegression': array([63.51412785, 75.07131488, 49.48901512, 67.70291291, 70.11877822,
        74.17608778, 65.5085795 , 48.89037124, 87.77966411, 43.99886822,
        44.62012749, 58.87058527, 81.60499124, 85.8507563 , 43.56071191,
        33.69900011, 56.50851289, 76.61832389, 46.75549245, 80.0591183 ,
        55.10922393, 47.9921308 , 70.63742139, 62.80213528, 48.3499982 ,
        65.25317157, 61.36665789, 49.18367941, 57.5540598 , 78.49305337,
        73.29892729, 90.82716549, 83.75130314, 53.99254409, 44.9710993 ,
        88.15054366, 92.41555   , 58.61246469, 68.50399888, 70.28113301,
        72.92018655, 59.09503473, 72.54737629, 69.11443637, 74.02389312,
        50.84250166, 83.2471284 , 53.78956983, 77.25976343, 51.77315439,
        42.34700917, 62.6803455 , 42.38806276, 62.67491775, 39.71277206,
        66.46391113, 47.3545357 , 67.34355034, 81.62772769, 54.53549287,
        70.59246891, 72.06628726, 43.4358362 , 77.65760279, 70.29706946,
        67.32142352, 77.6720712

In [68]:
y_test

993    62
859    87
298    40
553    77
672    69
       ..
679    63
722    74
215    84
653    65
150    62
Name: math score, Length: 200, dtype: int64