In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [25]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostRegressor,RandomForestRegressor
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression,Ridge,Lasso
import warnings

In [3]:
df=pd.read_csv('StudentsPerformance.csv')

In [4]:
X=df.drop(columns=['math score'])
y=df['math score']

In [5]:
num_features=X.select_dtypes(exclude='object').columns
cat_features=X.select_dtypes(include='object').columns

In [6]:
num_features

Index(['reading score', 'writing score'], dtype='object')

In [10]:
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer

In [14]:
numeric_transformer=StandardScaler()
oh_transformer=OneHotEncoder()

preprocessor= ColumnTransformer([
    ('OneHotEncoder',oh_transformer,cat_features),
    ('StandardScaler',numeric_transformer,num_features)
]
)

In [15]:
X=preprocessor.fit_transform(X)

In [20]:
X.shape

(1000, 19)

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [23]:
X_train.shape

(800, 19)

In [24]:
X_test.shape

(200, 19)

In [26]:
def evaluate_model(true,predicted):
    mae=mean_absolute_error(true,predicted)
    mse=mean_squared_error(true,predicted)
    rmse=np.sqrt(mse)
    R2_score=r2_score(true,predicted)
    return mae,mse,rmse,R2_score

In [27]:
models={
    "LinearRegression":LinearRegression(),
    "Ridge":Ridge(),
    "lasso":Lasso(),
    "K_neighbour":KNeighborsRegressor(),
    "Decison_classifier":DecisionTreeClassifier(),
    "Linear_reg":LinearRegression(),
   "Ada_boost": AdaBoostRegressor(),
    "Random_forest":RandomForestRegressor()
     }
model_list=[]
r2_list=[]

In [29]:
for i in range(len(models)):
    ## train
    model=list(models.values())[i]
    model.fit(X_train,y_train)
    ##test
    y_train_pred=model.predict(X_train)
    y_test_pred=model.predict(X_test)
    mae_train,mse_train,rmse_train,R2_score_train=evaluate_model(y_train,y_train_pred)
    mae_test,mse_test,rmse_test,R2_score_test=evaluate_model(y_test,y_test_pred)

    print(list(models.keys())[i])
    model_list.append((list(models.keys())[i]))


    print("Models accuracy  for training")
    print("Mean square error :{:4f}".format(mse_train))
    print("Mean Absolute error :{:4f}".format(mae_train))
    print("Root mean squared error:{:4f}".format(rmse_train))
    print("R2_score:{:4f}".format(R2_score_train))

    print("---------------------------------------------------------------")

    print("Models accuracy  for test")
    print("Mean square error :{:4f}".format(mse_test))
    print("Mean Absolute error :{:4f}".format(mae_test))
    print("Root mean squared error:{:4f}".format(rmse_test))
    print("R2_score:{:4f}".format(R2_score_test))
    r2_list.append(R2_score_test)


LinearRegression
Models accuracy  for training
Mean square error :28.380544
Mean Absolute error :4.278750
Root mean squared error:5.327339
R2_score:0.874115
---------------------------------------------------------------
Models accuracy  for test
Mean square error :29.263809
Mean Absolute error :4.225937
Root mean squared error:5.409603
R2_score:0.879740
Ridge
Models accuracy  for training
Mean square error :28.337788
Mean Absolute error :4.264988
Root mean squared error:5.323325
R2_score:0.874304
---------------------------------------------------------------
Models accuracy  for test
Mean square error :29.056272
Mean Absolute error :4.211101
Root mean squared error:5.390387
R2_score:0.880593
lasso
Models accuracy  for training
Mean square error :43.478404
Mean Absolute error :5.206303
Root mean squared error:6.593816
R2_score:0.807146
---------------------------------------------------------------
Models accuracy  for test
Mean square error :42.506417
Mean Absolute error :5.157882
Ro

[WinError 2] The system cannot find the file specified
  File "d:\NAITIK\PROJECT\venv\lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
  File "d:\NAITIK\PROJECT\venv\lib\subprocess.py", line 489, in run
    with Popen(*popenargs, **kwargs) as process:
  File "d:\NAITIK\PROJECT\venv\lib\subprocess.py", line 854, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "d:\NAITIK\PROJECT\venv\lib\subprocess.py", line 1307, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,


K_neighbour
Models accuracy  for training
Mean square error :32.629450
Mean Absolute error :4.518750
Root mean squared error:5.712219
R2_score:0.855268
---------------------------------------------------------------
Models accuracy  for test
Mean square error :52.586000
Mean Absolute error :5.616000
Root mean squared error:7.251621
R2_score:0.783898
Decison_classifier
Models accuracy  for training
Mean square error :0.156250
Mean Absolute error :0.018750
Root mean squared error:0.395285
R2_score:0.999307
---------------------------------------------------------------
Models accuracy  for test
Mean square error :92.375000
Mean Absolute error :7.625000
Root mean squared error:9.611191
R2_score:0.620385
Linear_reg
Models accuracy  for training
Mean square error :28.380544
Mean Absolute error :4.278750
Root mean squared error:5.327339
R2_score:0.874115
---------------------------------------------------------------
Models accuracy  for test
Mean square error :29.263809
Mean Absolute error 