In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

In [2]:
# Load the dataset from the working directory.
df = pd.read_csv("Exam_Score_Prediction.csv")
# Positional split: every column except the last becomes the feature frame `x`,
# and the last column becomes the target series `y`.
x, y = df.iloc[:, :-1], df.iloc[:, -1]

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Columns routed to StandardScaler and to OneHotEncoder respectively.
Scale_colums = ["age", "study_hours", "class_attendance", "sleep_hours"]
encode_colums = [
    "gender",
    "course",
    "internet_access",
    "sleep_quality",
    "study_method",
    "facility_rating",
    "exam_difficulty",
]

# handle_unknown="ignore": a category that appears only in the held-out rows is
# encoded as all zeros instead of raising at predict time (the default is "error").
# When no unseen category occurs the encoded output is unchanged.
# NOTE(review): remainder="passthrough" forwards every column of `x` that is not
# listed above to the models unchanged (e.g. an identifier column, if the CSV has
# one) -- confirm that is intended.
preprocessor = ColumnTransformer(
    transformers=[
        ("encoder", OneHotEncoder(handle_unknown="ignore"), encode_colums),
        ("scaler", StandardScaler(), Scale_colums),
    ],
    remainder="passthrough",
)


In [4]:
# Linear regression on the preprocessed features.
# The preprocessor is cloned so this pipeline owns its own transformer: Pipeline
# fits its steps in place, so passing the same `preprocessor` object to several
# pipelines would let any later fit overwrite the state this model predicts with.
lin_reg = Pipeline(
    steps=[
        ("preprocessor", clone(preprocessor)),
        ("model", LinearRegression()),
    ]
)
lin_reg.fit(x_train, y_train)
lin_reg_pred = lin_reg.predict(x_test)

In [5]:
# Support-vector regression with an RBF kernel on the preprocessed features.
# The preprocessor is cloned so this pipeline owns its own transformer: Pipeline
# fits its steps in place, so passing the same `preprocessor` object to several
# pipelines would let any later fit overwrite the state this model predicts with.
svm = Pipeline(
    steps=[
        ("preprocessor", clone(preprocessor)),
        ("model", SVR(kernel="rbf")),
    ]
)
svm.fit(x_train, y_train)
svr_pred = svm.predict(x_test)

In [7]:
# Single decision tree (seeded for reproducibility) on the preprocessed features.
# The preprocessor is cloned so this pipeline owns its own transformer: Pipeline
# fits its steps in place, so passing the same `preprocessor` object to several
# pipelines would let any later fit overwrite the state this model predicts with.
dt = Pipeline(
    steps=[
        ("preprocessor", clone(preprocessor)),
        ("model", DecisionTreeRegressor(random_state=0)),
    ]
)
dt.fit(x_train, y_train)
dt_pred = dt.predict(x_test)

In [8]:
# Random forest of 150 trees (seeded for reproducibility) on the preprocessed features.
# The preprocessor is cloned so this pipeline owns its own transformer: Pipeline
# fits its steps in place, so passing the same `preprocessor` object to several
# pipelines would let any later fit overwrite the state this model predicts with.
rf = Pipeline(
    steps=[
        ("preprocessor", clone(preprocessor)),
        ("model", RandomForestRegressor(n_estimators=150, random_state=0)),
    ]
)
rf.fit(x_train, y_train)
rf_pred = rf.predict(x_test)

In [9]:
# Gradient-boosted trees (XGBoost) with a squared-error objective on the
# preprocessed features; hyperparameters are listed explicitly below.
# The preprocessor is cloned so this pipeline owns its own transformer: Pipeline
# fits its steps in place, so passing the same `preprocessor` object to several
# pipelines would let any later fit overwrite the state this model predicts with.
xgboost = Pipeline(
    steps=[
        ("preprocessor", clone(preprocessor)),
        (
            "model",
            XGBRegressor(
                n_estimators=500,
                learning_rate=0.05,
                max_depth=6,
                subsample=0.8,
                colsample_bytree=0.8,
                objective="reg:squarederror",
                random_state=0,
            ),
        ),
    ]
)
xgboost.fit(x_train, y_train)
xg_pred = xgboost.predict(x_test)

In [10]:
def report(prediction,y_test):
    """Print the R2 score and RMSE for each set of predictions.

    Parameters
    ----------
    prediction : iterable of (name, pred) pairs
        ``name`` is the label printed in the header line; ``pred`` holds the
        predicted values handed to the metric functions.
    y_test : array-like
        Ground-truth targets every ``pred`` is scored against.

    Returns
    -------
    None
        The metrics are written to standard output only.
    """
    for entry in prediction:
        label, estimates = entry
        # Same evaluation order as the metrics appear in the printed text.
        r2 = r2_score(y_test, estimates)
        rmse = root_mean_squared_error(y_test, estimates)
        print(f"{label}Report\nR2 score:{r2}\nRMSE: {rmse}")

In [12]:
# Pair each display label with that model's test-set predictions, then print
# R2 / RMSE for every pair against the held-out targets.
predictions = [
    ("LinearRegression", lin_reg_pred),
    ("SVR", svr_pred),
    ("decision Tree", dt_pred),
    ("Random Forest", rf_pred),
    ("XGbosst", xg_pred),
]
report(predictions, y_test)

LinearRegressionReport
R2 score:0.7401527601962383
RMSE: 9.65562840837304
SVRReport
R2 score:0.7290530998906866
RMSE: 9.859697326904028
decision TreeReport
R2 score:0.41160825006575774
RMSE: 14.529637432374216
Random ForestReport
R2 score:0.713582383418395
RMSE: 10.137277918938778
XGbosstReport
R2 score:0.7202469862772152
RMSE: 10.018642430870393
