In [1]:
import numpy as np
import pandas as pd

import warnings
warnings.simplefilter("ignore")

In [2]:
df = pd.read_csv("admission_analyzed.csv")
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


In [3]:
X = df.drop("Chance of Admit", axis = 1)
y = df["Chance of Admit"]

***Train Test Split***

In [23]:
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

***Finding Best Random State***

In [15]:
train = []
test = []
cv = []
mae = []

for i in range(100):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = i)

    step = ColumnTransformer(transformers = [
        ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
    ], remainder = "passthrough")

    pipe = Pipeline([
        ("step", step),
        # ("pca", PCA(n_components = 1)),
        ("model", LinearRegression())
    ])

    pipe.fit(X_train, y_train)
    pred_train = pipe.predict(X_train)
    pred_test = pipe.predict(X_test)

    train_accuracy = pipe.score(X_train, y_train)
    test_accuracy = pipe.score(X_test, y_test)

    mae_score = mean_absolute_error(y_test, pred_test)
    cv_score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

    train.append(train_accuracy)
    test.append(test_accuracy)
    cv.append(cv_score)
    mae.append(mae_score)

ff = pd.DataFrame({
    "Train Model": train,
    "Test Model": test,
    "CV": cv,
    "MAE": mae
})

In [16]:
ff.sort_values(by = "MAE", ascending = True)

Unnamed: 0,Train Model,Test Model,CV,MAE
37,0.809355,0.870252,0.800884,0.034781
46,0.821726,0.801517,0.808648,0.036747
28,0.816858,0.844576,0.795360,0.037469
31,0.814673,0.848832,0.805871,0.037697
99,0.813026,0.852766,0.806158,0.038246
...,...,...,...,...
85,0.831743,0.776164,0.822676,0.049260
94,0.837615,0.748116,0.826114,0.049850
78,0.842624,0.731096,0.832520,0.050759
32,0.821500,0.813323,0.806425,0.052852


In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 46)

# ***PolynomialFeatures***

In [20]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", PolynomialFeatures())
])

svc_param_grid = {"model__degree": list(range(1, 10))}

svc_grid = GridSearchCV(pipe, svc_param_grid, cv = 5, scoring = "accuracy")

svc_grid.fit(X_train, y_train)

svc_grid.best_params_

{'model__degree': 1}

# ***Ridge***

In [27]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", Ridge())
])

svc_param_grid = {"model__alpha": [0.001, 0.1, 0, 10]}

svc_grid = GridSearchCV(pipe, svc_param_grid, cv = 5, scoring = "r2")

svc_grid.fit(X_train, y_train)

svc_grid.best_params_

{'model__alpha': 10}

# ***Lasso***

In [28]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", Lasso())
])

svc_param_grid = {"model__alpha": [0.001, 0.1, 0, 10, 100]}

svc_grid = GridSearchCV(pipe, svc_param_grid, cv = 5, scoring = "r2")

svc_grid.fit(X_train, y_train)

svc_grid.best_params_

{'model__alpha': 0.001}

# ***ElasticNet***

In [31]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", ElasticNet())
])

svc_param_grid = {"model__alpha": [0.001, 0.1, 0, 10, 100], "model__l1_ratio": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,1]}

svc_grid = GridSearchCV(pipe, svc_param_grid, cv = 5, scoring = "r2")

svc_grid.fit(X_train, y_train)

svc_grid.best_params_

{'model__alpha': 0.001, 'model__l1_ratio': 0.4}

# ***SVR***

In [32]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", SVR())
])

svc_param_grid = {"model__C": [0.001, 0.1, 0, 10, 100], "model__kernel": ["linear", "poly", "sigmoid", "rbf"]}

svc_grid = GridSearchCV(pipe, svc_param_grid, cv = 5, scoring = "r2")

svc_grid.fit(X_train, y_train)

svc_grid.best_params_

{'model__C': 0.1, 'model__kernel': 'linear'}

# ***KNeighborsRegressor***

In [34]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", KNeighborsRegressor())
])

knn_param_grid = {"model__n_neighbors": list(range(1, 20))}

knn_grid = GridSearchCV(pipe, knn_param_grid, cv = 5, scoring = "r2")

knn_grid.fit(X_train, y_train)

knn_grid.best_params_

{'model__n_neighbors': 5}

# ***DecisionTreeRegressor***

In [35]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", DecisionTreeRegressor())
])

decision_param_grid = {"model__max_depth": list(range(1, 40))}

decision_grid = GridSearchCV(pipe, decision_param_grid, cv = 5, scoring = "r2")

decision_grid.fit(X_train, y_train)

decision_grid.best_params_

{'model__max_depth': 4}

# ***RandomForestRegressor***

In [36]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", RandomForestRegressor())
])

random_param_grid = {"model__n_estimators": list(range(1, 20))}

random_grid = GridSearchCV(pipe, random_param_grid, cv = 5, scoring = "r2")

random_grid.fit(X_train, y_train)

random_grid.best_params_

{'model__n_estimators': 18}

# ***AdaBoostRegressor***

In [37]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", AdaBoostRegressor())
])

ada_param_grid = {"model__n_estimators": list(range(1, 20))}

ada_grid = GridSearchCV(pipe, ada_param_grid, cv = 5, scoring = "r2")

ada_grid.fit(X_train, y_train)

ada_grid.best_params_

{'model__n_estimators': 5}

# ***GradientBoostingRegressor***

In [38]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", GradientBoostingRegressor())
])

gradient_param_grid = {"model__n_estimators": list(range(1, 20)), "model__learning_rate" : [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}

gradient_grid = GridSearchCV(pipe, gradient_param_grid, cv = 5, scoring = "r2")

gradient_grid.fit(X_train, y_train)

gradient_grid.best_params_

{'model__learning_rate': 0.2, 'model__n_estimators': 18}

# ***XGBRegressor***

In [40]:
step = ColumnTransformer(transformers = [
    ("scaler", StandardScaler(), [0, 1, 2, 3, 4, 5])
], remainder = "passthrough")

pipe = Pipeline([
    ("step", step),
    ("model", XGBRegressor())
])

xgb_param_grid = {"model__n_estimators": list(range(1, 30)), "model__learning_rate" : [0.2, 0.3, 0.4, 0.5],
                 "model__gamma": [0, 0.1, 0.2, 0.3]}

xgb_grid = GridSearchCV(pipe, xgb_param_grid, cv = 5, scoring = "r2")

xgb_grid.fit(X_train, y_train)

xgb_grid.best_params_

{'model__gamma': 0.1, 'model__learning_rate': 0.3, 'model__n_estimators': 25}