In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter("ignore")

In [2]:
# Load the CSV from the working directory into `df` and show its first five rows.
csv_path = "admission_analyzed.csv"
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


In [3]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(500, 8)

In [4]:
# Separate the target column from the predictors: `y` is the target series,
# `X` is every other column of `df`.
target_col = "Chance of Admit"
y = df[target_col]
X = df.drop(columns = target_col)

# ***Libraries***

In [28]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_error

In [6]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

# ***Train_Test_Split***

In [7]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical from run to run.
split_parts = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 46,
)
X_train, X_test, y_train, y_test = split_parts

# ***LinearRegression***

In [9]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Ordinary least squares baseline.
model = LinearRegression()

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8217259546584925
Testing Accuracy : 0.8015172695523146
CV Score : 0.8086484605529461
MAE : 0.03674727259337707


# ***Ridge***

In [70]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Ridge regression with regularisation strength alpha = 10.
model = Ridge(alpha = 10)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8212368411024896
Testing Accuracy : 0.8072410397558862
CV Score : 0.809408570522834
MAE : 0.03615559769346537


# ***Lasso***

In [14]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Lasso regression with alpha = 0.001.
model = Lasso(alpha = 0.001)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8215239909723397
Testing Accuracy : 0.8074766702806122
CV Score : 0.8089342459559724
MAE : 0.03624943143801186


# ***ElasticNet***

In [15]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Elastic net with alpha = 0.001 and l1_ratio = 0.4.
model = ElasticNet(alpha = 0.001, l1_ratio = 0.4)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.821690840408435
Testing Accuracy : 0.804149668181085
CV Score : 0.8091097866976348
MAE : 0.03651267745715218


# ***SVR***

In [65]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Support vector regression with a linear kernel and C = 0.1.
model = SVR(C = 0.1, kernel = "linear")

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.7875971122530621
Testing Accuracy : 0.7291717866899476
CV Score : 0.7733868686426187
MAE : 0.048557490938415125


# ***KNeighborsRegressor***

In [64]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# k-nearest-neighbours regression using 5 neighbours.
model = KNeighborsRegressor(n_neighbors = 5)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8740294656634049
Testing Accuracy : 0.6772822905955864
CV Score : 0.7889101466418752
MAE : 0.04637999999999999


# ***DecisionTreeRegressor***

In [63]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Decision tree limited to depth 4, seeded for repeatability.
model = DecisionTreeRegressor(max_depth = 4, random_state = 1)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8342451730876681
Testing Accuracy : 0.7068224286999568
CV Score : 0.7314207377665272
MAE : 0.04591372114646496


# ***RandomForestRegressor***

In [62]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Random forest of 18 trees, seeded for repeatability.
model = RandomForestRegressor(n_estimators = 18, random_state = 0)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.9629053489988468
Testing Accuracy : 0.745791356755263
CV Score : 0.753023499373942
MAE : 0.03968333333333334


# ***AdaBoostRegressor***

In [61]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# AdaBoost with 5 estimators, seeded for repeatability.
model = AdaBoostRegressor(n_estimators = 5, random_state = 0)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.7944841390008756
Testing Accuracy : 0.7453844029327511
CV Score : 0.7537153837373033
MAE : 0.04309183829889921


# ***GradientBoostingRegressor***

In [60]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Gradient boosting with 18 estimators and learning rate 0.2, seeded.
model = GradientBoostingRegressor(
    n_estimators = 18,
    random_state = 0,
    learning_rate = 0.2,
)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8795397149984258
Testing Accuracy : 0.7886710934623241
CV Score : 0.7749638233688293
MAE : 0.03692709544332131


# ***XGBRegressor***

In [59]:
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# XGBoost with 25 estimators, learning rate 0.3 and gamma 0.1, seeded.
model = XGBRegressor(
    n_estimators = 25,
    random_state = 0,
    learning_rate = 0.3,
    gamma = 0.1,
)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# The "Accuracy" labels below are whatever the estimator's `score` returns
# (R^2 for scikit-learn-style regressors), not classification accuracy.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8157110499991213
Testing Accuracy : 0.7954104096829382
CV Score : 0.753348012208291
MAE : 0.03936390671730041


# ***FINAL MODEL - RIDGE***

In [71]:
# Rebuild and refit the Ridge pipeline so that `pipe` holds the model that the
# following cells persist and use for prediction.
# Standardise the columns at positions 0-5 of X; any remaining column is
# passed to the estimator unchanged.
step1 = ColumnTransformer(
    transformers = [("scaler", StandardScaler(), list(range(6)))],
    remainder = "passthrough",
)

# Ridge regression with regularisation strength alpha = 10.
model = Ridge(alpha = 10)

pipe = Pipeline(steps = [("scaler", step1), ("model", model)])
pipe.fit(X_train, y_train)
pred_test = pipe.predict(X_test)

# `score` (and cross_val_score's default scoring) is R^2 for scikit-learn
# regressors, so the "Accuracy" labels below are R^2 values.
report = (
    ("Training Accuracy :", pipe.score(X_train, y_train)),
    ("Testing Accuracy :", pipe.score(X_test, y_test)),
    ("CV Score :", cross_val_score(pipe, X_train, y_train, cv = 5).mean()),
    ("MAE :", mean_absolute_error(y_test, pred_test)),
)
for label, value in report:
    print(label, value)

Training Accuracy : 0.8212368411024896
Testing Accuracy : 0.8072410397558862
CV Score : 0.809408570522834
MAE : 0.03615559769346537


In [72]:
# Display the first row of X, which shows the feature column names and their order.
X.head(1)

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337,118,4,4.5,4.5,9.65,1


In [84]:
import pickle

# Serialise the fitted pipeline (scaling step + estimator) to a pickle file in
# the working directory.
with open("admission_ridge_model.pkl", mode = "wb") as model_file:
    pickle.dump(pipe, model_file)

In [85]:
# Read the pipeline object back from its pickle file into `loaded`.
# NOTE: unpickling can execute arbitrary code, so only load files you created
# yourself or otherwise trust.
with open("admission_ridge_model.pkl", mode = "rb") as model_file:
    loaded = pickle.load(model_file)

In [88]:
# Build a one-row frame describing a single applicant. The keys are listed in
# the same order as the columns of X; that order matters because the pipeline
# picks the columns to scale by position, not by name.
final = pd.DataFrame({
    "GRE Score": 337,
    "TOEFL Score": 118,
    "University Rating": 4,
    "SOP": 4.5,
    "LOR": 4.5,
    "CGPA": 8.65,
    "Research": 0,
}, index = [0])

# The regressor's output is not constrained to [0, 1], so a strong or weak
# profile could otherwise be reported as more than 100 % or less than 0 %.
# Clamp the raw prediction before converting it to a percentage.
raw_chance = loaded.predict(final)[0]
bounded_chance = min(max(raw_chance, 0.0), 1.0)

chance = round(bounded_chance * 100, 2)
print(f"You have {chance} % of chances to get admission.")

You have 81.26 % of chances to get admission.
