# Author: Sandesh Basnet

# Graduate Admission Prediction

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('./Datasets//Admission_Predict.csv')

In [3]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [4]:
df.shape

(400, 9)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Serial No.         400 non-null    int64  
 1   GRE Score          400 non-null    int64  
 2   TOEFL Score        400 non-null    int64  
 3   University Rating  400 non-null    int64  
 4   SOP                400 non-null    float64
 5   LOR                400 non-null    float64
 6   CGPA               400 non-null    float64
 7   Research           400 non-null    int64  
 8   Chance of Admit    400 non-null    float64
dtypes: float64(4), int64(5)
memory usage: 28.3 KB


In [6]:
df.isnull().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [7]:
df.drop_duplicates(inplace= True)

In [8]:
df.shape

(400, 9)

In [9]:
df.drop(['Serial No.'], axis= 1, inplace= True)

In [10]:
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


In [11]:
df.describe()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,316.8075,107.41,3.0875,3.4,3.4525,8.598925,0.5475,0.72435
std,11.473646,6.069514,1.143728,1.006869,0.898478,0.596317,0.498362,0.142609
min,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,308.0,103.0,2.0,2.5,3.0,8.17,0.0,0.64
50%,317.0,107.0,3.0,3.5,3.5,8.61,1.0,0.73
75%,325.0,112.0,4.0,4.0,4.0,9.0625,1.0,0.83
max,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [12]:
import sweetviz

In [13]:
df.columns

Index(['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR ', 'CGPA',
       'Research', 'Chance of Admit '],
      dtype='object')

In [14]:
report = sweetviz.analyze(df, target_feat= 'Chance of Admit ')

                                             |                                             | [  0%]   00:00 ->…

In [15]:
report.show_html('Admission_Prediction.html')

In [16]:
df['Chance of Admit '].value_counts()

0.64    17
0.71    16
0.72    15
0.73    13
0.94    12
0.79    12
0.76    12
0.70    12
0.78    12
0.74    11
0.80    11
0.68    10
0.93     9
0.84     9
0.89     9
0.65     9
0.62     9
0.82     8
0.86     8
0.75     8
0.90     8
0.57     8
0.81     8
0.77     8
0.91     7
0.69     7
0.96     7
0.66     7
0.61     7
0.67     7
0.85     6
0.63     6
0.56     6
0.92     6
0.58     5
0.46     5
0.87     5
0.54     5
0.52     5
0.47     5
0.59     4
0.49     4
0.97     4
0.88     4
0.95     4
0.53     3
0.48     3
0.44     3
0.42     3
0.83     3
0.38     2
0.34     2
0.36     2
0.45     2
0.50     2
0.43     1
0.51     1
0.55     1
0.60     1
0.39     1
Name: Chance of Admit , dtype: int64

In [17]:
X = df.drop(['Chance of Admit '], axis = 1)
y = df['Chance of Admit ']

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [20]:
from sklearn.preprocessing import StandardScaler

In [21]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [22]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

In [23]:
linear_reg = LinearRegression()
dtree = DecisionTreeRegressor()
svr = SVR()
gradient = GradientBoostingRegressor()

In [24]:
linear_reg.fit(X_train, y_train)
dtree.fit(X_train, y_train)
svr.fit(X_train, y_train)
gradient.fit(X_train, y_train)

In [25]:
pred_linear_reg = linear_reg.predict(X_test)
pred_dtree = dtree.predict(X_test)
pred_svr = svr.predict(X_test)
pred_gradient = gradient.predict(X_test)

In [26]:
score1 = r2_score(y_test, pred_linear_reg)
score2 = r2_score(y_test, pred_dtree)
score3 = r2_score(y_test, pred_svr)
score4 = r2_score(y_test, pred_gradient)

In [29]:
report = {
    'Model': ['Linear Regression', 'Decision Tree Regressor', 'SVR', 'Gradient Boosting Regressor'],
    'R2_Score': [score1, score2, score3, score4]
}

In [30]:
Report = pd.DataFrame(report)
Report

Unnamed: 0,Model,R2_Score
0,Linear Regression,0.848377
1,Decision Tree Regressor,0.582504
2,SVR,0.773987
3,Gradient Boosting Regressor,0.786721
