# Importing Libraries

In [41]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Reading and Exploring files

In [42]:
# Load the admissions dataset. The path is relative, so the CSV must sit in
# the notebook's working directory.
df = pd.read_csv('Admission Chance.csv')

In [43]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0,Serial No,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [44]:
# Column dtypes and non-null counts, to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Serial No          400 non-null    int64  
 1   GRE Score          400 non-null    int64  
 2   TOEFL Score        400 non-null    int64  
 3   University Rating  400 non-null    int64  
 4    SOP               400 non-null    float64
 5   LOR                400 non-null    float64
 6   CGPA               400 non-null    float64
 7   Research           400 non-null    int64  
 8   Chance of Admit    400 non-null    float64
dtypes: float64(4), int64(5)
memory usage: 28.2 KB


In [45]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,Serial No,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,200.5,316.8075,107.41,3.0875,3.4,3.4525,8.598925,0.5475,0.72435
std,115.614301,11.473646,6.069514,1.143728,1.006869,0.898478,0.596317,0.498362,0.142609
min,1.0,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,100.75,308.0,103.0,2.0,2.5,3.0,8.17,0.0,0.64
50%,200.5,317.0,107.0,3.0,3.5,3.5,8.61,1.0,0.73
75%,300.25,325.0,112.0,4.0,4.0,4.0,9.0625,1.0,0.83
max,400.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [46]:
# List the raw column labels. Several carry stray whitespace
# (' SOP', 'LOR ', 'Chance of Admit '), so they must be referenced exactly.
df.columns

Index(['Serial No', 'GRE Score', 'TOEFL Score', 'University Rating', ' SOP',
       'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='object')

In [47]:
# Convert the multi-word labels to snake_case so they are easier to reference.
# Only the four labels listed here change: ' SOP' and 'LOR ' keep their
# padding, and the prediction cell further down uses those padded names.
df.rename(
    columns={
        'GRE Score': 'GRE_Score',
        'TOEFL Score': 'TOEFL_Score',
        'University Rating': 'University_Rating',
        'Chance of Admit ': 'Chance_of_Admit',
    },
    inplace=True,
)

In [48]:
# Re-check the labels after the rename.
df.columns

Index(['Serial No', 'GRE_Score', 'TOEFL_Score', 'University_Rating', ' SOP',
       'LOR ', 'CGPA', 'Research', 'Chance_of_Admit'],
      dtype='object')

# Target and Feature Definition, Train/Test Split

In [49]:
# Target: the admission-chance column.
y = df['Chance_of_Admit']

# 'Serial No' is a running row number (1..400 in the summary above), not a
# predictor, so remove it before the feature matrix is built in the next cell.
# errors='ignore' keeps this cell re-runnable: the drop is in place, so a
# second run would otherwise raise KeyError because the column is already gone.
df.drop(columns=['Serial No'], inplace=True, errors='ignore')

In [50]:
# Feature matrix: every remaining column except the target.
X = df.drop(columns='Chance_of_Admit')

In [51]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2529,
)

In [52]:
# Sanity-check the split sizes: (rows, features) for X, (rows,) for y.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((320, 7), (320,), (80, 7), (80,))

# Multiple Linear Regression Model

In [54]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split. fit() returns the estimator
# itself, so lin_df ends up bound to the fitted model.
lin_df = LinearRegression().fit(X_train, y_train)

In [55]:
# Fitted intercept term of the linear model.
lin_df.intercept_

np.float64(-1.2695069799364993)

In [56]:
# Fitted coefficients, one per feature, in the column order of X_train.
lin_df.coef_

array([ 0.00174151,  0.00348526,  0.00433217, -0.00340044,  0.02129946,
        0.11459488,  0.01967799])

In [57]:
# Predict admission chances for the held-out test rows; these are compared
# against y_test in the metric cells below.
y_pred = lin_df.predict(X_test)

# Metrics

In [58]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

In [59]:
# Mean absolute error on the test split, in the same units as Chance_of_Admit.
mean_absolute_error(y_test, y_pred)

np.float64(0.04775760019872856)

In [60]:
# Mean absolute percentage error on the test split.
# NOTE: scikit-learn returns this as a fraction, not a 0-100 percentage
# (a result of 0.083 means roughly 8.3%).
mean_absolute_percentage_error(y_test, y_pred)

np.float64(0.083061628535216)

In [61]:
# Mean squared error on the test split (squared units of the target).
mean_squared_error(y_test, y_pred)

np.float64(0.004483789262621526)

In [62]:
# Coefficient of determination (R^2) on the test split.
r2_score(y_test, y_pred)

0.8079643551291407

# Predictions

In [70]:
# Five hand-written applicant profiles to score with the fitted model.
# Keys reproduce the training column labels exactly, including the padded
# ' SOP' and 'LOR ' names, and appear in the same order as the features.
new_data = pd.DataFrame(
    {
        'GRE_Score': [314, 300, 314, 330, 340],
        'TOEFL_Score': [98, 110, 98, 116, 120],
        'University_Rating': [4, 4, 3, 3, 5],
        ' SOP': [4, 4, 4, 4, 5],
        'LOR ': [3, 3, 3, 4, 5],
        'CGPA': [8.62, 8.20, 8.7, 8.9, 9.3],
        'Research': [1, 1, 1, 1, 1],
    }
)

In [71]:
# Score the new applicant profiles with the fitted model. No clipping is
# applied here, so values are raw linear-model outputs.
predictions = lin_df.predict(new_data)

In [72]:
# Display the predicted admission chances, one per row of new_data.
predictions

array([0.69399449, 0.66330657, 0.6988299 , 0.8336472 , 0.93740468])