# Admission chance prediction


This dataset was created for predicting graduate admissions from an Indian perspective.
Content
The dataset contains several parameters which are considered important during the application for Masters Programs.
The parameters included are :
1.	GRE Scores ( out of 340 )
2.	TOEFL Scores ( out of 120 )
3.	University Rating ( out of 5 )
4.	Statement of Purpose and Letter of Recommendation Strength ( out of 5 )
5.	Undergraduate GPA ( out of 10 )
6.	Research Experience ( either 0 or 1 )
7.	Chance of Admit ( ranging from 0 to 1 )


# importing basic libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# reading the data

In [None]:
# Load the admissions table and discard the 'Serial No.' column in one step,
# then echo the resulting frame.
df = (
    pd.read_csv('../input/admission-chance-prediction/Admission_Prediction.csv')
    .drop(columns='Serial No.')
)
print(df)


# Rename columns to make their use easier.

In [None]:

# Short lowercase aliases for the original CSV headers.  Two source headers
# ('LOR ' and 'Chance of Admit ') end with a space, so the keys must keep it.
# 'Serial No.' was already dropped when the data was loaded; rename() ignores
# keys that match no column, so that entry has no effect here.
column_aliases = {
    'Serial No.': 'no',
    'GRE Score': 'gre',
    'TOEFL Score': 'toefl',
    'University Rating': 'rating',
    'SOP': 'sop',
    'LOR ': 'lor',
    'CGPA': 'gpa',
    'Research': 'research',
    'Chance of Admit ': 'chance',
}
df = df.rename(columns=column_aliases)

In [None]:
# Show the dtype of every column in the frame.
print(df.dtypes)

In [None]:
# Report the (rows, columns) dimensions of the frame.
print('Shape of the data:',df.shape)


In [None]:
# Preview the first rows of the frame.
print(df.head())

In [None]:
# Preview the last rows of the frame.
print(df.tail())

In [None]:
# Print pandas' summary statistics for the columns.
print(df.describe())

In [None]:
# Mean of every feature column.
# GRE, TOEFL and CGPA means are reported unrounded.
GRE, TOEFLScore, cgpa = (df[col].mean() for col in ('gre', 'toefl', 'gpa'))

# sop - statement of purpose, lor - letter of recommendation.
# These four means are passed through np.round before being reported.
SOP, LOR, Research, uni_rating = (
    np.round(df[col].mean()) for col in ('sop', 'lor', 'research', 'rating')
)

# Print one line per statistic; labels are padded so the colons line up.
average_report = [
    ("The average Score for GRE is                                        :", GRE),
    ("The average Score for TOEFL is                                      :", TOEFLScore),
    ("The average CGPA is                                                 :", cgpa),
    ("The average Number for Statement of Purpose is                      :", SOP),
    ("The average Number for Recommendation letters among the students is :", LOR),
    ("The average Number of Research done by students is                  :", Research),
    ("The average University Ratings of Different Students is             :", uni_rating),
]
for label, value in average_report:
    print(label, value)

In [None]:
# Minimum of every feature column.
# Note: this rebinds the same names (GRE, TOEFLScore, ...) that the averages
# cell assigned, so they hold minima from here on.
GRE, TOEFLScore, cgpa = (df[col].min() for col in ('gre', 'toefl', 'gpa'))

# sop - statement of purpose, lor - letter of recommendation.
# These four minima are passed through np.round before being reported.
SOP, LOR, Research, uni_rating = (
    np.round(df[col].min()) for col in ('sop', 'lor', 'research', 'rating')
)

# Print one line per statistic; labels are padded so the colons line up.
minimum_report = [
    ("The Minimum Score for GRE is                                        :", GRE),
    ("The Minimum Score for TOEFL is                                      :", TOEFLScore),
    ("The Minimum CGPA is                                                 :", cgpa),
    ("The Minimum Number for Statement of Purpose is                      :", SOP),
    ("The Minimum Number for Recommendation letters among the students is :", LOR),
    ("The Minimum Number of Research done by students is                  :", Research),
    ("The Minimum University Ratings of Different Students is             :", uni_rating),
]
for label, value in minimum_report:
    print(label, value)

In [None]:
# Missing-value check: build the boolean mask once, then print the
# per-column count of missing cells followed by a single True/False that
# says whether any cell at all is missing.
missing_mask = df.isna()
print(missing_mask.sum())
print(missing_mask.any().any())

# splitting x and y

In [None]:
# Every column except the last is a feature; the last column is the target.
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]

# Work with plain NumPy arrays from here on.
x = feature_frame.values
y = target_series.values

for array in (x, y):
    print(array)
print('shape of x is:', x.shape)
print('shape of y is:', y.shape)

In [None]:
# Draw x against y on the current axes, with a grid, and show the figure.
# NOTE(review): x is the 2-D feature array, so matplotlib should draw one
# line per feature column against y -- confirm this is the intended picture.
ax = plt.gca()
ax.plot(x, y)
ax.grid()
plt.show()

# plots

In [None]:
# Pairwise scatter plots (with per-column distributions on the diagonal) for
# every column of df.  pairplot() returns a grid object; printing it only
# emits the object's repr, so render the figure explicitly instead, the same
# way the other plotting cells do.
sns.pairplot(df)
plt.show()


In [None]:

# One 10-bin histogram per column, drawn on a single 20x15 figure.
df.hist(bins=10, figsize=(20,15))
plt.show()

In [None]:
# Admission chance for students with research experience (red) and without
# (blue).  Each group is plotted against its own running position
# 0..len(group)-1, not against the original row index.
chance_with_research = df.loc[df['research'] == 1, 'chance']
chance_without_research = df.loc[df['research'] == 0, 'chance']

plt.figure(figsize=(12, 8))
plt.plot(range(len(chance_with_research)), chance_with_research, color='red')
plt.plot(range(len(chance_without_research)), chance_without_research, color='blue')
plt.show()

In [None]:
# Pairwise correlation between all columns, drawn as a heatmap with the
# column names on both axes.
corr = df.corr()
sns.heatmap(corr, xticklabels=corr.columns.values,yticklabels=corr.columns.values)
# Show (and thereby finish) the figure like every other plotting cell does;
# without this, a plain script run leaves the heatmap as the current figure
# and the next plt.* call draws on top of it.
plt.show()
   

# performing StandardScaler before splitting training and test data

In [None]:
# Feature scaling: standardise every feature column of x.
# NOTE(review): the scaler is fitted on all rows here, including the rows
# that later end up in the test split; the scaler is fitted again on the
# training split further down.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(x)
x = sc.transform(x)

print(x)
print('shape of the x is:', x.shape)

# splitting train and test data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state=0 is passed so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Report the shape of each of the four pieces, then dump both feature sets.
for split_name, split_array in (
    ("x_train", x_train),
    ("x_test", x_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print("Shape of " + split_name + " :", split_array.shape)
print(x_train)
print(x_test)

# performing StandardScaler after splitting training and test data

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so the test rows never influence the statistics.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

for scaled in (x_train, x_test):
    print(scaled)
for scaled in (x_train, x_test):
    print(scaled.shape)

# applying ML models

# applying linear regression model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit ordinary least squares on the training split and predict the test split.
lr = LinearRegression().fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Test-set metrics: MSE, its square root, and the R^2 score.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

for label, value in (
    (" Mean Squared Error      : ", mse),
    (" Root Mean Squared Error : ", rmse),
    (" R-Squared Error         :", r2),
):
    print(label, value)

# Actual vs. predicted admission chance for the test rows.
plt.scatter(y_test, y_pred, c='blue')
plt.xlabel('y_test')
plt.ylabel('y_pred')
plt.grid()
plt.show()


# applying SVM model

In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Support-vector regression with a linear kernel, fitted on the training
# split and evaluated on the test split.
svr = SVR(kernel='linear').fit(x_train, y_train)
svr_pred = svr.predict(x_test)

# Test-set metrics: MSE, its square root, and the R^2 score.
mse = mean_squared_error(y_test, svr_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, svr_pred)

for label, value in (
    (" Mean Squared Error      : ", mse),
    (" Root Mean Squared Error : ", rmse),
    (" R-Squared Error         : ", r2),
):
    print(label, value)

# Actual vs. predicted admission chance for the test rows.
plt.scatter(y_test, svr_pred, c='red')
plt.xlabel('y_test')
plt.ylabel('svr_pred')
plt.grid()
plt.show()

# applying ExtraTreesRegressor

In [None]:
from sklearn.ensemble import ExtraTreesRegressor

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Extra-trees ensemble fitted on the training split.  The trees are built
# with randomness, so random_state is pinned to keep the reported metrics
# reproducible from run to run (the split and the random forest are seeded
# too).
etr = ExtraTreesRegressor(random_state=42)
etr.fit(x_train, y_train)

etr_pred = etr.predict(x_test)

# Test-set metrics: MSE, its square root, and the R^2 score.
mse = mean_squared_error(y_test, etr_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, etr_pred)

print(" Mean Squared Error       : ",mse)
print("Root Mean Squared Error   : ",rmse)
print("R-Squared Error           :", r2)

# Actual vs. predicted admission chance for the test rows.  The y-axis shows
# the extra-trees predictions, so label it as such (it was mislabelled
# 'svr_pred').
plt.scatter(y_test,etr_pred,c='green')
plt.grid()
plt.xlabel('y_test')
plt.ylabel('etr_pred')

plt.show()

# applying RandomForestRegressor

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# 20-tree random forest fitted on the training split; random_state is pinned
# so the result is the same on every run.
rfr = RandomForestRegressor(n_estimators=20,random_state=42)
rfr.fit(x_train, y_train)

# Keep the predictions under a model-specific name, matching svr_pred and
# etr_pred.  y_pred is still rebound as well, as it was before, in case any
# later code reads it.
rfr_pred = rfr.predict(x_test)
y_pred = rfr_pred

# Test-set metrics: R^2 score, MSE, and the square root of the MSE.
r2 = r2_score(y_test, rfr_pred)
mse = mean_squared_error(y_test, rfr_pred)
rmse = np.sqrt(mse)

print(" Mean Squared Error       : ",mse)
print("Root Mean Squared Error   : ",rmse)
print("R-Squared Error           :", r2)

# Actual vs. predicted admission chance for the test rows.  The y-axis shows
# the random-forest predictions, so label it as such (it was mislabelled
# 'svr_pred').
plt.scatter(y_test,rfr_pred,c='yellow')
plt.grid()
plt.xlabel('y_test')
plt.ylabel('rfr_pred')

plt.show()