### Importing Required Libraries and Data set

In [142]:
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score

In [143]:
df=pd.read_csv("C:/Users/hemak/Downloads/Admission_Predict.csv")

### Data Analysis

In [144]:
df.shape

(400, 9)

In [145]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [146]:
df.sample(10)

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
33,34,340,114,5,4.0,4.0,9.6,1,0.9
340,341,312,107,3,3.0,3.0,8.46,1,0.75
347,348,299,94,1,1.0,1.0,7.34,0,0.42
46,47,329,114,5,4.0,5.0,9.3,1,0.86
299,300,305,112,3,3.0,3.5,8.65,0,0.71
3,4,322,110,3,3.5,2.5,8.67,1,0.8
342,343,308,106,3,3.0,3.0,8.24,0,0.58
177,178,319,110,3,3.5,3.5,9.04,0,0.82
112,113,301,107,3,3.5,3.5,8.34,1,0.62
226,227,306,110,2,3.5,4.0,8.45,0,0.63


In [147]:
df.nunique()

Serial No.           400
GRE Score             49
TOEFL Score           29
University Rating      5
SOP                    9
LOR                    9
CGPA                 168
Research               2
Chance of Admit       60
dtype: int64

In [148]:
df.describe(include='all')

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,200.5,316.8075,107.41,3.0875,3.4,3.4525,8.598925,0.5475,0.72435
std,115.614301,11.473646,6.069514,1.143728,1.006869,0.898478,0.596317,0.498362,0.142609
min,1.0,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,100.75,308.0,103.0,2.0,2.5,3.0,8.17,0.0,0.64
50%,200.5,317.0,107.0,3.0,3.5,3.5,8.61,1.0,0.73
75%,300.25,325.0,112.0,4.0,4.0,4.0,9.0625,1.0,0.83
max,400.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [149]:
df.isnull().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [150]:
df['Prob_of_admission']=df.iloc[:, -1]

### Data Preprocessing

In [151]:
X=df.drop(['Prob_of_admission'],axis=1)
y=df['Prob_of_admission']

In [152]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [153]:
print(X_train,y_train)

     Serial No.  GRE Score  TOEFL Score  University Rating  SOP  LOR   CGPA  \
336         337        319          110                  3  3.0   2.5  8.79   
64           65        325          111                  3  3.0   3.5  8.70   
55           56        320          103                  3  3.0   3.0  7.70   
106         107        329          111                  4  4.5   4.5  9.18   
300         301        309          106                  2  2.5   2.5  8.00   
..          ...        ...          ...                ...  ...   ...   ...   
323         324        305          102                  2  2.0   2.5  8.18   
192         193        322          114                  5  4.5   4.0  8.94   
117         118        290          104                  4  2.0   2.5  7.46   
47           48        339          119                  5  4.5   4.0  9.70   
172         173        322          110                  4  4.0   5.0  9.13   

     Research  Chance of Admit   
336         0    

In [154]:
models = [
    RandomForestRegressor(),
    LinearRegression(),
    DecisionTreeRegressor(),
    GradientBoostingRegressor()
]

In [157]:
for regressor in models:
    print("\n-------------------------")
    print(f'Regressor: {regressor.__class__.__name__}')
    
    regressor.fit(X_train, y_train)
    
    pred = regressor.predict(X_test)
    mse, mae ,r2= mean_squared_error(pred, y_test), mean_absolute_error(pred, y_test), r2_score(y_test,pred)
    print(f'Mean Squared Error: {mse}\tMean Absolute Error: {mae} \tR2 score: {r2}\n\n')


-------------------------
Regressor: RandomForestRegressor
Mean Squared Error: 6.335375000000109e-06	Mean Absolute Error: 0.000898750000000241 	R2 score: 0.9996512794578913



-------------------------
Regressor: LinearRegression
Mean Squared Error: 4.844484182111653e-31	Mean Absolute Error: 5.641320743876576e-16 	R2 score: 1.0



-------------------------
Regressor: DecisionTreeRegressor
Mean Squared Error: 3.0000000000000004e-05	Mean Absolute Error: 0.0012500000000000352 	R2 score: 0.9983486981807298



-------------------------
Regressor: GradientBoostingRegressor
Mean Squared Error: 3.7413431809320103e-06	Mean Absolute Error: 0.00038626756010712857 	R2 score: 0.9997940637732937




### Model Summary

### Considering the provided metrics and the potential concerns, both the Random Forest Regressor and the Gradient Boosting Regressor appear to be strong candidates for the best model. They consistently achieved low MSE and MAE values with high R2 scores, indicating excellent predictiveperformance. 

### Between the two, Gradient Boosting Regressor might be slightly preferable due to its ensemble nature, which generally reduces overfitting. 