In [1]:
import numpy as np
import pandas as pd
import scipy as sp

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, f1_score, accuracy_score, roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder

In [2]:
trainData  = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/admission/train.csv'
testData  = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/admission/test.csv'
subData  = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/admission/submission.csv'

In [3]:
train = pd.read_csv(trainData)
test = pd.read_csv(testData)
sub = pd.read_csv(subData)

In [4]:
train.isnull().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [5]:
X = train.drop(['Serial No.','Chance of Admit'], axis=1)
y = train['Chance of Admit']

X = StandardScaler().fit_transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred = rf.predict(X_test)
r2_score(y_test, y_pred)

0.7948207873977469

In [8]:
ab = AdaBoostRegressor(random_state=42).fit(X_train, y_train)
y_pred = ab.predict(X_test)
r2_score(y_test, y_pred)

0.7828170702620636

In [9]:
gb = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)
y_pred = gb.predict(X_test)
r2_score(y_test, y_pred)

0.8226037515231047

In [10]:
pipe = Pipeline([('scaler', StandardScaler()), ('rf', RandomForestRegressor())])
params = [{'rf__random_state':[1, 10, 100], 'rf__n_jobs':[1, 10, 100]}]
cv = GridSearchCV(pipe, params, n_jobs=1).fit(X_train, y_train)
r2_score(y_test, cv.predict(X_test))

0.7970925538768222

In [11]:
test_test = test.drop(['Serial No.'], axis=1)
test_test = StandardScaler().fit_transform(test_test)

In [12]:
test_pred = gb.predict(test_test)
sub[0] = test_pred
sub.to_csv('17782.csv',index=False)