In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_absolute_error
import pickle
import warnings
warnings.filterwarnings('ignore')

In [None]:
df=pd.read_csv('/content/Admission_Predict.csv')

In [None]:
df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.00,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.80
4,5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
395,396,324,110,3,3.5,3.5,9.04,1,0.82
396,397,325,107,3,3.0,3.5,9.11,1,0.84
397,398,330,116,4,5.0,4.5,9.45,1,0.91
398,399,312,103,3,3.5,4.0,8.78,0,0.67


In [None]:
def eda(df):
  print("Shape of dataset: ", df.shape)
  print()
  print("Missing values: ", df.isnull().sum())
  print()
  print("Data types: ", df.dtypes)
  print()
  print(df.describe())

In [None]:
eda(df)

Shape of dataset:  (400, 9)

Missing values:  Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

Data types:  Serial No.             int64
GRE Score              int64
TOEFL Score            int64
University Rating      int64
SOP                  float64
LOR                  float64
CGPA                 float64
Research               int64
Chance of Admit      float64
dtype: object

       Serial No.   GRE Score  TOEFL Score  University Rating         SOP  \
count  400.000000  400.000000   400.000000         400.000000  400.000000   
mean   200.500000  316.807500   107.410000           3.087500    3.400000   
std    115.614301   11.473646     6.069514           1.143728    1.006869   
min      1.000000  290.000000    92.000000           1.000000    1.000000   
25%    100.750000  308.000000   103.000000           2.0000

In [None]:
def prepare_x_y(df,target_column):
  x=df.drop(target_column, axis=1)
  y=df[target_column]

  print("Feature shape: ", x.shape)
  print("Target shape: ", y.shape)

  return x,y

In [None]:
x,y=prepare_x_y(df, 'Chance of Admit ')

Feature shape:  (400, 8)
Target shape:  (400,)


In [None]:
def split_data(x,y,test_size=0.2):
  x_train, x_test, y_train, y_test=train_test_split(x,y, test_size=test_size,random_state=42)
  print("Train Size: ", x_train.shape)
  print("Test Size: ", x_test.shape)

  return x_train,x_test, y_train, y_test

In [None]:
x_train, x_test,y_train,y_test=split_data(x,y)

Train Size:  (320, 8)
Test Size:  (80, 8)


In [None]:
def train_model(model,x_train,y_train):
  model.fit(x_train,y_train)
  print(f"{type(model).__name__ } trained successfully!")

  return model

model=train_model(LinearRegression(),x_train,y_train)

LinearRegression trained successfully!


In [None]:
def evaluate_model(model, x_test,y_test):
  prediction=model.predict(x_test)

  r2=r2_score(y_test,prediction)
  mae=mean_absolute_error(y_test,prediction)

  print("R2 Score: ", r2)
  print("MAE: ", mae)

  return r2,mae

In [None]:
r2,mae=evaluate_model(model,x_test,y_test)

R2 Score:  0.8212241793299226
MAE:  0.04994339166543271


In [None]:
def save_model(model,path):
  with open(path, 'wb') as f:
    pickle.dump(model, f)
  print(f"Model saved: {path}")
save_model(model,'model.pkl')

Model saved: model.pkl


In [None]:
def load_model(path):
  with open(path, 'rb') as f:
    model=pickle.load(f)
  print("Model Loaded!")
  return model

model=load_model('model.pkl')
sample=x_test.iloc[0:1]
print("Sample Model prediction: ",model.predict(sample))
print("Actual value: ", y_test.iloc[0])

Model Loaded!
Sample Model prediction:  [0.65574435]
Actual value:  0.68


In [None]:
df.columns


Index(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating', 'SOP',
       'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='object')