In [2]:
#----Importing the Necessary Libraries-----------------------

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import pickle
import warnings
from sklearn.pipeline import Pipeline


warnings.filterwarnings('ignore')

In [2]:
#-- Defining the range of max marks of each subject a student can score-----------
maths = np.random.randint(0,76,5000)
logical = np.random.randint(0,76,5000)
english = np.random.randint(0,51,5000)


#-- Calculaing the total marks of the students and adding weights to the subjects---------------------------------
total_score = 0.4*maths + 0.4*logical + 0.2*english

In [3]:
#-- Normalizing the scores to bring them into the range od 0,1---------------

normalized_score = (total_score - total_score.min()) / (total_score.max() - (total_score.min()))

In [4]:
adjusted_score = np.clip(1-normalized_score, 0 , 1)

In [5]:
# Map to rank in range [1, 70,000]
ranks = (adjusted_score * 70000).astype(int) + 1

In [6]:
# Clip to ensure max rank = 70000
ranks = np.clip(ranks, 1, 70000)

In [7]:
# Create DataFrame
df = pd.DataFrame({
    'Maths': maths,
    'Logical Reasoning': logical,
    'English': english,
    'Rank': ranks
})

In [8]:
df.head()

Unnamed: 0,Maths,Logical Reasoning,English,Rank
0,24,35,38,38542
1,71,38,41,17084
2,73,68,46,2709
3,4,11,41,56251
4,24,51,6,38542


In [9]:
df.shape

(5000, 4)

In [10]:
df.to_csv('Student_Data.csv', index=False)

In [11]:
index = ['LinearRegression', 'RandomForestRegressor', 'XGBRegressor']
columns = ['train_rmse','test_rmse', 'train_r2', 'test_r2']

analysis_df = pd.DataFrame(index=index, columns=columns)
analysis_df

Unnamed: 0,train_rmse,test_rmse,train_r2,test_r2
LinearRegression,,,,
RandomForestRegressor,,,,
XGBRegressor,,,,


In [12]:
def evaluate_regression_model(model, X_train, X_test, y_train, y_test, analysis_df, model_name):
    # Predicting
    y_train_predict = model.predict(X_train)
    y_test_predict = model.predict(X_test)
    
    # Metrics
    train_rmse = root_mean_squared_error(y_train, y_train_predict)
    train_r2 = r2_score(y_train, y_train_predict)
    test_rmse = root_mean_squared_error(y_test, y_test_predict)
    test_r2 = r2_score(y_test, y_test_predict)

    # Printing report
    print(f"📊 Regression Report for {model_name}")
    print(f"Train Root Mean Squared Error (RMSE): {train_rmse:.2f}")
    print(f"Test Root Mean Squared Error (RMSE): {test_rmse:.2f}")
    print(f"Train R² Score: {train_r2:.4f}")
    print(f"Test R² Score: {test_r2:.4f}")
    print("-" * 40)

    # Updating DataFrame
    analysis_df.loc[model_name] = [train_rmse,test_rmse, train_r2, test_r2]
    
    return analysis_df


In [13]:
X = df.drop(columns=['Rank'])
y = df['Rank']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()

sc = StandardScaler()

X_train_scaled = sc.fit_transform(X_train_scaled)
X_test_scaled = sc.transform(X_test_scaled)

In [14]:
linear_model = LinearRegression()

In [15]:
linear_model.fit(X_train_scaled, y_train)

In [16]:
#--------Function Call----------------------

evaluate_regression_model(linear_model, X_train_scaled, X_test_scaled, y_train, y_test, analysis_df, 'LinearRegression')

📊 Regression Report for LinearRegression
Train Root Mean Squared Error (RMSE): 0.31
Test Root Mean Squared Error (RMSE): 0.31
Train R² Score: 1.0000
Test R² Score: 1.0000
----------------------------------------


Unnamed: 0,train_rmse,test_rmse,train_r2,test_r2
LinearRegression,0.31289,0.311016,1.0,1.0
RandomForestRegressor,,,,
XGBRegressor,,,,


In [17]:
rf_regressor = RandomForestRegressor()

In [18]:
rf_regressor.fit(X_train, y_train)

In [19]:
#--------Function Call----------------------

evaluate_regression_model(rf_regressor, X_train, X_test, y_train, y_test, analysis_df, 'RandomForestRegressor')

📊 Regression Report for RandomForestRegressor
Train Root Mean Squared Error (RMSE): 242.73
Test Root Mean Squared Error (RMSE): 653.89
Train R² Score: 0.9997
Test R² Score: 0.9975
----------------------------------------


Unnamed: 0,train_rmse,test_rmse,train_r2,test_r2
LinearRegression,0.31289,0.311016,1.0,1.0
RandomForestRegressor,242.727969,653.890334,0.999656,0.997483
XGBRegressor,,,,


In [20]:
xgb_model = XGBRegressor()

In [21]:
xgb_model.fit(X_train, y_train)

In [22]:
#--------Function Call----------------------

evaluate_regression_model(xgb_model, X_train, X_test, y_train, y_test, analysis_df, 'XGBRegressor')

📊 Regression Report for XGBRegressor
Train Root Mean Squared Error (RMSE): 267.12
Test Root Mean Squared Error (RMSE): 569.85
Train R² Score: 0.9996
Test R² Score: 0.9981
----------------------------------------


Unnamed: 0,train_rmse,test_rmse,train_r2,test_r2
LinearRegression,0.31289,0.311016,1.0,1.0
RandomForestRegressor,242.727969,653.890334,0.999656,0.997483
XGBRegressor,267.117992,569.852189,0.999584,0.998088


In [23]:
rank_model = Pipeline(
    [
        ('scaler', sc),
        ('model', linear_model)
    ]
)

In [4]:
Maths = int(input('Maths: '))
Logical = int(input('Logical: '))
English = int(input('English: '))

Maths:  40
Logical:  40
English:  20


In [5]:
testing_df = pd.DataFrame(
    {
        'Maths': [Maths],
        'Logical Reasoning': [Logical],
        'English': [English]
    }
)

In [26]:
rank_model.predict(testing_df)

array([33542.24074867])

In [27]:
with open('rank_model.pkl', 'wb') as fp:
    pickle.dump(rank_model, fp)

In [3]:
with open('rank_model.pkl','rb') as fp:
    model = pickle.load(fp)

In [6]:
model.predict(testing_df)

array([33542.24074867])