# Imports

In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import MinMaxScaler

# Number of Candidates

In [2]:
no_of_candidates = json.load(open("./Data/no_of_candidates.json"))

# Library

In [3]:
class BidirectionalPredictor:
    def __init__(self, degree=10):
        """
        Initialize predictors for both directions
        """
        self.degree = degree
        # Predictor for marks -> ranks
        self.marks_to_ranks_model = self._create_model()
        # Predictor for ranks -> marks
        self.ranks_to_marks_model = self._create_model()

    def _create_model(self):
        """Helper function to create a model with scaler"""
        return {
            'poly_features': PolynomialFeatures(degree=self.degree),
            'model': LinearRegression(),
            'scaler': MinMaxScaler()
        }

    def train(self, marks, ranks):
        """Train both directional models"""
        # Train marks -> ranks model
        self._train_single_direction(
            self.marks_to_ranks_model,
            np.array(marks).reshape(-1, 1),
            np.array(ranks).reshape(-1, 1)
        )

        # Train ranks -> marks model
        self._train_single_direction(
            self.ranks_to_marks_model,
            np.array(ranks).reshape(-1, 1),
            np.array(marks).reshape(-1, 1)
        )

    def _train_single_direction(self, model_dict, X, y):
        """Helper function to train a single direction"""
        X_scaled = model_dict['scaler'].fit_transform(X)
        X_poly = model_dict['poly_features'].fit_transform(X_scaled)
        model_dict['model'].fit(X_poly, y)

    def predict_rank(self, marks):
        """Predict ranks given marks"""
        return self._predict_single_direction(
            self.marks_to_ranks_model,
            np.array(marks).reshape(-1, 1)
        )

    def predict_marks(self, ranks):
        """Predict marks given ranks"""
        return self._predict_single_direction(
            self.ranks_to_marks_model,
            np.array(ranks).reshape(-1, 1)
        )

    def _predict_single_direction(self, model_dict, X):
        """Helper function for prediction"""
        assert 'scaler' in model_dict, "Key 'scaler' not found in model_dict"
        assert 'poly_features' in model_dict, "Key 'poly_features' not found in model_dict"
        assert 'model' in model_dict, "Key 'model' not found in model_dict"

        X_scaled = model_dict['scaler'].transform(X)
        X_poly = model_dict['poly_features'].transform(X_scaled)
        predictions = model_dict['model'].predict(X_poly)
        return np.round(predictions.flatten())

    def plot_both_directions(self, marks, ranks):
        """Plot both prediction directions"""
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Plot marks -> ranks
        ax1.scatter(marks, ranks, color='blue', label='Actual Data')
        X_smooth = np.linspace(min(marks), max(marks), 300)
        y_smooth = self.predict_rank(X_smooth)
        ax1.plot(X_smooth, y_smooth, color='red', label='Prediction')
        ax1.set_xlabel('Marks')
        ax1.set_ylabel('Rank')
        ax1.set_title('Marks → Rank Prediction')
        ax1.legend()
        ax1.grid(True)

        # Plot ranks -> marks
        ax2.scatter(ranks, marks, color='blue', label='Actual Data')
        X_smooth = np.linspace(min(ranks), max(ranks), 300)
        y_smooth = self.predict_marks(X_smooth)
        ax2.plot(X_smooth, y_smooth, color='red', label='Prediction')
        ax2.set_xlabel('Rank')
        ax2.set_ylabel('Marks')
        ax2.set_title('Rank → Marks Prediction')
        ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        plt.show()

In [4]:
class Marks_vs_Rank_Analyser:
    def __init__(self, year):
        self.year = year
        marks_vs_rank = pd.read_csv(f"./Data/{self.year}/{self.year}_Marks_vs_percentile.csv", index_col=[0], header=[0])
        marks_vs_rank.index.name = "Percentile"
        exam_dates = marks_vs_rank.columns
        marks_vs_rank["Ranks"] = (100 - marks_vs_rank.index)/100*no_of_candidates["2024"]
        marks_vs_rank["Max"] = marks_vs_rank[exam_dates].max(axis=1)
        marks_vs_rank["Min"] = marks_vs_rank[exam_dates].min(axis=1)
        marks_vs_rank["Avg"] = marks_vs_rank[exam_dates].mean(axis=1)
        marks_vs_rank["Median"] = marks_vs_rank[exam_dates].median(axis=1)

        # Minimum
        self.min_score_predictor = BidirectionalPredictor()
        self.min_score_predictor.train(marks_vs_rank["Min"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

        # Maximum
        self.max_score_predictor = BidirectionalPredictor()
        self.max_score_predictor.train(marks_vs_rank["Max"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

        # Average
        self.avg_score_predictor = BidirectionalPredictor()
        self.avg_score_predictor.train(marks_vs_rank["Avg"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

        # Median
        self.median_score_predictor = BidirectionalPredictor()
        self.median_score_predictor.train(marks_vs_rank["Median"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

    def predict_marks(self, ranks):
        """Predict marks given ranks"""
        return self.min_score_predictor.predict_marks(ranks), self.max_score_predictor.predict_marks(ranks), self.avg_score_predictor.predict_marks(ranks), self.median_score_predictor.predict_marks(ranks)

    def predict_ranks(self, marks):
        """Predict marks given ranks"""
        return self.min_score_predictor.predict_rank(marks), self.median_score_predictor.predict_rank(marks)

# Analysers

In [5]:
mvr_2024 = Marks_vs_Rank_Analyser(2024)
mvr_2023 = Marks_vs_Rank_Analyser(2023)

# Main

In [6]:
NITs_cutoff = pd.read_csv("./Data/2024/NITs_2024.csv")
Institutes = list(set(NITs_cutoff["Institute"]))

## Input

In [13]:
for i in range(len(Institutes)):
    print(f"{i+1}. {Institutes[i]}", flush=True)
NIT_name = Institutes[int(input("Enter the NIT number: ")) - 1]

Branches = list(set(NITs_cutoff[NITs_cutoff["Institute"] == NIT_name]["Academic Program Name"]))
for i in range(len(Branches)):
    print(f"{i+1}. {Branches[i]}", flush=True)
NIT_Branches = []
enter_branch_number = input("Enter the NIT branch (Type 'n' or 'no' to stop): ")
while i<len(Branches) and enter_branch_number.lower() not in ["n", "no"]:
    NIT_Branches.append(Branches[int(enter_branch_number) - 1])
    enter_branch_number = input("Enter the NIT branch: ")

gender = input("Enter the gender(F/M): ")
if gender == "F":
    gender = "Female-only (including Supernumerary)"
else:
    gender = "Gender-Neutral"

category = input(f"Enter the category({"/".join(set(NITs_cutoff["Seat Type"]))}): ")

quota = input("Enter your quota(HS/OS): ")

1. National Institute of Technology Delhi
2. National Institute of Technology Sikkim
3. Sardar Vallabhbhai National Institute of Technology, Surat
4. National Institute of Technology, Mizoram
5. National Institute of Technology, Tiruchirappalli
6. National Institute of Technology Hamirpur
7. National Institute of Technology, Manipur
8. National Institute of Technology Puducherry
9. National Institute of Technology Arunachal Pradesh
10. Dr. B R Ambedkar National Institute of Technology, Jalandhar
11. Motilal Nehru National Institute of Technology Allahabad
12. National Institute of Technology, Jamshedpur
13. Malaviya National Institute of Technology Jaipur
14. National Institute of Technology, Uttarakhand
15. National Institute of Technology, Srinagar
16. National Institute of Technology Durgapur
17. National Institute of Technology Goa
18. National Institute of Technology, Silchar
19. National Institute of Technology, Warangal
20. National Institute of Technology Agartala
21. Visvesvar

In [14]:
filtered_db = NITs_cutoff[(NITs_cutoff["Institute"] == NIT_name) & (NITs_cutoff["Academic Program Name"].isin(NIT_Branches)) & (NITs_cutoff["Gender"] == gender) & (NITs_cutoff["Seat Type"] == category) & (NITs_cutoff["Quota"] == quota)]

In [15]:
filtered_db.loc[:, "2024 Min Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])[0]
filtered_db.loc[:, "2024 Max Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])[1]
filtered_db.loc[:, "2024 Average Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])[2]
filtered_db.loc[:, "2024 Median Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])[3]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db.loc[:, "2024 Min Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db.loc[:, "2024 Max Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])[1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db.loc[:

In [16]:
filtered_db.loc[:, "2023 Min Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])[0]
filtered_db.loc[:, "2023 Max Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])[1]
filtered_db.loc[:, "2023 Average Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])[2]
filtered_db.loc[:, "2023 Median Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])[3]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db.loc[:, "2023 Min Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db.loc[:, "2023 Max Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])[1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db.loc[:

In [18]:
filtered_db[["Academic Program Name", "2024 Average Score", "2023 Average Score"]]

Unnamed: 0,Academic Program Name,2024 Average Score,2023 Average Score
2152,"Artificial Intelligence (4 Years, Bachelor of ...",236.0,221.0
2218,"Computational and Data Science (4 Years, Bache...",231.0,216.0
2235,"Computer Science and Engineering (4 Years, Bac...",243.0,228.0
2287,Electronics and Communication Engineering (4 Y...,227.0,211.0
2313,"Information Technology (4 Years, Bachelor of T...",233.0,218.0


In [25]:
df = pd.DataFrame(filtered_db["2024 Median Score"] - filtered_db["2023 Median Score"])
df.mean()

0    17.4
dtype: float64

In [26]:
filtered_db["2024 Median Score"]+17.4

2152    255.4
2218    251.4
2235    262.4
2287    246.4
2313    252.4
Name: 2024 Median Score, dtype: float64

In [27]:
"/".join(set(NITs_cutoff["Seat Type"]))

'OPEN (PwD)/OBC-NCL (PwD)/OPEN/SC/SC (PwD)/ST/ST (PwD)/EWS/EWS (PwD)/OBC-NCL'