# Imports

In [8]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score

# Number of Candidate

In [9]:
no_of_candidates = json.load(open("./Data/no_of_candidates.json"))
no_of_candidates

{'2024': 1415110, '2023': 1113325}

# Marks Prediction

## 2024

In [10]:
marks_vs_rank_2024 = pd.read_csv("./Data/2024/2024_Marks_vs_percentile.csv", index_col=[0], header=[0])
marks_vs_rank_2024.index.name = "Percentile"
exam_dates = marks_vs_rank_2024.columns
marks_vs_rank_2024["Ranks"] = (100 - marks_vs_rank_2024.index)/100*no_of_candidates["2024"]
marks_vs_rank_2024["Max"] = marks_vs_rank_2024[exam_dates].max(axis=1)
marks_vs_rank_2024["Min"] = marks_vs_rank_2024[exam_dates].min(axis=1)
marks_vs_rank_2024["Avg"] = marks_vs_rank_2024[exam_dates].mean(axis=1)
marks_vs_rank_2024["Median"] = marks_vs_rank_2024[exam_dates].median(axis=1)
# plt.scatter(marks_vs_rank_2024["Median"], marks_vs_rank_2024["Ranks"])
marks_vs_rank_2024.head(30)

Unnamed: 0_level_0,27JanS1,27JanS2,29JanS1,29JanS2,30JanS1,30JanS2,31JanS1,31JanS2,01JanS1,01JanS2,...,6AprS2,8AprS1,8AprS2,9AprS1,9AprS2,Ranks,Max,Min,Avg,Median
Percentile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
99.9,278.0,264.0,264.0,268.0,257.0,255.0,233.0,215.0,224.0,252.0,...,256,265,271,250,258,1415.11,278.0,215.0,254.7,257.5
99.8,270.0,255.0,249.0,254.0,242.0,242.0,218.0,197.0,203.0,233.0,...,247,246,257,234,241,2830.22,270.0,197.0,240.2,243.5
99.7,265.0,246.0,240.0,246.0,231.0,231.0,206.0,184.0,195.0,217.0,...,232,235,248,222,232,4245.33,265.0,184.0,230.1,232.5
99.6,259.0,238.0,232.0,239.0,223.0,221.0,198.0,178.0,185.0,207.0,...,225,226,240,214,222,5660.44,259.0,178.0,222.1,224.0
99.5,254.0,233.0,225.0,232.0,216.0,214.0,191.0,171.0,180.0,200.0,...,219,220,232,207,215,7075.55,254.0,171.0,215.65,217.5
99.4,250.0,227.0,219.0,227.0,210.0,208.0,186.0,165.0,175.0,195.0,...,213,213,227,202,210,8490.66,250.0,165.0,210.4,211.5
99.3,245.0,223.0,215.0,223.0,204.0,202.0,181.0,162.0,173.0,189.0,...,210,209,221,198,205,9905.77,245.0,162.0,206.0,207.0
99.2,242.0,217.0,210.0,220.0,200.0,198.0,177.0,158.0,167.0,181.0,...,206,205,216,193,201,11320.88,242.0,158.0,201.65,204.0
99.1,239.0,214.0,206.0,215.0,196.0,194.0,173.0,154.0,164.0,176.0,...,202,200,212,190,198,12735.99,239.0,154.0,197.65,199.0
99.0,236.0,211.0,203.0,212.0,193.0,191.0,170.0,151.0,161.0,172.0,...,200,198,208,186,194,14151.1,236.0,151.0,194.5,196.0


### Regression of Minimum score

In [11]:
min_score_predictor = BidirectionalPredictor()
min_score_predictor.train(marks_vs_rank_2024["Min"].to_numpy(), marks_vs_rank_2024["Ranks"].to_numpy())
# min_score_predictor.predict_rank(180)

### Regression of Maximum Score

In [12]:
max_score_predictor = BidirectionalPredictor()
max_score_predictor.train(marks_vs_rank_2024["Max"].to_numpy(), marks_vs_rank_2024["Ranks"].to_numpy())

### Regression of Average Score

In [13]:
avg_score_predictor = BidirectionalPredictor()
avg_score_predictor.train(marks_vs_rank_2024["Avg"].to_numpy(), marks_vs_rank_2024["Ranks"].to_numpy())

### Regression of Median Score

In [14]:
median_score_predictor = BidirectionalPredictor()
median_score_predictor.train(marks_vs_rank_2024["Median"].to_numpy(), marks_vs_rank_2024["Ranks"].to_numpy())

## General Case

In [15]:
class BidirectionalPredictor:
    def __init__(self, degree=10):
        """
        Initialize predictors for both directions
        """
        self.degree = degree
        # Predictor for marks -> ranks
        self.marks_to_ranks_model = self._create_model()
        # Predictor for ranks -> marks
        self.ranks_to_marks_model = self._create_model()

    def _create_model(self):
        """Helper function to create a model with scaler"""
        return {
            'poly_features': PolynomialFeatures(degree=self.degree),
            'model': LinearRegression(),
            'scaler': MinMaxScaler()
        }

    def train(self, marks, ranks):
        """Train both directional models"""
        # Train marks -> ranks model
        self._train_single_direction(
            self.marks_to_ranks_model,
            np.array(marks).reshape(-1, 1),
            np.array(ranks).reshape(-1, 1)
        )

        # Train ranks -> marks model
        self._train_single_direction(
            self.ranks_to_marks_model,
            np.array(ranks).reshape(-1, 1),
            np.array(marks).reshape(-1, 1)
        )

    def _train_single_direction(self, model_dict, X, y):
        """Helper function to train a single direction"""
        X_scaled = model_dict['scaler'].fit_transform(X)
        X_poly = model_dict['poly_features'].fit_transform(X_scaled)
        model_dict['model'].fit(X_poly, y)

    def predict_rank(self, marks):
        """Predict ranks given marks"""
        return self._predict_single_direction(
            self.marks_to_ranks_model,
            np.array(marks).reshape(-1, 1)
        )

    def predict_marks(self, ranks):
        """Predict marks given ranks"""
        return self._predict_single_direction(
            self.ranks_to_marks_model,
            np.array(ranks).reshape(-1, 1)
        )

    def _predict_single_direction(self, model_dict, X):
        """Helper function for prediction"""
        X_scaled = model_dict['scaler'].transform(X)
        X_poly = model_dict['poly_features'].transform(X_scaled)
        predictions = model_dict['model'].predict(X_poly)
        return np.round(predictions.flatten())

    def plot_both_directions(self, marks, ranks):
        """Plot both prediction directions"""
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Plot marks -> ranks
        ax1.scatter(marks, ranks, color='blue', label='Actual Data')
        X_smooth = np.linspace(min(marks), max(marks), 300)
        y_smooth = self.predict_rank(X_smooth)
        ax1.plot(X_smooth, y_smooth, color='red', label='Prediction')
        ax1.set_xlabel('Marks')
        ax1.set_ylabel('Rank')
        ax1.set_title('Marks → Rank Prediction')
        ax1.legend()
        ax1.grid(True)

        # Plot ranks -> marks
        ax2.scatter(ranks, marks, color='blue', label='Actual Data')
        X_smooth = np.linspace(min(ranks), max(ranks), 300)
        y_smooth = self.predict_marks(X_smooth)
        ax2.plot(X_smooth, y_smooth, color='red', label='Prediction')
        ax2.set_xlabel('Rank')
        ax2.set_ylabel('Marks')
        ax2.set_title('Rank → Marks Prediction')
        ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        plt.show()

In [16]:
class Marks_vs_Rank_Analyser:
    def __init__(self, year):
        self.year = year
        marks_vs_rank = pd.read_csv(f"./Data/{self.year}/{self.year}_Marks_vs_percentile.csv", index_col=[0], header=[0])
        marks_vs_rank.index.name = "Percentile"
        exam_dates = marks_vs_rank.columns
        marks_vs_rank["Ranks"] = (100 - marks_vs_rank.index)/100*no_of_candidates["2024"]
        marks_vs_rank["Max"] = marks_vs_rank[exam_dates].max(axis=1)
        marks_vs_rank["Min"] = marks_vs_rank[exam_dates].min(axis=1)
        marks_vs_rank["Avg"] = marks_vs_rank[exam_dates].mean(axis=1)
        marks_vs_rank["Median"] = marks_vs_rank[exam_dates].median(axis=1)

        # Minimum
        self.min_score_predictor = BidirectionalPredictor()
        self.min_score_predictor.train(marks_vs_rank["Min"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

        # Maximum
        self.max_score_predictor = BidirectionalPredictor()
        self.max_score_predictor.train(marks_vs_rank["Max"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

        # Average
        self.avg_score_predictor = BidirectionalPredictor()
        self.avg_score_predictor.train(marks_vs_rank["Avg"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

        # Median
        self.median_score_predictor = BidirectionalPredictor()
        self.median_score_predictor.train(marks_vs_rank["Median"].to_numpy(), marks_vs_rank["Ranks"].to_numpy())

    def predict_marks(self, ranks):
        """Predict marks given ranks"""
        return self.min_score_predictor.predict_marks(ranks), self.max_score_predictor.predict_marks(ranks), self.avg_score_predictor.predict_marks(ranks), self.median_score_predictor.predict_marks(ranks)

    def predict_ranks(self, marks):
        """Predict marks given ranks"""
        return self.min_score_predictor.predict_rank(marks), self.median_score_predictor.predict_rank(marks)


In [17]:
mvr_2024 = Marks_vs_Rank_Analyser(2024)
mvr_2024.predict_marks([2836])

(array([198.]), array([271.]), array([241.]), array([244.]))

In [18]:
mvr_2023 = Marks_vs_Rank_Analyser(2023)
mvr_2023.predict_marks(2836)

(array([206.]), array([246.]), array([226.]), array([227.]))

# Marks Prediction

In [19]:
NITs_cutoff = pd.read_csv("./Data/2024/NITs_2024.csv")
Institutes = list(set(NITs_cutoff["Institute"]))


In [21]:
for i in range(len(Institutes)):
    print(f"{i+1}. {Institutes[i]}", flush=True)
NIT_name = Institutes[int(input("Enter the NIT number: ")) - 1]

Branches = list(set(NITs_cutoff[NITs_cutoff["Institute"] == NIT_name]["Academic Program Name"]))
for i in range(len(Branches)):
    print(f"{i+1}. {Branches[i]}", flush=True)
NIT_Branches = []
enter_branch_number = input("Enter the NIT branch (Type 'n' or 'no' to stop): ")
while i<len(Branches) and enter_branch_number.lower() not in ["n", "no"]:
    NIT_Branches.append(Branches[int(enter_branch_number) - 1])
    enter_branch_number = input("Enter the NIT branch: ")

gender = input("Enter the gender(F/M): ")
if gender == "F":
    gender = "Female-only (including Supernumerary)"
else:
    gender = "Gender-Neutral"

category = input(f"Enter the category({"/".join(set(NITs_cutoff["Seat Type"]))}): ")

quota = input("Enter your quota(HS/OS): ")

filtered_db = NITs_cutoff[(NITs_cutoff["Institute"] == NIT_name) & (NITs_cutoff["Academic Program Name"].isin(NIT_Branches)) & (NITs_cutoff["Gender"] == gender) & (NITs_cutoff["Seat Type"] == category) & (NITs_cutoff["Quota"] == quota)]

1. National Institute of Technology, Mizoram
2. National Institute of Technology, Tiruchirappalli
3. National Institute of Technology Nagaland
4. Maulana Azad National Institute of Technology Bhopal
5. National Institute of Technology, Srinagar
6. National Institute of Technology Puducherry
7. National Institute of Technology Patna
8. National Institute of Technology, Andhra Pradesh
9. National Institute of Technology Meghalaya
10. Visvesvaraya National Institute of Technology, Nagpur
11. National Institute of Technology Delhi
12. National Institute of Technology Karnataka, Surathkal
13. National Institute of Technology, Uttarakhand
14. Indian Institute of Engineering Science and Technology, Shibpur
15. National Institute of Technology Raipur
16. National Institute of Technology Agartala
17. National Institute of Technology Sikkim
18. Motilal Nehru National Institute of Technology Allahabad
19. National Institute of Technology Arunachal Pradesh
20. National Institute of Technology, Jam

In [336]:
filtered_db

Unnamed: 0,Institute,Academic Program Name,Quota,Seat Type,Gender,Opening Rank,Closing Rank
2152,"National Institute of Technology Karnataka, Su...","Artificial Intelligence (4 Years, Bachelor of ...",HS,OPEN,Gender-Neutral,2836,3505
2218,"National Institute of Technology Karnataka, Su...","Computational and Data Science (4 Years, Bache...",HS,OPEN,Gender-Neutral,3437,4148
2235,"National Institute of Technology Karnataka, Su...","Computer Science and Engineering (4 Years, Bac...",HS,OPEN,Gender-Neutral,1084,2628
2287,"National Institute of Technology Karnataka, Su...",Electronics and Communication Engineering (4 Y...,HS,OPEN,Gender-Neutral,2763,4872
2313,"National Institute of Technology Karnataka, Su...","Information Technology (4 Years, Bachelor of T...",HS,OPEN,Gender-Neutral,2911,3877


In [22]:
filtered_db["2024 Min Score"], filtered_db["2024 Max Score"], filtered_db["2024 Average Score"], filtered_db["2024 Median Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db["2024 Min Score"], filtered_db["2024 Max Score"], filtered_db["2024 Average Score"], filtered_db["2024 Median Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db["2024 Min Score"], filtered_db["2024 Max Score"], filtered_db["2024 Average Score"], filtered_db["2024 Median Score"] = mvr_2024.predict_marks(filtered_db["Closing Rank"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_inde

In [23]:
filtered_db["2023 Min Score"], filtered_db["2023 Max Score"], filtered_db["2023 Average Score"], filtered_db["2023 Median Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db["2023 Min Score"], filtered_db["2023 Max Score"], filtered_db["2023 Average Score"], filtered_db["2023 Median Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_db["2023 Min Score"], filtered_db["2023 Max Score"], filtered_db["2023 Average Score"], filtered_db["2023 Median Score"] = mvr_2023.predict_marks(filtered_db["Closing Rank"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_inde

In [24]:
filtered_db[["Academic Program Name", "2024 Min Score","2024 Max Score", "2024 Average Score", "2024 Median Score", "2023 Min Score", "2023 Max Score", "2023 Average Score", "2023 Median Score"]]

Unnamed: 0,Academic Program Name,2024 Min Score,2024 Max Score,2024 Average Score,2024 Median Score,2023 Min Score,2023 Max Score,2023 Average Score,2023 Median Score
1967,"Computer Science and Engineering (4 Years, Bac...",129.0,213.0,172.0,175.0,135.0,176.0,154.0,152.0
1993,"Computer Science and Engineering (5 Years, Bac...",126.0,208.0,167.0,171.0,131.0,171.0,149.0,147.0
2109,"Mathematics and Computing (4 Years, Bachelor o...",123.0,202.0,162.0,166.0,126.0,166.0,144.0,142.0
