In [29]:
# Import necessary libraries
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder

In [30]:
# Restore the two trained CatBoost classifiers from their saved .cbm files
college_model = CatBoostClassifier()
branch_model = CatBoostClassifier()

college_model.load_model("../Saved_Models/college_model.cbm")
# Kept as the cell's last expression: its return value is what the notebook displays
branch_model.load_model("../Saved_Models/branch_model.cbm")

<catboost.core.CatBoostClassifier at 0x25511c8d6d0>

In [31]:
# Test record in the format provided: a list with one dict per submission,
# where every value (including the numeric ones) is a string
form_data = [
    {
        "age": "20",
        "hscMarks": "88.64",
        "jeeMainsMarks": "93.26",
        "mhtcetMarks": "90.85",
        "socioeconomicBackground": "Open",
        "sscMarks": "85.65",
    }
]

In [32]:
# Convert list of dictionaries to DataFrame: one row per dictionary, one column per key.
# The values are still the raw strings from form_data at this point.
form_data_df = pd.DataFrame(form_data)

In [33]:
# Encode categorical variables (Socioeconomic_Background)
# NOTE(review): fit_transform fits a brand-new encoder on this single inference row,
# so the one category present is always mapped to 0 whatever its value is. Confirm this
# matches the encoding used when the saved models were trained; if it does not, reuse
# the training-time encoder or mapping here instead of refitting.
le = LabelEncoder()
form_data_df['Socioeconomic_Background'] = le.fit_transform(form_data_df['socioeconomicBackground'])

In [34]:
# Remove the raw text column now that its encoded counterpart exists
form_data_df = form_data_df.drop(columns=['socioeconomicBackground'])

In [35]:
# Inspect the frame after encoding and dropping the original text column
form_data_df

Unnamed: 0,age,hscMarks,jeeMainsMarks,mhtcetMarks,sscMarks,Socioeconomic_Background
0,20,88.64,93.26,90.85,85.65,0


In [36]:
# Function to preprocess the data
def preprocess_data(data):
    """Return copies of the form entries with the numeric fields cast to float.

    Parameters
    ----------
    data : list of dict
        Form entries. Each entry must contain the keys ``age``, ``hscMarks``,
        ``jeeMainsMarks``, ``mhtcetMarks`` and ``sscMarks`` with values that
        ``float()`` accepts.

    Returns
    -------
    list of dict
        One new dictionary per entry, in the same order. The five numeric
        fields are floats; every other key is carried over unchanged.

    Raises
    ------
    KeyError
        If an entry lacks one of the numeric fields.
    ValueError, TypeError
        If a numeric field cannot be converted by ``float()``.
    """
    numeric_fields = ('age', 'hscMarks', 'jeeMainsMarks', 'mhtcetMarks', 'sscMarks')
    # Build new dicts instead of assigning into the caller's entries, so the raw
    # form data is left intact and re-running the cell gives the same result.
    return [
        {**entry, **{field: float(entry[field]) for field in numeric_fields}}
        for entry in data
    ]

In [37]:
# Preprocess the test data: cast the numeric form fields from strings to floats
processed_data = preprocess_data(form_data)

In [38]:
# Inspect the preprocessed entries
processed_data

[{'age': 20.0,
  'hscMarks': 88.64,
  'jeeMainsMarks': 93.26,
  'mhtcetMarks': 90.85,
  'socioeconomicBackground': 'Open',
  'sscMarks': 85.65}]

In [39]:
# Assemble the single-row feature frame passed to the models.
# Column names and their order are kept exactly as listed here.
applicant = processed_data[0]
encoded_background = form_data_df.loc[0, 'Socioeconomic_Background']

data_point = pd.DataFrame(
    {
        'Age': [float(applicant["age"])],
        'Socioeconomic_Background': [encoded_background],
        'SSC_Marks_Percentage': [float(applicant["sscMarks"])],
        'HSC_Marks_Percentage': [float(applicant["hscMarks"])],
        'MHTCET_Scores_Percentile': [float(applicant["mhtcetMarks"])],
        'JEE_Mains_Scores_Percentile': [float(applicant["jeeMainsMarks"])],
    }
)

In [40]:
# # Disabled: earlier single-label version of make_predictions (the active top-N version is defined in the next cell)
# def make_predictions(data):
#     college_predictions = college_model.predict(data)
#     branch_predictions = branch_model.predict(data)
#     return college_predictions, branch_predictions

# # Make predictions
# college_preds, branch_preds = make_predictions(data_point)

In [41]:
# Function to make predictions using the models
def _top_n_classes(model, data, top_n):
    """Return the ``top_n`` most probable class labels per row and their probabilities."""
    probs = np.asarray(model.predict_proba(data))
    # argsort is ascending, so reverse each row before keeping the first top_n columns
    top_indices = probs.argsort()[:, ::-1][:, :top_n]
    # np.asarray keeps the fancy indexing valid even if classes_ is a plain list
    labels = np.asarray(model.classes_)[top_indices]
    top_probs = np.take_along_axis(probs, top_indices, axis=1)
    return labels, top_probs


def make_predictions(data, top_n=3):
    """Return the top-N college and branch predictions for each input row.

    Uses the module-level ``college_model`` and ``branch_model``.

    Parameters
    ----------
    data : feature rows accepted by both models' ``predict_proba``
        (the notebook passes a one-row DataFrame).
    top_n : int, default 3
        Number of highest-probability classes to keep per row. If it exceeds
        the number of classes, all classes are returned.

    Returns
    -------
    tuple of four 2-D arrays
        ``(college_labels, college_probs, branch_labels, branch_probs)``.
        Each has one row per input row, with columns ordered from most to
        least probable.

    Raises
    ------
    ValueError
        If ``top_n`` is negative. A negative value would otherwise slice off
        the *least* likely classes instead of selecting the most likely ones.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    college_preds, college_probs = _top_n_classes(college_model, data, top_n)
    branch_preds, branch_probs = _top_n_classes(branch_model, data, top_n)
    return college_preds, college_probs, branch_preds, branch_probs

# Run both models on the prepared row and unpack labels and probabilities
(
    top_n_college_preds,
    top_n_college_probs,
    top_n_branch_preds,
    top_n_branch_probs,
) = make_predictions(data_point)

In [42]:
# Report the ranked predictions for the first (only) input row, best first
print("Top 3 College Predictions:")
for college, probability in zip(top_n_college_preds[0], top_n_college_probs[0]):
    print("Prediction:", college, "| Probability:", probability)

print("\nTop 3 Branch Predictions:")
for branch, probability in zip(top_n_branch_preds[0], top_n_branch_probs[0]):
    print("Prediction:", branch, "| Probability:", probability)

Top 3 College Predictions:
Prediction: XIE | Probability: 0.9699715296609469
Prediction: RCE | Probability: 0.02869544765464859
Prediction: TCET | Probability: 0.0007027097552885604

Top 3 Branch Predictions:
Prediction: Electronics and Telecommunication Engineering | Probability: 0.9595956800756615
Prediction: Artificial Intelligence and Data Science | Probability: 0.015133193220389548
Prediction: Electronics and Computer Science | Probability: 0.013724508547618624


In [43]:
# Print the single most likely college and branch.
# college_preds / branch_preds were only ever assigned by the commented-out
# make_predictions call earlier in the notebook, so on a fresh kernel they are
# undefined. Take the first column of the top-N results instead; slicing with
# [:, :1] keeps the 2-D shape.
college_preds = top_n_college_preds[:, :1]
branch_preds = top_n_branch_preds[:, :1]
print("College Predictions:", college_preds)
print("Branch Predictions:", branch_preds)

College Predictions: [['XIE']]
Branch Predictions: [['Electronics and Telecommunication Engineering']]
