In [1]:
# evaluate_risk_model_on_train.py

import pandas as pd
import sys
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Add path to model handler
sys.path.append(os.path.abspath("../../model"))
from model_handler import RiskModelHandler 

# Read the training split from disk.
df = pd.read_csv("../../dataset/train_dataset.csv")

# Ground truth: a student is flagged when the percentile rank of their semester
# average falls by more than 10 points between Sem 2 and Sem 3.
sem3_subjects = ["Math-3 Theory", "DE Theory", "FSD Theory", "Python Theory"]
sem2_subjects = [
    "Math-2 Theory", "Data Structures using Java Theory", "DBMS Theory",
    "Fundamental of Electronics and Electrical Theory", "Java-2 Theory"
]
df = df.assign(**{
    # Sum over the four Sem 3 papers divided by a fixed 4 (not a mean, so the
    # divisor does not shrink if a mark is missing).
    "Sem 3 Percentage": lambda frame: frame[sem3_subjects].sum(axis=1) / 4,
    "Sem 2 Percentage": lambda frame: frame[sem2_subjects].mean(axis=1),
    # Percentile ranks are computed within this file only.
    "Sem 2 Percentile": lambda frame: frame["Sem 2 Percentage"].rank(pct=True) * 100,
    "Sem 3 Percentile": lambda frame: frame["Sem 3 Percentage"].rank(pct=True) * 100,
    "Percentile Drop": lambda frame: frame["Sem 2 Percentile"] - frame["Sem 3 Percentile"],
    "Risk Flag": lambda frame: frame["Percentile Drop"] > 10,
})

# Labels as 0/1 integers, captured before the label columns are removed below.
y_true = df["Risk Flag"].astype(int)

# Everything that must not reach the model: the Sem 3 marks the label is built
# from, every derived helper column, and the remaining columns listed last.
columns_to_drop = [
    *sem3_subjects,
    "Sem 3 Percentage", "Sem 2 Percentage",
    "Sem 3 Percentile", "Sem 2 Percentile",
    "Percentile Drop",
    "Risk Flag",
    'DE Practical', 'FSD Practical', 'Python Practical',
    'Communication Theory', 'Law Theory',
]
df = df.drop(columns=columns_to_drop)

# Load the persisted model through the project handler and score every row.
handler = RiskModelHandler(model_path="../model/risk_model.joblib")
y_pred = handler.predict(df)

# Compute the four classification metrics in one pass.
acc, prec, rec, f1 = (
    metric(y_true, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report
print(
    f"Accuracy:  {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall:    {rec:.4f}",
    f"F1 Score:  {f1:.4f}",
    sep="\n",
)


Accuracy:  1.0000
Precision: 1.0000
Recall:    1.0000
F1 Score:  1.0000


In [2]:
# Lift pandas' column cap so the wide frame is printed without elided columns.
# The option stays in effect for the rest of the kernel session.
pd.set_option("display.max_columns", None)

# First five rows of whatever `df` currently holds in the kernel.
preview = df.head()
print(preview)

  Gender Religion Branch Section-1 Section-2 Section-3  Roll-1  Math-1 Theory  \
0      M    Hindu     CE         D         D         A     350             47   
1      F    Hindu    CST         B         B         D      18             84   
2      F    Hindu   AIML         A         A         C      23             74   
3      M    Hindu    CST         B         B         D     212             55   
4      M    Hindu    CST         B         B         D     208             38   

   Physics Theory  Physics Practical  Java-1 Theory  Java-1 Practical  \
0              48                 75             50                76   
1              83                 81             61                95   
2              85                 86             64                88   
3              69                 82             56                82   
4              59                 74             36                77   

   Software Engineering Theory  Software Engineering Practical  \
0       

In [3]:
# Evaluate the risk model on the test dataset (same steps as the training-set evaluation above)

import pandas as pd
import sys
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Add path to model handler
sys.path.append(os.path.abspath("../../model"))
from model_handler import RiskModelHandler 

# Read the test split from disk (this cell evaluates on test data, not training data).
df = pd.read_csv("../../dataset/test_dataset.csv")

# Ground truth: a student is flagged when the percentile rank of their semester
# average falls by more than 10 points between Sem 2 and Sem 3.
sem3_subjects = ["Math-3 Theory", "DE Theory", "FSD Theory", "Python Theory"]
sem2_subjects = [
    "Math-2 Theory", "Data Structures using Java Theory", "DBMS Theory",
    "Fundamental of Electronics and Electrical Theory", "Java-2 Theory"
]
df = df.assign(**{
    # Sum over the four Sem 3 papers divided by a fixed 4 (not a mean, so the
    # divisor does not shrink if a mark is missing).
    "Sem 3 Percentage": lambda frame: frame[sem3_subjects].sum(axis=1) / 4,
    "Sem 2 Percentage": lambda frame: frame[sem2_subjects].mean(axis=1),
    # Percentile ranks are computed within this file only, i.e. relative to the
    # other rows of the test split.
    "Sem 2 Percentile": lambda frame: frame["Sem 2 Percentage"].rank(pct=True) * 100,
    "Sem 3 Percentile": lambda frame: frame["Sem 3 Percentage"].rank(pct=True) * 100,
    "Percentile Drop": lambda frame: frame["Sem 2 Percentile"] - frame["Sem 3 Percentile"],
    "Risk Flag": lambda frame: frame["Percentile Drop"] > 10,
})

# Labels as 0/1 integers, captured before the label columns are removed below.
y_true = df["Risk Flag"].astype(int)

# Everything that must not reach the model: the Sem 3 marks the label is built
# from, every derived helper column, and the remaining columns listed last.
columns_to_drop = [
    *sem3_subjects,
    "Sem 3 Percentage", "Sem 2 Percentage",
    "Sem 3 Percentile", "Sem 2 Percentile",
    "Percentile Drop",
    "Risk Flag",
    'DE Practical', 'FSD Practical', 'Python Practical',
    'Communication Theory', 'Law Theory',
]
df = df.drop(columns=columns_to_drop)

# Load the persisted model through the project handler and score every row.
handler = RiskModelHandler(model_path="../model/risk_model.joblib")
y_pred = handler.predict(df)

# Compute the four classification metrics in one pass.
acc, prec, rec, f1 = (
    metric(y_true, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report
print(
    f"Accuracy:  {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall:    {rec:.4f}",
    f"F1 Score:  {f1:.4f}",
    sep="\n",
)


Accuracy:  0.7238
Precision: 0.2857
Recall:    0.2857
F1 Score:  0.2857


In [None]:
import nbformat
from nbformat.v4 import new_code_cell

def modify_ipynb_drop_columns(
    notebook_path,
    old_line="X = current_df.drop(columns=['Risk Flag', 'Predicted Risk Flag'])",
    new_line="X = current_df.drop(columns=['Risk Flag'])",
):
    """
    Modifies a Jupyter Notebook file to change the 'drop' columns for X.

    Every code cell is scanned for `old_line`; each occurrence is replaced by
    `new_line`. The notebook is written back in place only when at least one
    cell actually changed.

    Args:
        notebook_path (str): The path to the .ipynb file.
        old_line (str): Exact text to search for inside code cells.
        new_line (str): Text that replaces every occurrence of `old_line`.

    Returns:
        None. Progress and failures are reported with print(); exceptions
        raised while reading or writing the notebook are caught and printed
        rather than propagated.
    """
    # A search text equal to its replacement can never change the notebook.
    # NOTE(review): running this function on a notebook that contains this very
    # cell also rewrites the search literal in the cell's own source; that is
    # presumably how the two strings once became identical - confirm.
    if old_line == new_line:
        print("Nothing to do: the text to replace and its replacement are identical.")
        return

    try:
        with open(notebook_path, 'r', encoding='utf-8') as f:
            notebook = nbformat.read(f, as_version=4)

        found_and_replaced = False
        for cell in notebook.cells:
            # Only code cells are rewritten; markdown/raw cells are left alone.
            if cell.cell_type != 'code' or old_line not in cell.source:
                continue
            # str.replace keeps the cell's own line endings and trailing
            # newline untouched, unlike a split/join round trip.
            cell.source = cell.source.replace(old_line, new_line)
            found_and_replaced = True
            print(f"Replaced in cell:\nOriginal: {old_line}\nNew: {new_line}")

        if found_and_replaced:
            with open(notebook_path, 'w', encoding='utf-8') as f:
                nbformat.write(notebook, f)
            print(f"Successfully modified '{notebook_path}'.")
        else:
            print(f"The line to replace ('{old_line}') was not found in '{notebook_path}'.")

    except FileNotFoundError:
        print(f"Error: Notebook file not found at '{notebook_path}'")
    except Exception as e:
        # Deliberate best-effort: report the problem without aborting the
        # notebook run.
        print(f"An error occurred: {e}")

# Run the rewrite only when this code executes as the main module rather than
# being imported from elsewhere.
if __name__ == "__main__":
    # Notebook to patch; a relative path, so it is resolved against the
    # current working directory.
    notebook_file = "model_selection.ipynb"
    modify_ipynb_drop_columns(notebook_file)

The line to replace ('X = current_df.drop(columns=['Risk Flag', 'Predicted Risk Flag'])') was not found in 'model_selection.ipynb'.
