In [None]:
import pandas as pd
import numpy as np

# Load datasets: each short dataset name maps to the CSV file it is read from.
file_paths = dict(
    cleaned_quiz_results="/content/cleaned_quiz_results.csv",
    processed_quiz_data="/content/processed_quiz_data (1).csv",
    quiz_newdata="/content/quiz_newdata.csv",
)

# One DataFrame per entry of file_paths, stored under the same name.
dataframes = {
    dataset_name: pd.read_csv(csv_path)
    for dataset_name, csv_path in file_paths.items()
}

# Step 1: Data Exploration
# Print a heading followed by the first rows of each frame.
print("Cleaned Quiz Results:", dataframes["cleaned_quiz_results"].head(), sep="\n")
print("\nProcessed Quiz Data:", dataframes["processed_quiz_data"].head(), sep="\n")
print("\nQuiz New Data:", dataframes["quiz_newdata"].head(), sep="\n")

Cleaned Quiz Results:
       id  quiz_id                       user_id  \
0  336497       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
1  336448        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
2  333330       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
3  333242        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
4  329504       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   

                    submitted_at                     created_at  \
0  2025-01-17T15:30:18.027+05:30  2025-01-17T15:30:18.044+05:30   
1  2025-01-17T15:17:44.042+05:30  2025-01-17T15:17:44.056+05:30   
2  2025-01-16T20:13:19.682+05:30  2025-01-16T20:13:19.699+05:30   
3  2025-01-16T20:00:11.562+05:30  2025-01-16T20:00:11.573+05:30   
4  2025-01-15T20:34:39.462+05:30  2025-01-15T20:34:39.478+05:30   

                      updated_at  score  trophy_level  accuracy  speed  ...  \
0  2025-01-17T15:30:18.044+05:30    108             2      0.90    100  ...   
1  2025-01-17T15:17:44.056+05:30     92             1      1.00    100  ...   
2  2025-01-16T20:13:19.69

In [None]:
# Step 2: Performance Analysis
# Keep only the columns needed for the per-user performance summary.
# .copy() makes this an independent frame, so the accuracy conversion below
# writes to performance_metrics itself rather than to a slice of the source
# frame (which triggers pandas' SettingWithCopyWarning).
performance_metrics = dataframes["cleaned_quiz_results"][
    ["user_id", "quiz_id", "score", "accuracy", "mistakes_corrected", "initial_mistake_count", "better_than"]
].copy()

# Convert accuracy to numeric if stored as a percentage string
# (object dtype is how pandas holds string columns); the result is a 0-1 fraction.
if performance_metrics["accuracy"].dtype == object:
    performance_metrics["accuracy"] = (
        performance_metrics["accuracy"]
        .str.replace("%", "", regex=False)  # literal match, independent of the pandas regex default
        .astype(float)
        / 100
    )

In [None]:
# Summarize user performance: one row per user_id, with several statistics
# per metric (the resulting columns form a two-level index of metric/statistic).
user_performance_summary = (
    performance_metrics
    .groupby("user_id")
    .agg(dict(
        score=["mean", "max", "min"],
        accuracy=["mean", "max", "min"],
        mistakes_corrected=["mean", "max"],
        initial_mistake_count=["mean", "max"],
        better_than=["mean", "max"],
    ))
    .reset_index()
)

NameError: name 'performance_metrics' is not defined

In [None]:
# Step 3: Identifying Weak Areas
# Cut-offs used to flag a user: mean accuracy below the first value and
# mean initial mistake count above the second.
weak_performance_threshold = 0.7
high_mistake_threshold = 5

# Per-user means of the four metrics.
performance_trends = (
    dataframes["cleaned_quiz_results"]
    .groupby("user_id")
    .agg(dict(
        accuracy=["mean"],
        initial_mistake_count=["mean"],
        mistakes_corrected=["mean"],
        better_than=["mean"],
    ))
    .reset_index()
)

# Replace the two-level column index produced by the list-valued aggregation with flat names.
performance_trends.columns = [
    "user_id",
    "avg_accuracy",
    "avg_initial_mistakes",
    "avg_corrected_mistakes",
    "avg_better_than",
]

# A user is "weak" only when both conditions hold at once.
performance_trends["weak_performance"] = (
    performance_trends["avg_accuracy"].lt(weak_performance_threshold)
    & performance_trends["avg_initial_mistakes"].gt(high_mistake_threshold)
)


NameError: name 'dataframes' is not defined

In [None]:
# Step 4: Generating Personalized Recommendations
def generate_recommendations(user_row):
    """Return the list of study recommendations that apply to one user.

    Args:
        user_row: mapping-like row exposing "avg_accuracy",
            "avg_initial_mistakes" and "avg_corrected_mistakes".

    Returns:
        A list of recommendation strings; empty when no rule fires.

    The accuracy and initial-mistake cut-offs are read from the module-level
    names weak_performance_threshold and high_mistake_threshold.
    """
    rules = (
        (
            user_row["avg_accuracy"] < weak_performance_threshold,
            "Focus on accuracy improvement by reviewing past mistakes.",
        ),
        (
            user_row["avg_initial_mistakes"] > high_mistake_threshold,
            "Practice more on frequently missed question types.",
        ),
        (
            user_row["avg_corrected_mistakes"] < 3,
            "Actively review and correct mistakes during practice.",
        ),
    )
    return [advice for applies, advice in rules if applies]

# axis=1 hands each row of performance_trends to the function above.
performance_trends["recommendations"] = performance_trends.apply(
    generate_recommendations,
    axis=1,
)

In [None]:
# Aggregating user performance: per-user mean of every metric,
# then renaming the columns for easier access.
performance_trends = (
    dataframes["cleaned_quiz_results"]
    .groupby("user_id")
    .agg({
        "score": "mean",  # Ensure "score" is included
        "accuracy": "mean",
        "initial_mistake_count": "mean",
        "mistakes_corrected": "mean",
        "better_than": "mean",
    })
    .reset_index()
    .rename(columns={
        "score": "avg_score",
        "accuracy": "avg_accuracy",
        "initial_mistake_count": "avg_initial_mistakes",
        "mistakes_corrected": "avg_corrected_mistakes",
        "better_than": "avg_better_than",
    })
)

In [None]:
# Define a function to generate personalized recommendations
def generate_recommendations(user_row):
    """Build a single comma-separated recommendation string for one user.

    Args:
        user_row: mapping-like row exposing "avg_accuracy",
            "avg_initial_mistakes" and "avg_corrected_mistakes".

    Returns:
        The messages of every rule that fires, joined with ", "; when no rule
        fires, a single encouragement message is returned instead.
    """
    rules = (
        (user_row["avg_accuracy"] < 0.7,
         "Focus on improving accuracy through revision and practice."),
        (user_row["avg_initial_mistakes"] > 5,
         "Review commonly made mistakes and practice weak areas."),
        (user_row["avg_corrected_mistakes"] < 3,
         "Make use of detailed explanations to understand mistakes."),
    )
    tips = [message for fired, message in rules if fired]
    if tips:
        return ", ".join(tips)  # Convert list to string
    return "Great work! Maintain consistency and practice higher-level questions."

# Apply the function to generate recommendations.
# axis=1 passes each row of performance_trends to generate_recommendations;
# the value it returns is stored in the "recommendations" column.
performance_trends["recommendations"] = performance_trends.apply(generate_recommendations, axis=1)

In [None]:
# Step 5: (Optional) Predicting NEET Rank - Basic Probabilistic Model
def predict_neet_rank(accuracy, score):
    """Heuristic rank estimate from an accuracy and a score.

    Starts from 50000 and subtracts two penalties, each scaled to 10000:
    one proportional to (1 - score / 200) and one to (1 - accuracy).
    The result is truncated to an int and never falls below 1.

    Args:
        accuracy: accuracy value; the formula uses it as (1 - accuracy).
        score: score value; the formula divides it by 200.

    Returns:
        The estimated rank as an int >= 1.

    NOTE(review): because the penalties are subtracted, a higher score or
    accuracy produces a numerically larger rank -- confirm that this
    direction is intended.
    """
    score_penalty = (1 - (score / 200)) * 10000
    accuracy_penalty = (1 - accuracy) * 10000
    rank_estimate = int(50000 - score_penalty - accuracy_penalty)
    return rank_estimate if rank_estimate > 1 else 1

# Score every aggregated user row with predict_neet_rank (axis=1 -> one call per row):
# avg_accuracy is passed as `accuracy` and avg_score as `score`.
performance_trends["predicted_neet_rank"] = performance_trends.apply(
    lambda row: predict_neet_rank(row["avg_accuracy"], row["avg_score"]), axis=1
)

# Display final recommendations
print(performance_trends[["user_id", "recommendations", "predicted_neet_rank"]])

                        user_id  \
0  YcDFSO4ZukTJnnFMgRNVwZTE4j42   

                                     recommendations  predicted_neet_rank  
0  Review commonly made mistakes and practice wea...                40235  


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Sample dataset for testing (replace with actual NEET data if available).
# Each record is (user_id, actual NEET score, actual NEET rank).
actual_neet_data = pd.DataFrame(
    [
        ("student_1", 180, 500),
        ("student_2", 120, 15000),
        ("student_3", 60, 45000),
    ],
    columns=["user_id", "actual_score", "actual_rank"],
)

# Predict NEET rank using our model. The sample has no accuracy column,
# so actual_score / 200 is passed in the accuracy position.
actual_neet_data["predicted_neet_rank"] = actual_neet_data.apply(
    lambda student: predict_neet_rank(student["actual_score"] / 200, student["actual_score"]),
    axis=1,
)

# Calculate Error Metrics between the sample's actual ranks and the predictions.
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")


Mean Absolute Error (MAE): 27833.333333333332
Mean Squared Error (MSE): 1022083333.3333334


In [None]:
import numpy as np

# Simulated NEET Score-to-Rank Mapping (Approximate Percentile Distribution)
def estimate_neet_rank(score):
    """Estimate a rank by linear interpolation over a fixed score-to-rank table.

    Args:
        score: numeric score; the table covers 0 to 200.

    Returns:
        The interpolated rank truncated to an int. Scores outside the table
        take the rank of the nearest end point (np.interp clamps), i.e. 1
        above 200 and 100000 below 0.
    """
    # Simulated percentile-based rank distribution (hypothetical anchor points).
    score_rank_mapping = {
        200: 1,  # Top scorer
        180: 500,
        160: 5000,
        140: 15000,
        120: 30000,
        100: 50000,
        80: 70000,
        60: 85000,
        40: 95000,
        20: 99000,
        0: 100000  # Lowest rank
    }

    # np.interp requires its x-coordinates to be increasing; the table above is
    # written from the highest score down, so the anchors are sorted by score
    # before interpolating (unsorted input yields meaningless values).
    anchors = sorted(score_rank_mapping.items())
    scores = np.array([anchor_score for anchor_score, _ in anchors], dtype=float)
    ranks = np.array([anchor_rank for _, anchor_rank in anchors], dtype=float)

    # Predict rank using interpolation
    predicted_rank = np.interp(score, scores, ranks)
    return int(predicted_rank)

# Apply Improved NEET Rank Prediction: one estimate_neet_rank call per sample score.
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(estimate_neet_rank)

# Recalculate Error Metrics against the sample's actual ranks.
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print(f"Improved Mean Absolute Error (MAE): {mae}")
print(f"Improved Mean Squared Error (MSE): {mse}")


Improved Mean Absolute Error (MAE): 79833.33333333333
Improved Mean Squared Error (MSE): 6716750000.0


In [None]:
import numpy as np

def predict_neet_rank_fixed(score, max_rank=100000, decay=0.05):
    """Predict a rank from a score with an exponential-decay curve.

    The estimate is max_rank * exp(-decay * score): a score of 0 maps to
    max_rank and the rank shrinks exponentially as the score grows.

    Args:
        score: numeric score.
        max_rank: rank assigned to a score of 0 (default 100000).
        decay: decay rate per score point (default 0.05); adjust based on real data.

    Returns:
        The estimate truncated to an int and clamped to at least 1, so a very
        high score can never produce the invalid rank 0.
    """
    predicted_rank = max_rank * np.exp(-decay * score)  # exponential decay formula
    return max(1, int(predicted_rank))

# Apply the improved rank prediction model: one predict_neet_rank_fixed call per sample score.
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(predict_neet_rank_fixed)

# Recalculate Error Metrics against the sample's actual ranks.
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print(f"Final Improved Mean Absolute Error (MAE): {mae}")
print(f"Final Improved Mean Squared Error (MSE): {mse}")


Final Improved Mean Absolute Error (MAE): 18421.0
Final Improved Mean Squared Error (MSE): 606549879.0


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Sample NEET score-to-rank mapping (Replace with real NEET data if available).
# The two arrays are aligned position by position: neet_scores[i] -> neet_ranks[i].
neet_scores = np.array([
    200, 180, 160, 140, 120, 100, 80, 60, 40, 20, 0,
])  # Scores
neet_ranks = np.array([
    1, 500, 5000, 15000, 30000, 50000, 70000, 85000, 95000, 99000, 100000,
])  # Ranks

# Fit a Polynomial Regression Model (degree = 2 for quadratic fit):
# expand the single score column into polynomial terms, then fit a linear model on them.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(neet_scores[:, np.newaxis])

model = LinearRegression().fit(X_poly, neet_ranks)


def predict_neet_rank_poly(score):
    """Predict a rank for one score with the fitted polynomial model.

    Uses the module-level `poly` transformer and `model` regressor.
    The prediction is truncated to an int and clamped to at least 1.
    """
    single_sample = np.array([[score]])  # shape (1, 1): one sample, one feature
    rank_estimate = model.predict(poly.transform(single_sample))[0]
    return max(1, int(rank_estimate))  # Ensure valid rank


# Apply model to test data
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(predict_neet_rank_poly)

# Evaluate the model against the sample's actual ranks.
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print(f"Polynomial Regression MAE: {mae}")
print(f"Polynomial Regression MSE: {mse}")


Polynomial Regression MAE: 17579.666666666668
Polynomial Regression MSE: 461219310.3333333


In [None]:
# Increase Polynomial Degree to 3 or 4
# Refit the same pipeline with cubic features; this rebinds `poly` and `model`.
poly = PolynomialFeatures(degree=3)  # Try degree=3 or 4
X_poly = poly.fit_transform(neet_scores[:, np.newaxis])

# Train Polynomial Regression Model
model = LinearRegression().fit(X_poly, neet_ranks)


# Define Prediction Function with Higher Degree Polynomial
def predict_neet_rank_poly(score):
    """Predict a rank for one score with the currently fitted polynomial model.

    Reads the module-level `poly` and `model`, so it reflects whichever
    degree was fitted most recently. The prediction is truncated to an int
    and clamped to at least 1.
    """
    single_sample = np.array([[score]])  # shape (1, 1): one sample, one feature
    rank_estimate = model.predict(poly.transform(single_sample))[0]
    return max(1, int(rank_estimate))  # Ensure valid rank


# Apply Model to Test Data
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(predict_neet_rank_poly)

# Evaluate Model against the sample's actual ranks.
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print(f"High-Degree Polynomial Regression MAE: {mae}")
print(f"High-Degree Polynomial Regression MSE: {mse}")


High-Degree Polynomial Regression MAE: 18776.666666666668
High-Degree Polynomial Regression MSE: 597785140.6666666


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Sample NEET data (Replace this with actual historical NEET score-rank data).
# The two arrays are aligned position by position.
neet_scores = np.array([
    200, 180, 160, 140, 120, 100, 80, 60, 40, 20, 0,
])  # Scores
neet_ranks = np.array([
    1, 500, 5000, 15000, 30000, 50000, 70000, 85000, 95000, 99000, 100000,
])  # Ranks

# Split data into train and test sets (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    neet_scores.reshape(-1, 1),
    neet_ranks,
    test_size=0.2,
    random_state=42,
)

# Train XGBoost Model
xgb_model = XGBRegressor(
    objective="reg:squarederror",
    n_estimators=500,
    learning_rate=0.05,
    max_depth=5,
)
xgb_model.fit(X_train, y_train)


# Define Prediction Function using XGBoost
def predict_neet_rank_xgb(score):
    """Predict a rank for one score with the module-level `xgb_model`; truncated to int."""
    single_sample = np.array([[score]])  # shape (1, 1): one sample, one feature
    return int(xgb_model.predict(single_sample)[0])


# Apply Model to Test Data
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(predict_neet_rank_xgb)

# Evaluate Model Performance on actual_neet_data (X_test / y_test are not used here).
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print(f"XGBoost Regression MAE: {mae}")
print(f"XGBoost Regression MSE: {mse}")


XGBoost Regression MAE: 18333.0
XGBoost Regression MSE: 608306667.0


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Train a Random Forest Model on the X_train / y_train split that is already in scope.
rf_model = RandomForestRegressor(random_state=42, n_estimators=500, max_depth=10)
rf_model.fit(X_train, y_train)


# Define Prediction Function using Random Forest
def predict_neet_rank_rf(score):
    """Predict a rank for one score with the module-level `rf_model`; truncated to int."""
    single_sample = np.array([[score]])  # shape (1, 1): one sample, one feature
    return int(rf_model.predict(single_sample)[0])


# Apply Model to Test Data
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(predict_neet_rank_rf)

# Evaluate Model against the sample's actual ranks.
mae_rf = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse_rf = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print(f"Random Forest Regression MAE: {mae_rf}")
print(f"Random Forest Regression MSE: {mse_rf}")


Random Forest Regression MAE: 19704.0
Random Forest Regression MSE: 652526408.0


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

# Sample NEET data (Replace with real NEET data if available).
# The two arrays are aligned position by position.
neet_scores = np.array([
    200, 180, 160, 140, 120, 100, 80, 60, 40, 20, 0,
])  # Scores
neet_ranks = np.array([
    1, 500, 5000, 15000, 30000, 50000, 70000, 85000, 95000, 99000, 100000,
])  # Ranks

# Split data into train and test sets (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    neet_scores.reshape(-1, 1),
    neet_ranks,
    test_size=0.2,
    random_state=42,
)

# Train XGBoost Model (more trees and a smaller learning rate than the earlier run).
xgb_model = XGBRegressor(
    objective="reg:squarederror",
    n_estimators=1000,
    learning_rate=0.01,
    max_depth=6,
)
xgb_model.fit(X_train, y_train)


# Define Prediction Function using XGBoost
def predict_neet_rank_xgb(score):
    """Predict a rank for one score with the module-level `xgb_model`; truncated to int."""
    single_sample = np.array([[score]])  # shape (1, 1): one sample, one feature
    return int(xgb_model.predict(single_sample)[0])


# Apply Model to Test Data.
# This rebinds actual_neet_data to a new five-row evaluation set, so the metrics
# below are not computed on the same rows as the earlier cells' metrics.
actual_neet_data = pd.DataFrame({
    "actual_score": [200, 150, 100, 50, 10],
    "actual_rank": [1, 10000, 50000, 90000, 99900],
})
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(predict_neet_rank_xgb)

# Evaluate Model Performance
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print("🔥 Best Model - XGBoost Regression 🔥")
print(f"✅ Mean Absolute Error (MAE): {mae}")
print(f"✅ Mean Squared Error (MSE): {mse}")


🔥 Best Model - XGBoost Regression 🔥
✅ Mean Absolute Error (MAE): 6179.4
✅ Mean Squared Error (MSE): 89962149.4


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

# Sample NEET data (Replace this with a larger real NEET dataset).
# 21 aligned anchor points, scores from 200 down to 0 in steps of 10.
neet_scores = np.array([
    200, 190, 180, 170, 160, 150, 140, 130, 120, 110, 100,
    90, 80, 70, 60, 50, 40, 30, 20, 10, 0,
])
neet_ranks = np.array([
    1, 100, 500, 2000, 5000, 10000, 15000, 20000, 30000, 40000, 50000,
    60000, 70000, 80000, 85000, 90000, 95000, 97000, 99000, 99900, 100000,
])

# Split data into training and testing (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    neet_scores.reshape(-1, 1),
    neet_ranks,
    test_size=0.2,
    random_state=42,
)

# Define the XGBoost model
xgb_model = XGBRegressor(objective="reg:squarederror")

# Define hyperparameter grid (3 * 3 * 3 * 2 * 2 = 108 combinations).
param_grid = dict(
    n_estimators=[500, 1000, 1500],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[4, 6, 8],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
)

# Perform Grid Search to find best hyperparameters (5-fold CV, scored by negative MAE).
# NOTE(review): the recorded run of this cell stopped with
# "AttributeError: 'super' object has no attribute '__sklearn_tags__'"; this looks like
# an xgboost / scikit-learn version mismatch rather than a problem in this code -- confirm.
grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    cv=5,
    scoring="neg_mean_absolute_error",
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Get the best model
best_xgb = grid_search.best_estimator_

# Predict using optimized model on a five-row evaluation set (one predict call per score).
actual_neet_data = pd.DataFrame({
    "actual_score": [200, 150, 100, 50, 10],
    "actual_rank": [1, 10000, 50000, 90000, 99900],
})
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(
    lambda score: int(best_xgb.predict(np.array([[score]]))[0])
)

# Evaluate performance
mae = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print("🔥 Optimized XGBoost Model 🔥")
print(f"✅ Best Mean Absolute Error (MAE): {mae}")
print(f"✅ Best Mean Squared Error (MSE): {mse}")


AttributeError: 'super' object has no attribute '__sklearn_tags__'

In [None]:

from catboost import CatBoostRegressor

# Train a CatBoost model on the X_train / y_train split that is already in scope.
# verbose=100 logs the training loss every 100 iterations.
cat_model = CatBoostRegressor(
    loss_function='MAE',
    iterations=1500,
    learning_rate=0.05,
    depth=6,
    verbose=100,
)
cat_model.fit(X_train, y_train)

# Predict using CatBoost: one predict call per score, truncated to int.
actual_neet_data["predicted_neet_rank"] = actual_neet_data["actual_score"].map(
    lambda score: int(cat_model.predict(np.array([[score]]))[0])
)

# Evaluate performance on actual_neet_data.
mae_cat = mean_absolute_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)
mse_cat = mean_squared_error(
    y_true=actual_neet_data["actual_rank"],
    y_pred=actual_neet_data["predicted_neet_rank"],
)

print("🔥 CatBoost Model 🔥")
print(f"✅ Mean Absolute Error (MAE): {mae_cat}")
print(f"✅ Mean Squared Error (MSE): {mse_cat}")


0:	learn: 32440.6249735	total: 220us	remaining: 330ms
100:	learn: 662.5854895	total: 56.6ms	remaining: 784ms
200:	learn: 321.9262733	total: 99.2ms	remaining: 641ms
300:	learn: 301.0417364	total: 135ms	remaining: 538ms
400:	learn: 293.5509722	total: 160ms	remaining: 440ms
500:	learn: 218.4100761	total: 189ms	remaining: 376ms
600:	learn: 110.8315989	total: 223ms	remaining: 333ms
700:	learn: 73.9499624	total: 247ms	remaining: 282ms
800:	learn: 73.7316036	total: 282ms	remaining: 246ms
900:	learn: 60.1539231	total: 334ms	remaining: 222ms
1000:	learn: 22.6509961	total: 399ms	remaining: 199ms
1100:	learn: 22.4879495	total: 533ms	remaining: 193ms
1200:	learn: 22.4853161	total: 609ms	remaining: 152ms
1300:	learn: 22.4848916	total: 653ms	remaining: 99.9ms
1400:	learn: 22.4848539	total: 691ms	remaining: 48.9ms
1499:	learn: 22.4848387	total: 734ms	remaining: 0us
🔥 CatBoost Model 🔥
✅ Mean Absolute Error (MAE): 1099.8
✅ Mean Squared Error (MSE): 5047800.6


In [None]:
# Persist the trained CatBoost regressor to catboost_neet_model.cbm (relative path);
# the Streamlit app written later in this notebook loads a file with this same name.
cat_model.save_model("catboost_neet_model.cbm")

In [None]:
!pip install streamlit pandas numpy seaborn matplotlib catboost pyngrok

Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from catboost import CatBoostRegressor

# Load trained CatBoost Model
@st.cache_resource
def load_model():
    model = CatBoostRegressor()
    model.load_model("catboost_neet_model.cbm")  # Load saved model
    return model

model = load_model()

# Web App Title
st.title("🎯 NEET Rank Predictor & Student Performance Analyzer")

# Upload CSV File
uploaded_file = st.file_uploader("📂 Upload your quiz performance data (CSV)", type=["csv"])
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    st.write("📊 Sample Data Preview:")
    st.dataframe(df.head())

    # Accuracy Trends Over Time
    st.subheader("📈 Accuracy Trends Over Time")
    plt.figure(figsize=(8, 5))
    sns.lineplot(x=df.index, y=df["accuracy"], marker="o")
    plt.xlabel("Quiz Number")
    plt.ylabel("Accuracy (%)")
    plt.title("Student Accuracy Improvement")
    st.pyplot(plt)

    # ❌ Mistakes Per Subject
    st.subheader("❌ Mistakes per Subject")
    plt.figure(figsize=(7, 5))
    sns.barplot(x=df["subject"], y=df["mistakes"], palette="coolwarm")
    plt.xlabel("Subject")
    plt.ylabel("Mistakes Count")
    plt.title("Mistake Distribution Across Subjects")
    st.pyplot(plt)

    # 🎯 NEET Rank Prediction
    st.subheader("🎯 NEET Rank Prediction")
    if "score" in df.columns:
        df["predicted_neet_rank"] = df["score"].apply(lambda x: int(model.predict(np.array([[x]]))[0]))
        st.dataframe(df[["score", "predicted_neet_rank"]])

        st.success("✅ NEET Rank Prediction Completed!")
    else:
        st.error("❌ The uploaded file must contain a 'score' column to predict NEET Rank.")


Writing app.py


In [None]:
from pyngrok import ngrok

# Start Streamlit server in the background
!streamlit run app.py &

# Expose the local server to the internet
public_url = ngrok.connect(port='8501')
print(f"🌍 Public URL: {public_url}")


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.66.70.94:8501[0m
[0m
[34m  Stopping...[0m


ERROR:pyngrok.process.ngrok:t=2025-01-30T22:42:54+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-01-30T22:42:54+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-01-30T22:42:54+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [None]:
from pyngrok import ngrok

# Kill any existing tunnels
!pkill -9 streamlit
!pkill -9 ngrok

# Start Streamlit in the background
!nohup streamlit run app.py &

# Start Ngrok
public_url = ngrok.connect(port="8501")
print(f"🌍 Click this link to open your web app: {public_url}")

nohup: appending output to 'nohup.out'


ERROR:pyngrok.process.ngrok:t=2025-01-30T22:43:00+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-01-30T22:43:00+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"


PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [None]:
# Register the ngrok authtoken without storing it in the notebook.
# The token is read from the NGROK_AUTHTOKEN environment variable, or asked for
# interactively (input hidden) when the variable is not set.
# SECURITY: a token that was previously committed in this cell must be treated
# as leaked and revoked in the ngrok dashboard.
import os
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok

# Start Streamlit in the background
!streamlit run app.py &

# Expose the local server to the internet
public_url = ngrok.connect(port="8501")
print(f"🌍 Click this link to open your web app: {public_url}")



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8502[0m
[34m  External URL: [0m[1mhttp://34.66.70.94:8502[0m
[0m
[34m  Stopping...[0m




PyngrokNgrokHTTPError: ngrok client exception, API returned 400: {"error_code":102,"status_code":400,"msg":"invalid tunnel configuration","details":{"err":"yaml: unmarshal errors:\n  line 1: field port not found in type config.HTTPv2Tunnel"}}


In [None]:
# Save the trained CatBoost model
cat_model.save_model("catboost_neet_model.cbm")

# Ensure the Streamlit script exists.
# `%%writefile` is a cell magic and is only recognised on the very first line of
# a cell, so it cannot follow the save above; the script is written with plain
# Python instead, which keeps both steps in one cell.
from pathlib import Path

APP_SOURCE = """\
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from catboost import CatBoostRegressor

# Load trained CatBoost Model
@st.cache_resource
def load_model():
    model = CatBoostRegressor()
    model.load_model("catboost_neet_model.cbm")  # Load saved model
    return model

model = load_model()

st.title("🎯 NEET Rank Predictor & Student Performance Analyzer")

uploaded_file = st.file_uploader("📂 Upload your quiz performance data (CSV)", type=["csv"])
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    st.write("📊 Sample Data Preview:")
    st.dataframe(df.head())

    st.subheader("🎯 NEET Rank Prediction")
    if "score" in df.columns:
        df["predicted_neet_rank"] = df["score"].apply(lambda x: int(model.predict(np.array([[x]]))[0]))
        st.dataframe(df[["score", "predicted_neet_rank"]])
        st.success("✅ NEET Rank Prediction Completed!")
    else:
        st.error("❌ The uploaded file must contain a 'score' column to predict NEET Rank.")
"""

# UTF-8 is stated explicitly because the script contains emoji.
Path("app.py").write_text(APP_SOURCE, encoding="utf-8")


UsageError: Line magic function `%%writefile` not found.


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from catboost import CatBoostRegressor

# Load trained CatBoost Model
@st.cache_resource
def load_model():
    model = CatBoostRegressor()
    model.load_model("catboost_neet_model.cbm")  # Load saved model
    return model

model = load_model()

# Web App Title
st.title("🎯 NEET Rank Predictor & Student Performance Analyzer")

# Upload CSV File
uploaded_file = st.file_uploader("📂 Upload your quiz performance data (CSV)", type=["csv"])
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    st.write("🛠 Available Columns in Uploaded Data:")
    st.write(df.columns.tolist())  # Prints all column names
    st.write("📊 Sample Data Preview:")
    st.dataframe(df.head())

    # 🎯 Generate Insights: Identify Weak Areas & Performance Gaps
    st.subheader("📉 Insights: Weak Areas & Performance Gaps")

    # Identify weak subjects
   # Check if "subject" exists, otherwise use an alternative column
    if "subject" in df.columns:
      weak_subjects = df[df["accuracy"] < 50]["subject"].unique()
    elif "topic" in df.columns:  # If "subject" is missing, try "topic"
      weak_subjects = df[df["accuracy"] < 50]["topic"].unique()
    else:
      weak_subjects = []  # Empty if neither column exists

    if len(weak_subjects) > 0:
        st.warning(f"❌ Weak Subjects: {', '.join(weak_subjects)} (Accuracy < 50%)")
    else:
        st.success("✅ No major weak subjects detected!")

    # Identify most common mistake topics
    mistake_counts = df.groupby("topic")["mistakes"].sum().sort_values(ascending=False)
    st.write("📌 Topics where most mistakes happen:")
    st.dataframe(mistake_counts.head(5))

    # Plot accuracy trends over time
    st.subheader("📈 Accuracy Trends Over Time")
    plt.figure(figsize=(8, 5))
    sns.lineplot(x=df.index, y=df["accuracy"], marker="o")
    plt.xlabel("Quiz Number")
    plt.ylabel("Accuracy (%)")
    plt.title("Student Accuracy Improvement Over Time")
    st.pyplot(plt)

    # 🎯 Personalized Study Recommendations
    st.subheader("📌 Personalized Study Recommendations")
    recommendations = []

    # Check for common weak topics
    if len(weak_subjects) > 0:
        recommendations.append(f"⚠ Focus more on weak subjects: {', '.join(weak_subjects)}.")

    # Check accuracy vs difficulty level
    if "difficulty" in df.columns:
        hard_accuracy = df[df["difficulty"] == "Hard"]["accuracy"].mean()
        if hard_accuracy < 50:
            recommendations.append("🔥 Practice more difficult-level questions to improve confidence.")

    # General improvement recommendations
    if df["accuracy"].mean() < 70:
        recommendations.append("📚 Revise core concepts and practice more mock tests.")

    if len(recommendations) > 0:
        for rec in recommendations:
            st.warning(rec)
    else:
        st.success("✅ You're on track! Keep practicing.")

    # 🧑‍🎓 Student Persona Analysis
    st.subheader("🧑‍🎓 Student Persona Analysis")

    avg_accuracy = df["accuracy"].mean()
    avg_speed = df["speed"].mean() if "speed" in df.columns else None
    avg_mistakes = df["mistakes"].mean()

    # Define persona based on accuracy & speed
    if avg_accuracy >= 85:
        persona = "🔥 High Achiever - Excels in Accuracy"
    elif avg_accuracy >= 70:
        persona = "📚 Concept Builder - Strong, but Needs More Practice"
    else:
        persona = "⚡ Risk Taker - Needs to Improve Accuracy"

    if avg_speed and avg_speed >= 90:
        persona += " 🏃 (Fast Learner)"
    elif avg_speed and avg_speed < 60:
        persona += " 🐢 (Needs to Improve Speed)"

    if avg_mistakes > 10:
        persona += " ❌ (Tends to Make Many Mistakes)"

    st.success(f"**Student Persona: {persona}**")

    # 🎯 Probabilistic Model for NEET Rank Prediction
    st.subheader("🎯 Predicting NEET Rank with Probabilistic Model")

    def predict_neet_rank(score, accuracy):
        base_rank = 50000  # Approximate base rank
        score_factor = (1 - (score / 200)) * 20000  # Score impact
        accuracy_factor = (1 - accuracy) * 20000  # Accuracy impact
        predicted_rank = base_rank - score_factor - accuracy_factor
        return max(1, int(predicted_rank))

    df["predicted_neet_rank"] = df.apply(lambda row: predict_neet_rank(row["score"], row["accuracy"]), axis=1)
    st.dataframe(df[["score", "accuracy", "predicted_neet_rank"]])

    # Show predicted vs actual ranks (if available)
    if "actual_rank" in df.columns:
        plt.figure(figsize=(8, 5))
        sns.scatterplot(x=df["actual_rank"], y=df["predicted_neet_rank"], color="blue")
        plt.plot([0, 100000], [0, 100000], linestyle="--", color="red")  # Reference line
        plt.title("Predicted vs Actual NEET Rank")
        plt.xlabel("Actual NEET Rank")
        plt.ylabel("Predicted NEET Rank")
        st.pyplot(plt)

    st.success("✅ NEET Rank Prediction Completed!")


Overwriting app.py
