In [12]:
pip install streamlit pandas numpy scikit-learn

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import json
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.cluster import KMeans

In [14]:
import pandas as pd
df = pd.read_json("algebra_ques.json")
df.to_csv("algebra_ques.csv", index=False)
print("Converted JSON to CSV. Shape:", df.shape)
print(df.head())


Converted JSON to CSV. Shape: (30, 7)
   QID                Question  Category  Avg_Time_Min  Marks  \
0    1       Solve: x + 5 = 12      Easy           1.0    2.0   
1    2       If 2x = 14 find x      Easy           NaN    NaN   
2    3         Simplify 3(x+2)      Easy           1.5    2.0   
3    4      If x=2 find 2x²+3x  Moderate           NaN    3.0   
4    5  Factorize: x² + 5x + 6  Moderate           2.0    3.0   

                                            Options Correct_Answer  
0                      [x = 6, x = 7, x = 8, x = 9]          x = 7  
1                      [x = 5, x = 6, x = 7, x = 8]          x = 7  
2                            [3x+2, 3x+6, 6x+3, 9x]           3x+6  
3                                  [10, 12, 14, 16]             14  
4  [(x+1)(x+6), (x+2)(x+3), (x+3)(x+4), (x+1)(x+5)]     (x+2)(x+3)  


In [15]:
import json
with open("algebra_theory.json", "r", encoding="utf-8") as f:
    theory_data = json.load(f)

print("Theory dataset loaded. Number of topics:", len(theory_data))
print(theory_data[0])

Theory dataset loaded. Number of topics: 6
{'Chapter': 'Algebra', 'Topic': 'Linear Equations', 'Theory': 'A linear equation is an algebraic expression where the highest power of the variable is 1. It represents a straight line when graphed.', 'Example_Question': 'Solve 2x + 3 = 7', 'Step_by_Step': 'Step 1: Isolate variable term → 2x = 4. Step 2: Solve for x → x = 2. Step 3: Check by substituting → 2*2 + 3 = 7', 'Tips': 'Always perform the same operation on both sides of the equation to maintain equality.', 'YouTube_Link': 'https://www.youtube.com/watch?v=SzxfK7zq9xk', 'Book_Reference': 'NCERT Class 6 Mathematics, Chapter 6'}


In [16]:
# Count missing values
print(df.isnull().sum())

QID                0
Question           0
Category           0
Avg_Time_Min      12
Marks              7
Options            0
Correct_Answer     0
dtype: int64


In [17]:
# Fill missing Avg_Time_Min and Marks by category average
df['Avg_Time_Min'] = df.groupby('Category')['Avg_Time_Min'].transform(lambda x: x.fillna(x.median()))
df['Marks'] = df.groupby('Category')['Marks'].transform(lambda x: x.fillna(x.median()))

# If still missing (for categories with all NaN), fill with global median
df['Avg_Time_Min'].fillna(df['Avg_Time_Min'].median(), inplace=True)
df['Marks'].fillna(df['Marks'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Avg_Time_Min'].fillna(df['Avg_Time_Min'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Marks'].fillna(df['Marks'].median(), inplace=True)


In [18]:
#Handle Missing Text Fields (Optional)
df.fillna({'Question': 'No Question Text', 
           'Options': 'N/A', 
           'Correct_Answer': 'N/A'}, inplace=True)
# Count missing values
print(df.isnull().sum())

QID               0
Question          0
Category          0
Avg_Time_Min      0
Marks             0
Options           0
Correct_Answer    0
dtype: int64


In [19]:
#Convert Data Types
df['Avg_Time_Min'] = df['Avg_Time_Min'].astype(float)
df['Marks'] = df['Marks'].astype(float)

df.to_csv("algebra_ques_cleaned.csv", index=False)
print("✅ Cleaned dataset saved as algebra_ques_cleaned.csv")

✅ Cleaned dataset saved as algebra_ques_cleaned.csv


In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle

df = pd.read_csv('algebra_ques_cleaned.csv')

# Example features (customize as per your columns)
X = df[['Avg_Time_Min', 'Marks']]
y = df['Category']  # Easy / Medium / Hard

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

pickle.dump(model, open('student_performance_model.pkl', 'wb'))


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import json
import pickle

with open('algebra_ques.json', 'r') as f:
    data = json.load(f)

questions = [item['Question'] for item in data]

vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(questions)

kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X)

pickle.dump(kmeans, open('question_cluster_model.pkl', 'wb'))
pickle.dump(vectorizer, open('question_vectorizer.pkl', 'wb'))


In [3]:
import pickle

# Define states (performance level) and possible actions (next learning step)
states = ["excellent", "good", "needs improvement"]
actions = {
    "excellent": ["Advanced Algebra", "Challenge Exercises"],
    "good": ["Intermediate Exercises", "Revise Theory"],
    "needs improvement": ["Foundational Theory", "Basic Examples"]
}

# Define rewards (example values)
rewards = {
    "excellent": {"Advanced Algebra": 10, "Challenge Exercises": 8},
    "good": {"Intermediate Exercises": 10, "Revise Theory": 6},
    "needs improvement": {"Foundational Theory": 10, "Basic Examples": 7}
}

# Save as dictionary Q-table
q_table = rewards

# Save Q-table to pickle
with open("q_table.pkl", "wb") as f:
    pickle.dump(q_table, f)

# Test loading
with open("q_table.pkl", "rb") as f:
    q_table_loaded = pickle.load(f)

print("Q-table loaded:", q_table_loaded)



Q-table loaded: {'excellent': {'Advanced Algebra': 10, 'Challenge Exercises': 8}, 'good': {'Intermediate Exercises': 10, 'Revise Theory': 6}, 'needs improvement': {'Foundational Theory': 10, 'Basic Examples': 7}}
