# 03. Weakness Detector Model

Сурагчийн сул сэдвүүдийг олох Multi-label Classification модел

**Input:**
- Сэдэв тус бүрийн зөв/буруу хариултын тоо
- Хүнд асуултад алдсан сэдвүүд

**Output:**
- Сул сэдвүүдийн жагсаалт (multi-label)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

In [None]:
# Topics the detector knows about, grouped by subject.
TOPICS = [
    # Math
    'algebra', 'geometry', 'trigonometry', 'calculus',
    # Physics
    'mechanics', 'thermodynamics', 'electromagnetism',
    # Chemistry
    'organic', 'inorganic', 'physical_chemistry',
]

# Synthetic data: every sample is one simulated student with random
# correct/wrong answer counts per topic. The seed makes the run reproducible.
np.random.seed(42)
n_samples = 1000

data, labels = [], []

for _ in range(n_samples):
    features = {}

    for name in TOPICS:
        # Draw "correct" first and "wrong" second so the random stream is
        # consumed in a fixed order.
        n_right = np.random.randint(0, 10)
        n_miss = np.random.randint(0, 10)

        # The +1 in the denominator keeps the ratio defined when both
        # counts are zero.
        features.update({
            f'{name}_correct': n_right,
            f'{name}_wrong': n_miss,
            f'{name}_ratio': n_right / (n_right + n_miss + 1),
        })

    data.append(features)
    # A topic is labelled as a weakness when its ratio is below 0.4.
    labels.append(
        [name for name in TOPICS if features[f'{name}_ratio'] < 0.4]
    )

df = pd.DataFrame(data)
print(df.head())
print(f'\nSample labels: {labels[:5]}')

In [None]:
# Turn the per-student weak-topic lists into a binary indicator matrix.
# Passing classes=TOPICS fixes the set of label columns up front instead of
# inferring it from the data.
mlb = MultiLabelBinarizer(classes=TOPICS)
mlb.fit(labels)
y = mlb.transform(labels)

print(f'Label shape: {y.shape}')
print(f'Classes: {mlb.classes_}')

In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible.
split = train_test_split(df, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# One random forest per topic, wrapped in a one-vs-rest multi-label model.
forest = RandomForestClassifier(n_estimators=100, random_state=42)
model = OneVsRestClassifier(forest)
model.fit(X_train, y_train)

# Per-topic precision/recall/F1 on the held-out samples.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, target_names=TOPICS)
print(report)

In [None]:
# Save the model together with everything needed to use it later: the label
# binarizer (to decode predictions), the topic list, and the exact feature
# column order the model was trained on.
from pathlib import Path

MODEL_PATH = '../trained_models/weakness_detector.pkl'

model_data = {
    'model': model,
    'mlb': mlb,
    'topics': TOPICS,
    'features': list(df.columns)
}

# joblib.dump does not create missing directories, so make sure the target
# folder exists (e.g. on a fresh checkout) before writing.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model_data, MODEL_PATH)
print('Model saved!')

In [None]:
# Smoke-test the saved artifact: reload it and predict for one hand-made
# student.
loaded = joblib.load('../trained_models/weakness_detector.pkl')

# Sample student: 2 correct / 8 wrong in algebra and mechanics, and
# 8 correct / 2 wrong in every other topic.
struggling = ('algebra', 'mechanics')
sample = {}
for topic in TOPICS:
    correct, wrong = (2, 8) if topic in struggling else (8, 2)
    sample[f'{topic}_correct'] = correct
    sample[f'{topic}_wrong'] = wrong
    # Same ratio formula (including the +1) as used for the training data.
    sample[f'{topic}_ratio'] = correct / (correct + wrong + 1)

# The model was fitted on a DataFrame, so the columns must appear in exactly
# the training order. Selecting by the saved feature list enforces that order
# and raises KeyError if a feature is missing, instead of feeding the model
# misaligned values.
sample_df = pd.DataFrame([sample])[loaded['features']]
pred = loaded['model'].predict(sample_df)
weak_topics = loaded['mlb'].inverse_transform(pred)[0]

print(f'Predicted weak topics: {weak_topics}')