In [30]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [31]:
# Step 2: Load Dataset
# Read the CSV into a DataFrame, report its (rows, columns) shape and
# preview the first rows as the cell output.
DATASET_CSV = "stream_recommendation_dataset_2000.csv"
df = pd.read_csv(DATASET_CSV)
print("Dataset Shape:", df.shape)
df.head()

Dataset Shape: (2000, 15)


Unnamed: 0,Student_ID,Recommended_Stream,Math,Science,Biology,English,SocialStudies,Language,LogicalReasoning,AnalyticalSkills,NumericalAbility,Creativity,CommunicationSkills,ArtisticSkills,PracticalSkills
0,1,PCM,88,93,58,78,49,41,92,83,99,51,70,54,66
1,2,PCM,83,85,47,74,53,57,86,81,90,43,79,50,68
2,3,PCM,82,85,44,72,47,59,87,92,93,58,80,57,64
3,4,PCM,92,88,49,70,47,42,99,92,95,59,64,48,68
4,5,PCM,87,86,49,73,60,41,81,90,93,51,78,44,73


In [32]:
# Shuffle the rows reproducibly.
# frac=1 keeps every row whatever the dataset size (a hard-coded count raises
# when the file has fewer rows and silently subsamples when it has more), and
# the fixed random_state makes a fresh "Restart & Run All" give the same order.
df = df.sample(frac=1, random_state=42)

In [33]:
# Step 3: Features and Target
# y is the stream label column; X is every remaining column except the
# student identifier and the label itself.
non_feature_cols = ["Student_ID", "Recommended_Stream"]
y = df["Recommended_Stream"]
X = df.drop(columns=non_feature_cols)

In [34]:
# Step 4: Encode Target Labels
# Learn the label -> integer mapping from y, then apply it. The fitted
# encoder is kept in `le` so predictions can be mapped back to stream names.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

In [35]:
# Step 5: Feature Scaling
# Fit the scaler on the feature frame, then transform that same frame.
# NOTE(review): this fit sees every row, including the rows that are later
# held out for testing — confirm that is intended.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [36]:
# Step 6: Train-Test Split
# Split the *unscaled* features so that the held-out rows never influence
# preprocessing. The split is stratified on the encoded label and seeded,
# so the same rows land in each partition on every run.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Fit the scaler on the training rows only and reuse those statistics for the
# test rows. Fitting on the full dataset would leak test-set mean/variance
# into the features the model is evaluated on. `scaler` is rebound here so
# the object used for inference and saved to disk is the train-only fit.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [37]:
# Step 7: Train Model
# Fixed random_state so the forest is built the same way on every run.
model = RandomForestClassifier(random_state=42)
# Fit on the training split only; as the last expression of the cell, the
# fitted estimator is also what the notebook displays.
model.fit(X_train, y_train)

In [38]:

# Step 8: Predictions
# Predict encoded stream labels for the test split; compared against y_test
# in the evaluation step.
y_pred = model.predict(X_test)


In [39]:
# Step 9: Evaluation
# Compute both metrics first, then print them. target_names maps the encoded
# labels in the report back to the original stream names.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=le.classes_)

print("✅ Model Accuracy:", accuracy)
print("\n📊 Classification Report:\n", report)

✅ Model Accuracy: 1.0

📊 Classification Report:
               precision    recall  f1-score   support

        Arts       1.00      1.00      1.00        80
    Commerce       1.00      1.00      1.00        80
         PCB       1.00      1.00      1.00        80
         PCM       1.00      1.00      1.00        80
  Vocational       1.00      1.00      1.00        80

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400



In [44]:
# Step 10: Save Important Elements
# Pickle the three fitted objects needed for inference, one file each:
# the classifier, the feature scaler and the label encoder.
artifacts = {
    "stream_model.pkl": model,
    "stream_scaler.pkl": scaler,
    "stream_label_encoder.pkl": le,
}

for filename, fitted_obj in artifacts.items():
    with open(filename, "wb") as f:
        pickle.dump(fitted_obj, f)

print("🎉 Model, Scaler, and Label Encoder saved successfully!")

🎉 Model, Scaler, and Label Encoder saved successfully!


In [41]:
# Step 11: Stream Recommendation Function
def recommend_stream(student_data, top_n=2):
    """
    Rank the most likely streams for a single student.

    student_data: dict with subject & aptitude scores
    Example:
    {
        "Math": 85, "Science": 90, "Biology": 55, "English": 70,
        "SocialStudies": 60, "Language": 65,
        "LogicalReasoning": 88, "AnalyticalSkills": 92, "NumericalAbility": 80,
        "Creativity": 55, "CommunicationSkills": 72,
        "ArtisticSkills": 50, "PracticalSkills": 68
    }
    The keys may be given in any order; keys the scaler was not fitted on
    are ignored.

    top_n: number of highest-probability streams to return (default 2).

    Returns: list of (stream_name, probability) tuples, most probable first.

    Raises: ValueError if a score the scaler was fitted on is missing.

    Uses the notebook-level `scaler`, `model` and `le` objects.
    """
    # Convert to a one-row DataFrame
    student_df = pd.DataFrame([student_data])

    # Align columns with the order the scaler saw at fit time, so the result
    # does not depend on the key order of the caller's dict (scikit-learn
    # validates feature names against those recorded during fit).
    expected = getattr(scaler, "feature_names_in_", None)
    if expected is not None:
        expected = list(expected)
        missing = [name for name in expected if name not in student_df.columns]
        if missing:
            raise ValueError(f"student_data is missing required scores: {missing}")
        student_df = student_df[expected]

    # Scale features
    student_scaled = scaler.transform(student_df)

    # Predict probabilities and keep the top_n, highest first
    probs = model.predict_proba(student_scaled)[0]
    top_indices = np.argsort(probs)[::-1][:top_n]

    # predict_proba columns follow model.classes_ (the encoded labels), so go
    # through it before looking up the stream name in the label encoder.
    return [(le.classes_[model.classes_[i]], probs[i]) for i in top_indices]

In [42]:

# Step 12: Test Recommendation
# Example input for recommend_stream: one score per subject / aptitude,
# listed in the same order as the dataset's feature columns.
sample_student = dict(
    # subject scores
    Math=85,
    Science=90,
    Biology=55,
    English=70,
    SocialStudies=60,
    Language=65,
    # aptitude scores
    LogicalReasoning=88,
    AnalyticalSkills=92,
    NumericalAbility=80,
    Creativity=55,
    CommunicationSkills=72,
    ArtisticSkills=50,
    PracticalSkills=68,
)

In [43]:
# Print the heading first, then the ranked (stream, probability) pairs
# returned for the sample student.
print("\n🎓 Recommended Streams for Sample Student:")
sample_recommendations = recommend_stream(sample_student)
print(sample_recommendations)


🎓 Recommended Streams for Sample Student:
[('PCM', 0.92), ('Commerce', 0.08)]
