# B.Tech Branch Selection — Streamlit App

In [None]:
# Streamlit App: B.Tech Branch Selection System
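# To run: export this notebook cell to a .py script first (Streamlit only runs
# Python scripts, not .ipynb files), then:
#   streamlit run app_branch_selection_full.py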

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
import joblib
import os
import matplotlib.pyplot as plt

# Relative path of the joblib file the trained pipeline is written to after
# training and read back from when the app looks for an existing model.
MODEL_SAVE_PATH = "branch_selection_pipeline.joblib"

def make_synthetic(N=600, seed=42):
    """Generate a synthetic student dataset for demo training.

    Every row holds five subject marks, an extracurricular score, a board, a
    gender and a ``branch`` label. The label is sampled with weights that grow
    with the marks associated with each branch, so the data carries a noisy
    but learnable signal.

    Args:
        N: Number of rows to generate.
        seed: Seed of the random generator; equal seeds give equal frames.

    Returns:
        pandas.DataFrame with ``N`` rows and the columns ``math_marks``,
        ``physics_marks``, ``chemistry_marks``, ``biology_marks``,
        ``english_marks``, ``extracurricular_score``, ``board``, ``gender``
        and ``branch``.
    """
    columns = [
        'math_marks', 'physics_marks', 'chemistry_marks', 'biology_marks',
        'english_marks', 'extracurricular_score', 'board', 'gender', 'branch',
    ]
    # A private RandomState produces the same stream as seeding the global
    # NumPy generator, but does not reseed it behind every other caller's back.
    rng = np.random.RandomState(seed)
    rows = []
    for _ in range(N):
        # The draw order below determines the generated values; keep it stable.
        math = np.clip(rng.normal(75, 12), 30, 100)
        physics = np.clip(rng.normal(70, 15), 20, 100)
        chemistry = np.clip(rng.normal(68, 14), 20, 100)
        biology = np.clip(rng.normal(60, 20), 0, 100)
        english = np.clip(rng.normal(72, 10), 30, 100)
        extr = np.clip(rng.normal(4, 2), 0, 10)
        board = rng.choice(['CBSE', 'StateBoard', 'ISC'])
        gender = rng.choice(['M', 'F', 'Other'], p=[0.48, 0.48, 0.04])
        # Unnormalised affinity of this student for each branch.
        probs = {
            'CSE': max(0, (math - 60) / 40),
            'ECE': max(0, (physics - 55) / 45),
            'ME': max(0, (chemistry - 50) / 50),
            'CE': max(0, (english - 60) / 45),
            'EE': max(0, (physics + chemistry) / 200),
        }
        # The small offset keeps every branch possible and the sum non-zero.
        vals = np.array(list(probs.values())) + 0.01
        vals = vals / vals.sum()
        branch = rng.choice(list(probs.keys()), p=vals)
        rows.append({
            'math_marks': round(math, 1),
            'physics_marks': round(physics, 1),
            'chemistry_marks': round(chemistry, 1),
            'biology_marks': round(biology, 1),
            'english_marks': round(english, 1),
            'extracurricular_score': round(extr, 1),
            'board': board,
            'gender': gender,
            'branch': branch,
        })
    # Passing the column list keeps the schema even when N == 0.
    return pd.DataFrame(rows, columns=columns)

def build_pipeline():
    """Build the unfitted preprocessing + random-forest pipeline.

    Numeric columns are median-imputed and standardised; categorical columns
    are filled with the constant ``'missing'`` and one-hot encoded (categories
    unseen during fitting are ignored at prediction time). The transformed
    features feed a 200-tree ``RandomForestClassifier``.

    Returns:
        sklearn.pipeline.Pipeline with steps ``'pre'`` and ``'clf'``.
    """
    numeric_features = [
        'math_marks', 'physics_marks', 'chemistry_marks', 'biology_marks',
        'english_marks', 'extracurricular_score',
    ]
    categorical_features = ['board', 'gender']

    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and 1.4 removed
    # the old keyword, so try the current spelling first and fall back for
    # older installations.
    try:
        onehot = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        onehot = OneHotEncoder(handle_unknown='ignore', sparse=False)

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', onehot),
    ])
    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])
    clf = Pipeline(steps=[
        ('pre', preprocessor),
        ('clf', RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)),
    ])
    return clf

def train_on_dataframe(df):
    """Fit a fresh pipeline on ``df`` and save it to ``MODEL_SAVE_PATH``.

    Args:
        df: DataFrame holding the eight feature columns plus the ``branch``
            target column.

    Returns:
        The fitted pipeline (the same object that was written to disk).

    Raises:
        ValueError: If any required column is absent from ``df``.
    """
    pipeline = build_pipeline()

    feature_cols = [
        'math_marks', 'physics_marks', 'chemistry_marks', 'biology_marks',
        'english_marks', 'extracurricular_score', 'board', 'gender',
    ]
    target_col = 'branch'

    # Report every absent column at once, in schema order.
    missing = [name for name in [*feature_cols, target_col] if name not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    pipeline.fit(df[feature_cols], df[target_col])
    joblib.dump(pipeline, MODEL_SAVE_PATH)
    return pipeline

def load_pipeline_if_exists():
    """Load the saved pipeline from ``MODEL_SAVE_PATH`` if possible.

    Returns:
        The object stored in the model file, or ``None`` when the file does
        not exist or cannot be loaded. A file that exists but fails to load
        additionally triggers a ``RuntimeWarning`` so the failure is not
        mistaken for "no model trained yet".
    """
    import warnings

    # NOTE(security): joblib.load unpickles the file, and unpickling can run
    # arbitrary code. MODEL_SAVE_PATH must only point at a file this app
    # wrote itself, never at user-supplied content.
    try:
        return joblib.load(MODEL_SAVE_PATH)
    except FileNotFoundError:
        # Nothing has been trained yet; this is the normal first-run case.
        return None
    except Exception as exc:
        # Keep the best-effort contract (return None) but surface the cause,
        # e.g. a truncated file or a library version mismatch.
        warnings.warn(
            f"Could not load model from {MODEL_SAVE_PATH!r}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

def compute_averages_from_row(r):
    """Compute summary scores from one student's subject marks.

    Every mark is read with ``r.get(key, 0)`` and converted to ``float``, so
    a missing subject counts as 0 and numeric strings are accepted for all
    three results (not just the percentage).

    Args:
        r: Mapping (anything with ``.get``) that may contain ``math_marks``,
            ``physics_marks``, ``chemistry_marks``, ``biology_marks`` and
            ``english_marks``.

    Returns:
        dict with ``percentage`` (mean of the five subjects), ``pcm_mean``
        (mean of maths, physics, chemistry) and ``stem_score``
        (0.45 * maths + 0.35 * physics + 0.2 * chemistry), each rounded to
        two decimals.
    """
    subjects = ['math_marks', 'physics_marks', 'chemistry_marks', 'biology_marks', 'english_marks']
    # Coerce once and reuse, so all three scores treat the input identically.
    marks = {key: float(r.get(key, 0)) for key in subjects}
    math = marks['math_marks']
    physics = marks['physics_marks']
    chemistry = marks['chemistry_marks']

    percent = round(sum(marks.values()) / len(marks), 2)
    pcm_mean = round((math + physics + chemistry) / 3, 2)
    stem_score = round(0.45 * math + 0.35 * physics + 0.2 * chemistry, 2)
    return {'percentage': percent, 'pcm_mean': pcm_mean, 'stem_score': stem_score}

def recommend_top_k(model, student_row, k=3):
    """Return the ``k`` most probable classes for one student.

    Args:
        model: Fitted classifier exposing ``predict_proba`` and ``classes_``.
        student_row: Mapping of feature name to value for a single student.
        k: Maximum number of recommendations to return.

    Returns:
        List of ``(class_label, probability)`` tuples, most probable first.
    """
    frame = pd.DataFrame([student_row])
    scores = model.predict_proba(frame)[0]
    labels = model.classes_

    # Ascending argsort reversed gives a descending ranking of class indices.
    ranking = np.argsort(scores)[::-1]
    top = []
    for pos in ranking[:k]:
        top.append((labels[pos], float(scores[pos])))
    return top

def main():
    """Render the Streamlit page.

    The sidebar offers two ways to train (an uploaded CSV or the synthetic
    demo data) and reports whether a saved model could be loaded. The main
    area collects one student's marks and, on request, shows the computed
    averages and the top three recommended branches.
    """
    st.title("B.Tech Branch Selection System")

    # --- Sidebar: training controls -------------------------------------
    st.sidebar.header("Model Options")
    uploaded = st.sidebar.file_uploader("Upload training CSV", type=['csv'])
    if uploaded is not None:
        if st.sidebar.button("Train Model on Uploaded Data"):
            # Parse only when training is requested, and show bad input as a
            # sidebar message instead of a traceback. pandas' parser errors
            # and the missing-column check in train_on_dataframe are all
            # ValueError subclasses.
            try:
                df_upload = pd.read_csv(uploaded)
                train_on_dataframe(df_upload)
            except ValueError as exc:
                st.sidebar.error(f"Could not train on uploaded file: {exc}")
            else:
                st.sidebar.success("Model Trained and Saved")
    if st.sidebar.button("Use Synthetic Data (Demo)"):
        demo_df = make_synthetic(600)
        train_on_dataframe(demo_df)
        st.sidebar.success("Trained on Synthetic Data")

    # Load after the training buttons so a model trained in this run is used.
    model = load_pipeline_if_exists()
    if model:
        st.sidebar.success("Model Loaded")
    else:
        st.sidebar.warning("No model found, train first")

    # --- Main area: student input form ----------------------------------
    st.header("Enter Student Marks")
    math = st.number_input("Maths", 0.0, 100.0, 75.0)
    physics = st.number_input("Physics", 0.0, 100.0, 70.0)
    chemistry = st.number_input("Chemistry", 0.0, 100.0, 68.0)
    biology = st.number_input("Biology", 0.0, 100.0, 60.0)
    english = st.number_input("English", 0.0, 100.0, 72.0)
    extr = st.number_input("Extracurricular", 0.0, 10.0, 4.0)
    board = st.selectbox("Board", ['CBSE', 'StateBoard', 'ISC'])
    gender = st.selectbox("Gender", ['M', 'F', 'Other'])

    if st.button("Predict Branch"):
        if model is None:
            st.error("Train or load a model first")
        else:
            # Keys must match the feature columns the pipeline was fitted on.
            student = {
                'math_marks': math,
                'physics_marks': physics,
                'chemistry_marks': chemistry,
                'biology_marks': biology,
                'english_marks': english,
                'extracurricular_score': extr,
                'board': board,
                'gender': gender,
            }
            avgs = compute_averages_from_row(student)
            st.write("Computed Averages:", avgs)
            recs = recommend_top_k(model, student, k=3)
            st.subheader("Top 3 Recommended Branches")
            for b, p in recs:
                st.write(f"{b}: {p:.3f}")

    st.markdown("---")
    st.write("B.Tech Admission Information System — Streamlit Demo")

# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
