In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Example dataset (you can replace this with your real data)
data = {
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Female', 'Male'],
    'Extracurricular_activities': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No'],
    'Age': [18, 17, 19, 16, 18, 17],
    'Score': [75, 88, 95, 70, 85, 80],
    'Pass': [1, 1, 1, 0, 1, 0]  # Target variable (e.g. pass/fail)
}

df = pd.DataFrame(data)

# Column groups. Declared once and reused for both feature selection and
# preprocessing so the two lists cannot drift apart.
categorical_cols = ['Gender', 'Extracurricular_activities']
numeric_cols = ['Age', 'Score']

# Features and target
X = df[categorical_cols + numeric_cols]
y = df['Pass']

# Split into train and test sets.
# stratify=y keeps both classes in the training fold: on a dataset this small
# an unstratified split can leave the training fold with a single class,
# which LogisticRegression cannot fit.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Preprocessor: one-hot encode the categorical columns (dropping the first
# level of each to avoid a redundant column) and pass the numeric columns
# through unchanged via remainder='passthrough'.
# handle_unknown='ignore' encodes a category that was absent from the training
# fold as all zeros instead of raising at predict time. Combining it with
# `drop` requires scikit-learn >= 1.0, which get_feature_names_out() below
# already needs.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols),
    ],
    remainder='passthrough'
)

# Pipeline with preprocessing and logistic regression classifier
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression())
])

# Fit model on the training fold only
model_pipeline.fit(X_train, y_train)

# Predict on the held-out test fold
predictions = model_pipeline.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy:.2f}")

# To see transformed feature names (optional)
feature_names = model_pipeline.named_steps['preprocessor'].get_feature_names_out()
print("Features after encoding:", feature_names)


Accuracy: 0.50
Features after encoding: ['cat__Gender_Male' 'cat__Extracurricular_activities_Yes' 'remainder__Age'
 'remainder__Score']


In [None]:
import streamlit as st
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# NOTE(review): this cell is a Streamlit app. The widgets below are expected to
# render only when the code runs as a script (e.g. saved to a .py file and
# started with `streamlit run`), not when executed in a notebook kernel.

# Sample data for training
data = {
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Female', 'Male'],
    'Extracurricular_activities': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No'],
    'Age': [18, 17, 19, 16, 18, 17],
    'Score': [75, 88, 95, 70, 85, 80],
    'Pass': [1, 1, 1, 0, 1, 0]  # Target variable
}
df = pd.DataFrame(data)

# Column groups. Declared once and reused for both feature selection and
# preprocessing so the two lists cannot drift apart.
categorical_cols = ['Gender', 'Extracurricular_activities']
numeric_cols = ['Age', 'Score']

# Features and target
X = df[categorical_cols + numeric_cols]
y = df['Pass']


@st.cache_resource
def _train_model_pipeline():
    """Build and fit the preprocessing + logistic-regression pipeline once.

    Streamlit re-executes the whole script on every widget interaction;
    caching the fitted pipeline as a resource avoids refitting it each time.

    Returns:
        Pipeline: the pipeline fitted on the module-level ``X`` and ``y``.
    """
    # One-hot encode the categorical columns (first level dropped) and pass
    # the remaining numeric columns through unchanged.
    column_transformer = ColumnTransformer(
        transformers=[
            ('cat', OneHotEncoder(drop='first'), categorical_cols),
        ],
        remainder='passthrough'
    )
    pipeline = Pipeline(steps=[
        ('preprocessor', column_transformer),
        ('classifier', LogisticRegression())
    ])
    pipeline.fit(X, y)
    return pipeline


# Train model pipeline (served from the cache on subsequent reruns)
model_pipeline = _train_model_pipeline()
# Keep the fitted preprocessor reachable under its original module-level name.
preprocessor = model_pipeline.named_steps['preprocessor']

# Streamlit UI
st.title("Student Pass Prediction")

gender = st.selectbox("Gender", options=['Male', 'Female'])
extracurricular = st.selectbox("Extracurricular Activities", options=['Yes', 'No'])
age = st.number_input("Age", min_value=10, max_value=30, value=18)
score = st.number_input("Score", min_value=0, max_value=100, value=75)

if st.button("Predict"):
    # Single-row frame with the same column names the pipeline was fitted on.
    input_df = pd.DataFrame({
        'Gender': [gender],
        'Extracurricular_activities': [extracurricular],
        'Age': [age],
        'Score': [score]
    })

    prediction = model_pipeline.predict(input_df)[0]
    # Column 1 of predict_proba is the probability of class 1 (pass), since
    # the target labels are 0 and 1.
    proba = model_pipeline.predict_proba(input_df)[0][1]

    if prediction == 1:
        st.success(f"Prediction: Pass ✅ (Probability: {proba:.2f})")
    else:
        # Show the probability of the predicted class (fail). Printing the
        # pass probability next to a "Fail" verdict would contradict it.
        fail_proba = 1 - proba
        st.error(f"Prediction: Fail ❌ (Probability: {fail_proba:.2f})")
