In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
import joblib

In [8]:
data = pd.read_csv('StudentsPerformance.csv')

In [9]:
categorical_features = ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course']
numerical_features = ['reading score', 'writing score']

# === Build preprocessing pipeline ===
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features) #cat is a random variable name, it can be anythingsc
    ],
    remainder='passthrough'  # Keep numerical features as they are
)


# === Combine preprocessor with a regression model ===
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())  # Or your model like DecisionTreeRegressor, etc.
])

# === Separate features and target from training data ===
X = data[categorical_features + numerical_features]
y = data['math score']

# === Train the pipeline ===
pipeline.fit(X, y)

# === Now collect new user input for prediction ===
Gender = input('Enter Your Gender: ')
Race = input('Enter your race/ethnicity: ')
ParentalEducation = input('Enter Your Parental Level of Education: ')
Lunch = input('Enter Your Lunch: ')
TestPrep = input('Enter Your Test Preparation Course: ')
ReadingScore = int(input('Enter Your Reading Score: '))
WritingScore = int(input('Enter Your Writing Score: '))

# === Organize input into a single-row DataFrame ===
new_input = pd.DataFrame([{
    'gender': Gender,
    'race/ethnicity': Race,
    'parental level of education': ParentalEducation,
    'lunch': Lunch,
    'test preparation course': TestPrep,
    'reading score': ReadingScore,
    'writing score': WritingScore
}])

# === Make prediction ===
prediction = pipeline.predict(new_input)
print("Approximated Predicted Math Score:", round(prediction[0]))

if prediction >= 60:
    print('You Passed The Maths test')
if prediction < 60:
    print('You Failed The Maths Test')



Enter Your Gender:  female
Enter your race/ethnicity:  group A
Enter Your Parental Level of Education:  bachelor's degree
Enter Your Lunch:  standard
Enter Your Test Preparation Course:  complete
Enter Your Reading Score:  90
Enter Your Writing Score:  75


Approximated Predicted Math Score: 69
You Passed The Maths test


In [19]:
joblib.dump(pipeline, 'Student_Performance.pkl')

['Student_Performance.pkl']