In [6]:

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler

print("Setup successfully done !")

Setup successfully done !


In [7]:


    df = pd.read_csv("student-scores.csv")
    print("Dataset loaded successfully.")


Dataset loaded successfully.


In [8]:


print("\nMissing Values:")
print(df.isnull().sum())
numeric_cols = df.select_dtypes(include=[np.number]).columns

df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())


columns_to_drop = ["id", "first_name", "last_name", "email"]
df.drop(columns=columns_to_drop, axis=1, inplace=True, errors='ignore')



for column in ["gender", "career_aspiration"]:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column].astype(str))

print("\nPreprocessing complete. Updated dataset info:")
df.info()
df.head()


Missing Values:
id                            0
first_name                    0
last_name                     0
email                         0
gender                        0
part_time_job                 0
absence_days                  0
extracurricular_activities    0
weekly_self_study_hours       0
career_aspiration             0
math_score                    0
history_score                 0
physics_score                 0
chemistry_score               0
biology_score                 0
english_score                 0
geography_score               0
dtype: int64

Preprocessing complete. Updated dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 13 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   gender                      2000 non-null   int64
 1   part_time_job               2000 non-null   bool 
 2   absence_days                2000 non-null   int

Unnamed: 0,gender,part_time_job,absence_days,extracurricular_activities,weekly_self_study_hours,career_aspiration,math_score,history_score,physics_score,chemistry_score,biology_score,english_score,geography_score
0,1,False,3,False,27,9,73,81,93,97,63,80,87
1,0,False,2,False,47,6,90,86,96,100,90,88,90
2,0,False,9,True,13,8,81,97,95,96,65,77,94
3,0,False,5,False,3,1,71,74,88,80,89,63,86
4,1,False,5,False,10,15,84,77,65,65,80,74,76


In [9]:
X = df.drop(["math_score"], axis=1)
Y = pd.cut(df["math_score"], bins=[0, 50, 75, 100], labels=["Low", "Medium", "High"])
print(Y)


0       Medium
1         High
2         High
3       Medium
4         High
         ...  
1995      High
1996      High
1997      High
1998    Medium
1999      High
Name: math_score, Length: 2000, dtype: category
Categories (3, object): ['Low' < 'Medium' < 'High']


In [10]:

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [11]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "Naive Bayes": GaussianNB()
}


In [12]:

max_acc = 0  #
for model_name, model in models.items():
    model.fit(X_train, Y_train)
    predictions = model.predict(X_test)


    accuracy = accuracy_score(Y_test, predictions)
    precision = precision_score(Y_test, predictions, average='weighted')
    recall = recall_score(Y_test, predictions, average='weighted')


    print(f"{model_name} Results:")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    if max_acc<accuracy:
        max_acc=accuracy
        best_model=model
print(f"Best Model: {best_model}")

best_model.predict([[1,False,1,False,44,44,44,44,44,44,44,44]])

Logistic Regression Results:
Accuracy: 0.77
Precision: 0.70
Random Forest Results:
Accuracy: 0.74
Precision: 0.68
Naive Bayes Results:
Accuracy: 0.69
Precision: 0.67
Best Model: LogisticRegression()


array(['High'], dtype=object)

----------------------------------------------------------------------------------------------------------------------------

In [13]:
import gradio as gr
def predict_math(gender,part_time_job,absence_days,extracurricular_activities,weekly_self_study_hours,career_aspiration,history_score,
                physics_score,chemistry_score,biology_score,english_score,geography_score):

  arr=[[
    gender,
    part_time_job,
    absence_days,
    extracurricular_activities,
    weekly_self_study_hours,
    career_aspiration,
    history_score,
    physics_score,
    chemistry_score,
    biology_score,
    english_score,
    geography_score
  ]]
  pred= best_model.predict(arr)
  return pred[0]
iface= gr.Interface(fn=predict_math,
                    inputs=[
                        gr.Checkbox(label="gender( true=male)"),
                        gr.Checkbox(label="Part-Time Job"),
                        gr.Slider(0,30,step=1,label="Absence Days"),
                        gr.Checkbox(label="Extracurricular Activities"),
                        gr.Slider(0,50,step=1,label="Weekly Self-Study Hours"),
                        gr.Slider(0,20,step=1,label="Career Aspiration"),
                        gr.Slider(0,100,step=1,label="History Score"),
                        gr.Slider(0,100,step=1,label="Physics Score"),
                        gr.Slider(0,100,step=1,label="Chemistry Score"),
                        gr.Slider(0,100,step=1,label="Biology Score"),
                        gr.Slider(0,100,step=1,label="English Score"),
                        gr.Slider(0,100,step=1,label="Geography Score"),
                    ],
                    outputs=gr.Textbox(label="Predicted Math Score"))

iface.launch()









It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7a031670fea3e416fe.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


