<a href="https://colab.research.google.com/github/DheviSri/python-lab/blob/main/Final_Grade.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================
# 1. Setup
# ==========================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# If uploading manually in Colab:
# from google.colab import files
# uploaded = files.upload()


In [None]:
# ==========================
# 2. Load Data
# ==========================
# Read the raw student records; adjust the path if the CSV lives elsewhere.
df = pd.read_csv("learning style.csv")

# Quick sanity check: first rows, then the (rows, columns) shape.
print(df.head(), df.shape, sep="\n")


  Student_ID  Age  Gender  Study_Hours_per_Week Preferred_Learning_Style  \
0     S00001   18  Female                    48              Kinesthetic   
1     S00002   29  Female                    30          Reading/Writing   
2     S00003   20  Female                    47              Kinesthetic   
3     S00004   23  Female                    13                 Auditory   
4     S00005   19  Female                    24                 Auditory   

   Online_Courses_Completed Participation_in_Discussions  \
0                        14                          Yes   
1                        20                           No   
2                        11                           No   
3                         0                          Yes   
4                        19                          Yes   

   Assignment_Completion_Rate (%)  Exam_Score (%)  Attendance_Rate (%)  \
0                             100              69                   66   
1                              71 

In [None]:
# ==========================
# 3. Define target & features
# ==========================
target = "Final_Grade"
drop_cols = ["Student_ID"]   # identifier column, excluded from the features

# Separate the label column from the remaining (non-ID) feature columns.
y = df[target]
X = df.drop(columns=[target, *drop_cols])

print("Features:", list(X.columns))
print("Target distribution:\n", y.value_counts())


Features: ['Age', 'Gender', 'Study_Hours_per_Week', 'Preferred_Learning_Style', 'Online_Courses_Completed', 'Participation_in_Discussions', 'Assignment_Completion_Rate (%)', 'Exam_Score (%)', 'Attendance_Rate (%)', 'Use_of_Educational_Tech', 'Self_Reported_Stress_Level', 'Time_Spent_on_Social_Media (hours/week)', 'Sleep_Hours_per_Night']
Target distribution:
 Final_Grade
A    2678
B    2455
C    2440
D    2427
Name: count, dtype: int64


In [None]:
# ==========================
# 4. Train-Test Split
# ==========================
# Hold out 20% of the rows for testing. The split is stratified on y and
# uses a fixed seed so it is repeatable across runs.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# ==========================
# 5. Preprocessing
# ==========================
# Partition the feature columns by dtype: numeric vs. everything else.
num_features = list(X.select_dtypes(include="number").columns)
cat_features = list(X.select_dtypes(exclude="number").columns)

print("Numeric:", num_features)
print("Categorical:", cat_features)

# Numeric columns are standardised; the remaining columns are one-hot
# encoded, with handle_unknown="ignore" so unseen categories do not raise.
numeric_step = ("num", StandardScaler(), num_features)
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])


Numeric: ['Age', 'Study_Hours_per_Week', 'Online_Courses_Completed', 'Assignment_Completion_Rate (%)', 'Exam_Score (%)', 'Attendance_Rate (%)', 'Time_Spent_on_Social_Media (hours/week)', 'Sleep_Hours_per_Night']
Categorical: ['Gender', 'Preferred_Learning_Style', 'Participation_in_Discussions', 'Use_of_Educational_Tech', 'Self_Reported_Stress_Level']


In [None]:
# ==========================
# 6. Build Pipeline with RandomForest
# ==========================
# Chain preprocessing and the classifier into a single estimator so the
# grid search fits both steps together.
pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("clf", RandomForestClassifier(random_state=42)),
])

# Deliberately small search space (2 x 3 x 2 = 12 candidates) to keep the
# Colab runtime short.
param_grid = dict(
    clf__n_estimators=[100, 200],
    clf__max_depth=[None, 10, 20],
    clf__min_samples_split=[2, 5],
)

grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring="accuracy",
    cv=3,
    n_jobs=-1,
    verbose=2,
)

grid_search.fit(X_train, y_train)


Fitting 3 folds for each of 12 candidates, totalling 36 fits


In [None]:
# ==========================
# 7. Evaluation (scores in %)
# ==========================
# accuracy_score and classification_report are imported in the setup cell.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred) * 100  # Accuracy in %

print("✅ Best Params:", grid_search.best_params_)
print(f"✅ Accuracy: {accuracy:.2f}%")

# NOTE(review): a perfect 100% score on held-out data is suspicious. It may
# mean Final_Grade is derived directly from one of the features (for example
# "Exam_Score (%)") -- TODO confirm and drop the leaking column if so.

# Convert the classification report to a DataFrame (one row per class/average).
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).T

# Only the score columns are fractions; "support" is a row count and must
# not be multiplied by 100.
metric_cols = ["precision", "recall", "f1-score"]
report_df[metric_cols] = report_df[metric_cols] * 100

# The scalar "accuracy" entry is broadcast into every column of its row, so
# its "support" cell holds the accuracy value; replace it with the number of
# evaluated samples.
report_df.loc["accuracy", "support"] = len(y_test)
report_df["support"] = report_df["support"].astype(int)

print("\n📊 Classification Report (in %):\n")
print(report_df.round(2))


✅ Best Params: {'clf__max_depth': None, 'clf__min_samples_split': 2, 'clf__n_estimators': 100}
✅ Accuracy: 100.00%

📊 Classification Report (in %):

              precision  recall  f1-score   support
A                 100.0   100.0     100.0   53600.0
B                 100.0   100.0     100.0   49100.0
C                 100.0   100.0     100.0   48800.0
D                 100.0   100.0     100.0   48500.0
accuracy          100.0   100.0     100.0     100.0
macro avg         100.0   100.0     100.0  200000.0
weighted avg      100.0   100.0     100.0  200000.0


In [None]:
# ==========================
# 8. Save preprocessed datasets
# ==========================
# Reuse the preprocessor that was fitted inside the best pipeline.
fitted_preprocessor = grid_search.best_estimator_.named_steps["preprocessor"]
transformed_names = fitted_preprocessor.get_feature_names_out()


def _labelled_frame(features, labels):
    """Transform `features` and append the matching grades as "Final_Grade"."""
    frame = pd.DataFrame(
        fitted_preprocessor.transform(features),
        columns=transformed_names,
    )
    frame["Final_Grade"] = labels.values
    return frame


X_train_trans = _labelled_frame(X_train, y_train)
X_test_trans = _labelled_frame(X_test, y_test)

# Write both partitions next to the notebook, without the pandas index.
X_train_trans.to_csv("preprocessed_train.csv", index=False)
X_test_trans.to_csv("preprocessed_test.csv", index=False)

print("✅ Preprocessed train/test datasets saved.")


✅ Preprocessed train/test datasets saved.


In [None]:
# ==========================
# 9. Save trained model
# ==========================
# Persist the whole fitted pipeline (preprocessor + classifier) so later
# cells can predict from raw feature values.
joblib.dump(value=best_model, filename="final_grade_model.pkl")
print("✅ Model saved as final_grade_model.pkl")


✅ Model saved as final_grade_model.pkl


In [None]:
# ==========================
# 10. Prediction from User Input
# ==========================

def predict_final_grade(user_input: dict, model=None):
    """Predict the final grade for a single student.

    Args:
        user_input: Mapping of raw feature column name -> value for one
            student. It is turned into a one-row DataFrame before prediction.
        model: Optional fitted estimator exposing ``predict(DataFrame)``.
            When omitted, the pipeline stored in "final_grade_model.pkl" is
            loaded from disk (the original behaviour). Passing an already
            loaded model avoids re-reading the file on every call.

    Returns:
        The first element of the model's prediction for the single row.

    Raises:
        ValueError: If ``user_input`` is empty.
    """
    if len(user_input) == 0:
        raise ValueError("user_input must contain at least one feature value")

    if model is None:
        # joblib.load unpickles the file, which can execute arbitrary code:
        # only load model files you created or otherwise trust.
        model = joblib.load("final_grade_model.pkl")

    df_input = pd.DataFrame([user_input])
    return model.predict(df_input)[0]

# Ask user for input values, validating each answer before predicting.
#
# Categorical answers are checked against the categories the fitted one-hot
# encoder saw during training. The encoder was built with
# handle_unknown="ignore", so a typo or an invalid value would otherwise be
# silently encoded as "no category" and still yield a prediction.
# NOTE: joblib.load unpickles the file; only load a model file you trust.
_preprocessor = joblib.load("final_grade_model.pkl").named_steps["preprocessor"]
_cat_columns = next(
    cols for name, _, cols in _preprocessor.transformers_ if name == "cat"
)
_known_categories = {
    column: list(values)
    for column, values in zip(
        _cat_columns, _preprocessor.named_transformers_["cat"].categories_
    )
}


def _ask_int(prompt: str, low: int = 0, high=None) -> int:
    """Prompt until the user enters a whole number within [low, high]."""
    while True:
        raw = input(prompt).strip()
        try:
            value = int(raw)
        except ValueError:
            print(f"  '{raw}' is not a whole number - please try again.")
            continue
        if value < low or (high is not None and value > high):
            upper = "" if high is None else f" and at most {high}"
            print(f"  Please enter a value of at least {low}{upper}.")
            continue
        return value


def _ask_choice(label: str, column: str):
    """Prompt until the answer matches a known category of `column`.

    Matching ignores case and surrounding whitespace; the category is
    returned exactly as the encoder stores it.
    """
    options = _known_categories[column]
    by_lower = {str(option).lower(): option for option in options}
    # " | " separator avoids ambiguity with categories that contain "/".
    prompt = f"{label} ({' | '.join(str(option) for option in options)}): "
    while True:
        raw = input(prompt).strip()
        match = by_lower.get(raw.lower())
        if match is not None:
            return match
        print(f"  '{raw}' is not one of the listed options - please try again.")


print("Please enter student details:\n")

user_input = {
    "Age": _ask_int("Age: "),
    "Gender": _ask_choice("Gender", "Gender"),
    "Study_Hours_per_Week": _ask_int("Study Hours per Week: "),
    "Preferred_Learning_Style": _ask_choice(
        "Preferred Learning Style", "Preferred_Learning_Style"
    ),
    "Online_Courses_Completed": _ask_int("Online Courses Completed: "),
    "Participation_in_Discussions": _ask_choice(
        "Participation in Discussions", "Participation_in_Discussions"
    ),
    "Assignment_Completion_Rate (%)": _ask_int("Assignment Completion Rate (%): ", high=100),
    "Exam_Score (%)": _ask_int("Exam Score (%): ", high=100),
    "Attendance_Rate (%)": _ask_int("Attendance Rate (%): ", high=100),
    "Use_of_Educational_Tech": _ask_choice(
        "Use of Educational Tech", "Use_of_Educational_Tech"
    ),
    "Self_Reported_Stress_Level": _ask_choice(
        "Stress Level", "Self_Reported_Stress_Level"
    ),
    "Time_Spent_on_Social_Media (hours/week)": _ask_int("Social Media Hours/Week: "),
    "Sleep_Hours_per_Night": _ask_int("Sleep Hours per Night: "),
}

# Prediction
print("\n📌 Predicted Final Grade:", predict_final_grade(user_input))


Please enter student details:

Age: 29
Gender (Male/Female): Female
Study Hours per Week: 30
Preferred Learning Style (Visual/Auditory/Reading/Writing/Kinesthetic): Reading/Visual
Online Courses Completed: 20
Participation in Discussions (Yes/No): No
Assignment Completion Rate (%): 71
Exam Score (%): 40
Attendance Rate (%): 57
Use of Educational Tech (Yes/No): Yes
Stress Level (Low/Medium/High): Medium
Social Media Hours/Week: 28
Sleep Hours per Night: 8

📌 Predicted Final Grade: D


In [None]:
# ==========================
# 11. User Interface with ipywidgets
# ==========================
import ipywidgets as widgets
from IPython.display import display, clear_output

# Load model
# NOTE: joblib.load unpickles the file; only load a model file you trust.
model = joblib.load("final_grade_model.pkl")

# ipywidgets truncates descriptions longer than the default label width
# (e.g. "Social Media (hrs/wk):"); "initial" sizes the label to its text.
LABEL_STYLE = {"description_width": "initial"}

# Create input widgets
age = widgets.IntText(description="Age:", value=20, style=LABEL_STYLE)

gender = widgets.Dropdown(
    options=["Male", "Female"],
    description="Gender:",
    style=LABEL_STYLE,
)

study_hours = widgets.IntSlider(
    description="Study Hours/Week:",
    min=0, max=50, step=1, value=20,
    style=LABEL_STYLE,
)

learning_style = widgets.Dropdown(
    options=["Visual", "Auditory", "Reading/Writing", "Kinesthetic"],
    description="Learning Style:",
    style=LABEL_STYLE,
)

online_courses = widgets.IntSlider(
    description="Online Courses:",
    min=0, max=20, step=1, value=2,
    style=LABEL_STYLE,
)

discussion = widgets.Dropdown(
    options=["Yes", "No"],
    description="Discussions:",
    style=LABEL_STYLE,
)

assignment_rate = widgets.IntSlider(
    description="Assignments (%):",
    min=0, max=100, step=1, value=80,
    style=LABEL_STYLE,
)

exam_score = widgets.IntSlider(
    description="Exam Score (%):",
    min=0, max=100, step=1, value=75,
    style=LABEL_STYLE,
)

attendance = widgets.IntSlider(
    description="Attendance (%):",
    min=0, max=100, step=1, value=90,
    style=LABEL_STYLE,
)

edu_tech = widgets.Dropdown(
    options=["Yes", "No"],
    description="EdTech:",
    style=LABEL_STYLE,
)

stress = widgets.Dropdown(
    options=["Low", "Medium", "High"],
    description="Stress:",
    style=LABEL_STYLE,
)

social_media = widgets.IntSlider(
    description="Social Media (hrs/wk):",
    min=0, max=50, step=1, value=10,
    style=LABEL_STYLE,
)

sleep = widgets.IntSlider(
    description="Sleep (hrs/night):",
    min=0, max=12, step=1, value=7,
    style=LABEL_STYLE,
)

# Single source of truth: feature column name -> widget supplying its value.
# Insertion order is also the on-screen display order.
feature_widgets = {
    "Age": age,
    "Gender": gender,
    "Study_Hours_per_Week": study_hours,
    "Preferred_Learning_Style": learning_style,
    "Online_Courses_Completed": online_courses,
    "Participation_in_Discussions": discussion,
    "Assignment_Completion_Rate (%)": assignment_rate,
    "Exam_Score (%)": exam_score,
    "Attendance_Rate (%)": attendance,
    "Use_of_Educational_Tech": edu_tech,
    "Self_Reported_Stress_Level": stress,
    "Time_Spent_on_Social_Media (hours/week)": social_media,
    "Sleep_Hours_per_Night": sleep,
}

# Button for prediction
button = widgets.Button(description="Predict Final Grade", button_style="success")

# Output area
output = widgets.Output()


def on_button_click(b):
    """Read the current widget values, predict, and show the grade.

    `b` is the clicked Button instance passed by ipywidgets; it is unused.
    """
    user_input = {name: widget.value for name, widget in feature_widgets.items()}
    with output:
        # wait=True defers clearing until the new text arrives (no flicker).
        clear_output(wait=True)
        prediction = model.predict(pd.DataFrame([user_input]))[0]
        print("📌 Predicted Final Grade:", prediction)


button.on_click(on_button_click)

# Display all widgets
display(*feature_widgets.values(), button, output)


IntText(value=20, description='Age:')

Dropdown(description='Gender:', options=('Male', 'Female'), value='Male')

IntSlider(value=20, description='Study Hours/Week:', max=50)

Dropdown(description='Learning Style:', options=('Visual', 'Auditory', 'Reading/Writing', 'Kinesthetic'), valu…

IntSlider(value=2, description='Online Courses:', max=20)

Dropdown(description='Discussions:', options=('Yes', 'No'), value='Yes')

IntSlider(value=80, description='Assignments (%):')

IntSlider(value=75, description='Exam Score (%):')

IntSlider(value=90, description='Attendance (%):')

Dropdown(description='EdTech:', options=('Yes', 'No'), value='Yes')

Dropdown(description='Stress:', options=('Low', 'Medium', 'High'), value='Low')

IntSlider(value=10, description='Social Media (hrs/wk):', max=50)

IntSlider(value=7, description='Sleep (hrs/night):', max=12)

Button(button_style='success', description='Predict Final Grade', style=ButtonStyle())

Output()