In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import tkinter as tk
from tkinter import ttk, messagebox

# Step 1: Data Loading
# Replace 'student_data.csv' with your actual data file
data = pd.read_csv('student_data.csv')

# Step 2: Data Preprocessing
# Columns the model is trained on. predict_performance() builds its input row
# from exactly these columns, so the training matrix is derived from this list
# (rather than from "whatever is left in the CSV") to keep both sides in sync.
features = ['Gender', 'Admission Criteria', 'Program', 'Section', 'Instructor ID', 'Course ID', 'Course Name', 'GPA']
target = 'Grade'

# Subset of `features` that is one-hot encoded; the remaining ones
# ('Instructor ID', 'Course ID', 'GPA') are passed through as-is.
categorical_features = ['Gender', 'Admission Criteria', 'Program', 'Section', 'Course Name']

# Handling missing values: only drop rows that lack a value the model actually
# uses, so gaps in unrelated columns do not shrink the training set.
data.dropna(subset=features + [target], inplace=True)

# Splitting features and target. The index is reset so that X and y stay
# row-aligned after the concat below (dropna leaves gaps in the index).
X = data[features].reset_index(drop=True)
y = data[target].reset_index(drop=True)

# Encoding categorical features
# drop='first' removes one dummy column per feature; handle_unknown='ignore'
# encodes categories unseen during fit as all zeros.
encoder = OneHotEncoder(drop='first', handle_unknown='ignore')
encoded_cols = encoder.fit_transform(X[categorical_features])
encoded_cols_df = pd.DataFrame(encoded_cols.toarray(), columns=encoder.get_feature_names_out())

# Replace the raw categorical columns with their one-hot encoded counterparts.
# Resulting column order: pass-through features first, then the dummy columns.
X = pd.concat([X.drop(columns=categorical_features), encoded_cols_df], axis=1)

# Step 3: Data Splitting
# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Model Training
# The scaler is fitted on the training split only and then reused for the test
# split (and later for GUI input) to avoid leaking test statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Step 5: Model Evaluation
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Step 6: Example Prediction with GUI
def predict_performance():
    """Predict a grade from the values currently entered in the GUI.

    Reads the GPA entry and the five combobox selections, encodes and scales
    them with the already-fitted module-level ``encoder`` and ``scaler``, and
    writes the ``model`` prediction into ``result_label``.

    Invalid input and prediction failures are reported through a message box;
    in those cases the result label is left unchanged. Returns ``None``.
    """
    # --- Validate the numeric input ---------------------------------------
    try:
        gpa = float(gpa_entry.get())
        # Chained comparison on purpose: NaN compares False against
        # everything, so it is rejected here instead of slipping through.
        if not 0 <= gpa <= 4.0:
            raise ValueError("GPA must be between 0 and 4.0")
    except ValueError as e:
        messagebox.showerror("Input Error", f"Please enter valid numbers. Error: {e}")
        return

    # --- Validate the categorical input -----------------------------------
    # Key order matches the column order the encoder was fitted with.
    selections = {
        'Gender': gender_var.get(),
        'Admission Criteria': criteria_var.get(),
        'Program': program_var.get(),
        'Section': section_var.get(),
        'Course Name': course_var.get(),
    }
    # The encoder ignores unknown categories, so a blank choice would be
    # encoded as all zeros -- indistinguishable from the dropped baseline
    # category. Require an explicit selection instead.
    blank = [name for name, value in selections.items() if not value.strip()]
    if blank:
        messagebox.showerror("Input Error", f"Please select a value for: {', '.join(blank)}")
        return

    categorical = list(selections)

    # NOTE(review): Instructor ID and Course ID are not collected by the form
    # and are filled with 0 as placeholders -- confirm this is intended.
    input_data = pd.DataFrame(
        [[selections['Gender'], selections['Admission Criteria'], selections['Program'],
          selections['Section'], 0, 0, selections['Course Name'], gpa]],
        columns=['Gender', 'Admission Criteria', 'Program', 'Section',
                 'Instructor ID', 'Course ID', 'Course Name', 'GPA'])

    # --- Encode, scale, predict -------------------------------------------
    # Kept separate from input validation so that a pipeline failure is not
    # reported to the user as "please enter valid numbers".
    try:
        encoded = encoder.transform(input_data[categorical])
        encoded_df = pd.DataFrame(encoded.toarray(), columns=encoder.get_feature_names_out())
        input_data_final = pd.concat([input_data.drop(columns=categorical), encoded_df], axis=1)

        # Put the columns in the order the scaler was fitted with, when it
        # recorded one and the column sets agree.
        expected = getattr(scaler, 'feature_names_in_', None)
        if expected is not None and set(expected) == set(input_data_final.columns):
            input_data_final = input_data_final[list(expected)]

        input_data_scaled = scaler.transform(input_data_final)
        prediction = model.predict(input_data_scaled)
    except ValueError as e:
        messagebox.showerror("Prediction Error", f"Could not compute a prediction. Error: {e}")
        return

    result_label.config(text=f'Predicted Grade: {prediction[0]:.2f}')

# Main window plus a padded container frame that holds every widget.
app = tk.Tk()
app.title('Student Performance Predictor')

frame = ttk.Frame(app, padding="10")
frame.grid(row=0, column=0)


def _add_dropdown(row, caption, choices):
    """Put a caption and a combobox on grid row `row` of `frame`.

    Returns the (StringVar, Combobox) pair so the caller can read the
    selection later.
    """
    ttk.Label(frame, text=caption).grid(row=row, column=0)
    selection = tk.StringVar()
    box = ttk.Combobox(frame, textvariable=selection, values=choices)
    box.grid(row=row, column=1)
    return selection, box


# Row 0: free-text GPA entry.
ttk.Label(frame, text="GPA").grid(row=0, column=0)
gpa_entry = ttk.Entry(frame)
gpa_entry.grid(row=0, column=1)

# Rows 1-5: one dropdown per categorical model input.
gender_var, gender_entry = _add_dropdown(1, "Gender", ["Male", "Female"])
criteria_var, criteria_entry = _add_dropdown(
    2, "Admission Criteria", ["Entrance Exam", "Merit-based", "Sports Quota"])
program_var, program_entry = _add_dropdown(
    3, "Program", ["Computer Science", "Engineering", "Arts"])
section_var, section_entry = _add_dropdown(4, "Section", ["A", "B", "C"])
course_var, course_entry = _add_dropdown(
    5, "Course Name", ["Course1", "Course2", "Course3"])

# Row 6: button that triggers the prediction callback.
predict_button = ttk.Button(frame, text="Predict", command=predict_performance)
predict_button.grid(row=6, column=0, columnspan=2)

# Row 7: label the callback writes the predicted grade into.
result_label = ttk.Label(frame, text="Predicted Grade: ")
result_label.grid(row=7, column=0, columnspan=2)

# Hand control to Tk's event loop; blocks until the window is closed.
app.mainloop()


Mean Squared Error: 225.57789640880418
