# Application for estimating the grades of Portuguese students

In [None]:
! python -m venv ../.env
! source ../.env/bin/activate
! pip install -r ../requirements.txt

## 1. Import all needed libraries

In [8]:
import os

import tkinter as tk
import pandas as pd

from typing import Any
from tkinter import ttk
from tkinter import filedialog as fd

from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination

from app.preprocessing import TunedPreprocessing, BoundOutPutPreprocessing
from app.combobox_builder import CmbxBuilder, CmbxSettings
from app.network import Model

## 2. Settings for the application

In [9]:
# CSV file the model is fitted on, and the column separator used for every
# student CSV that is read or written.
TRAIN_PATH: str = "../data/student-por_2.csv"
DELIMITER: str = ";"

# !IMPORTANT: Number of CPU cores used for parallelization. Set this to suit
# your machine; it is recommended not to use all available cores.
NUMBER_CORES: int = 10

## 3. Define Application

In [10]:
# Introductory text shown at the top of the window.
desc_1 = (
    "Please select a file to predict the final grade of a student *OR* "
    "define the input values manually."
)

# Combobox definitions for the left-hand column of the form, keyed by dataset
# column name.
# NOTE(review): CmbxSettings arguments look like (column, label, values,
# display texts) -- confirm against app.combobox_builder.
#
# In the student-performance dataset `Medu`/`Mjob` describe the MOTHER and
# `Fedu`/`Fjob` the FATHER, so the labels below must follow that naming.
left_container_settings = {
    'school': CmbxSettings('school', 'School', ['GP', 'MS'], ['Gabriel Pereira', 'Mousinho da Silveira']),
    'sex': CmbxSettings('sex', 'Gender', ['F', 'M'], ['Female', 'Male']),
    'age': CmbxSettings('age', 'Age', [str(i) for i in range(15, 21)] + ['>20'], [str(i) for i in range(15, 21)] + ['>20']),
    'address': CmbxSettings('address', 'Address Type', ['U', 'R'], ['Urban', 'Rural']),
    'famsize': CmbxSettings('famsize', 'Family Size', ['LE3', 'GT3'], ['Less than or equal to 3', 'Greater than 3']),
    'Pstatus': CmbxSettings('Pstatus', 'Parental Status', ['T', 'A'], ['Living Together', 'Apart']),
    'Fedu': CmbxSettings('Fedu', "Father's Education", list(range(0, 5)), ["None", "Primary Education (4th grade)", "5th to 9th grade", "Secondary Education", "Higher Education"]),
    'Medu': CmbxSettings('Medu', "Mother's Education", list(range(0, 5)), ["None", "Primary Education (4th grade)", "5th to 9th grade", "Secondary Education", "Higher Education"]),
    'Fjob': CmbxSettings('Fjob', "Father's Job", ['teacher', 'health', 'services', 'at_home', 'other'], ['Teacher', 'Healthcare', 'Civil Services', 'At Home', 'Other']),
    'Mjob': CmbxSettings('Mjob', "Mother's Job", ['teacher', 'health', 'services', 'at_home', 'other'], ['Teacher', 'Healthcare', 'Civil Services', 'At Home', 'Other']),
    'reason': CmbxSettings('reason', 'Reason to Choose this School', ['home', 'reputation', 'course', 'other'], ['Close to Home', 'School Reputation', 'Course', 'Other']),
    'guardian': CmbxSettings('guardian', "Student's Guardian", ['mother', 'father', 'other'], ['Mother', 'Father', 'Other']),
    'traveltime': CmbxSettings('traveltime', 'Travel Time to School', list(range(1, 5)), ['<15 min', '15 to 30 min', '30 min to 1 hour', '>1 hour']),
    'studytime': CmbxSettings('studytime', 'Weekly Study Time', list(range(1, 5)), ['<2 hours', '2 to 5 hours', '5 to 10 hours', '>10 hours']),
    'failures': CmbxSettings('failures', 'Number of Past Class Failures', list(range(0, 4)), ['None', '1', '2', '3 or more']),
    'G1': CmbxSettings('G1', 'First Period Grade', ['<7'] + [f'{i}' for i in range(7, 18)] + ['>17'], ['<7'] + [f'{i}' for i in range(7, 18)] + ['>17']),
}

# Combobox definitions for the right-hand column of the form, keyed by dataset
# column name.
# NOTE(review): CmbxSettings arguments look like (column, label, values,
# display texts) -- confirm against app.combobox_builder.
#
# `schoolsup` is the extra educational support column; the paid-classes label
# belongs to `paid` only.
right_container_settings = {
    'schoolsup': CmbxSettings('schoolsup', 'Extra Educational Support', ['yes', 'no'], ['Yes', 'No']),
    'famsup': CmbxSettings('famsup', 'Family Educational Support', ['yes', 'no'], ['Yes', 'No']),
    'paid': CmbxSettings('paid', 'Extra Paid Classes within the Course Subject', ['yes', 'no'], ['Yes', 'No']),
    'activities': CmbxSettings('activities', 'Extra Curricular Activities', ['yes', 'no'], ['Yes', 'No']),
    'nursery': CmbxSettings('nursery', 'Attended Preschool', ['yes', 'no'], ['Yes', 'No']),
    'higher': CmbxSettings('higher', 'Wants to Take Higher Education', ['yes', 'no'], ['Yes', 'No']),
    'internet': CmbxSettings('internet', 'Internet Access at Home', ['yes', 'no'], ['Yes', 'No']),
    'romantic': CmbxSettings('romantic', 'In a Romantic Relationship', ['yes', 'no'], ['Yes', 'No']),
    'famrel': CmbxSettings('famrel', 'Quality of Family Relationships', list(range(1, 6)), ['Very Bad', 'Bad', 'Neutral', 'Good', 'Very Good']),
    'freetime': CmbxSettings('freetime', 'Free Time after School', list(range(1, 6)), ['Very Low', 'Low', 'Neutral', 'High', 'Very High']),
    'goout': CmbxSettings('goout', 'Going Out with Friends', list(range(1, 6)), ['Very Low', 'Low', 'Neutral', 'High', 'Very High']),
    'Dalc': CmbxSettings('Dalc', 'Workday Alcohol Consumption', list(range(1, 6)), ['Very Low', 'Low', 'Neutral', 'High', 'Very High']),
    'Walc': CmbxSettings('Walc', 'Weekend Alcohol Consumption', list(range(1, 6)), ['Very Low', 'Low', 'Neutral', 'High', 'Very High']),
    'health': CmbxSettings('health', 'Current Health Status', list(range(1, 6)), ['Very Bad', 'Bad', 'Neutral', 'Good', 'Very Good']),
    'absences': CmbxSettings('absences', 'Number of School Absences', [str(i) for i in range(0, 16)] + ['>15'], [str(i) for i in range(0, 16)] + ['>15']),
    'G2': CmbxSettings('G2', 'Second Period Grade', ['<7'] + [f'{i}' for i in range(7, 18)] + ['>17'], ['<7'] + [f'{i}' for i in range(7, 18)] + ['>17'])
}

In [11]:
class App(tk.Tk):
    """Main window of the student grade predictor.

    The user either loads a CSV file with students (batch prediction, written
    to a ``-predicted.csv`` file next to the input) or fills in the form to
    predict the final grade (G3) of a single student.
    """

    PAD_X = 32
    PAD_Y = 8
    X_MAX = 1200
    Y_MAX = 800
    WRAP_LENGTH = X_MAX - (PAD_X * 2)

    def __init__(self):
        super().__init__()

        # Raw CSV content as loaded from disk (used when writing predictions).
        self._original_data: pd.DataFrame | None = None

        # Path of the selected CSV file; empty string means "nothing selected".
        self._path: str = ""
        # Preprocessed CSV content that is handed to the model.
        self._csv_data: pd.DataFrame | None = None
        self._model: BayesianNetwork | None = None

        self.build_ui()
        self.create_model()

    def _path_label_text(self) -> str:
        """Return the text of the path label for the current selection."""
        return f"Path: {self._path if self._path else 'Not selected'}"

    def _predictions_path(self) -> str:
        """Return the output path for batch predictions.

        The extension of the selected file is replaced rather than a fixed
        number of characters being cut off, so files without a three-letter
        extension (the dialog offers "All files") are handled correctly.
        """
        root, _ = os.path.splitext(self._path)
        return f"{root}-predicted.csv"

    def build_ui(self) -> None:
        """Builds the UI for the application"""
        self.title("Student Grade Prediction")
        self.geometry(f"{self.X_MAX}x{self.Y_MAX}")
        self.resizable(False, False)

        self.header = ttk.Label(self, text="Student Grade predictor", font=("Arial", 16, "bold"))
        self.header.pack(padx=self.PAD_X, pady=self.PAD_Y)
        self.dsc_1 = ttk.Label(self, text=desc_1, wraplength=self.WRAP_LENGTH)
        self.dsc_1.pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

        # Open CSV file
        self.open_csv_btn = ttk.Button(self, text="Load Students CSV file", command=self.select_csv)
        self.open_csv_btn.pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

        # Only enabled while a CSV file is selected.
        self.reset_path_btn = ttk.Button(self, text="Reset Path", command=self.reset_path, state="disabled")
        self.reset_path_btn.pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

        # Path label
        self.path_lbl = ttk.Label(self, text=self._path_label_text(), wraplength=512)
        self.path_lbl.pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

        # Divider
        ttk.Separator(self, orient="horizontal").pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

        # Form
        self.build_form()

        # Predict button
        self.predict_btn = ttk.Button(self, text="Predict", command=self.predict)
        self.predict_btn.pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

        # Divider
        ttk.Separator(self, orient="horizontal").pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

        # Result
        self.result_lbl = ttk.Label(self, text="", font=("Arial", 16, "bold"))
        self.result_lbl.pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

    def build_form(self) -> None:
        """Build the form for the application"""
        self.form = ttk.Frame(self)

        ###### Left container ######
        left_container = ttk.Frame(self.form)
        left_container.columnconfigure(0, weight=1)
        left_container.columnconfigure(1, weight=4)

        self.left_builder = CmbxBuilder(left_container_settings, left_container)
        self.left_builder.build()

        left_container.grid(padx=8, pady=8, row=0, column=0, sticky="w")

        ###### Right container ######
        right_container = ttk.Frame(self.form)
        right_container.columnconfigure(0, weight=1)
        right_container.columnconfigure(1, weight=4)

        self.right_builder = CmbxBuilder(right_container_settings, right_container)
        self.right_builder.build()

        right_container.grid(padx=8, pady=8, row=0, column=1, sticky="e")
        self.form.pack(padx=self.PAD_X, pady=self.PAD_Y, fill="x")

    def create_model(self) -> None:
        """Creates the bayesian model and fits the data to it"""
        create_model: Model = Model(TRAIN_PATH, NUMBER_CORES)
        create_model.create()
        create_model.fit()

        # Get networks
        self._model = create_model.get()

    def reset_path(self):
        """Reset the path of the CSV file and return to manual input mode"""
        self.set_states("readonly")
        self._path = ""
        # Drop the data of the previous file so nothing stale can be reused.
        self._original_data = None
        self._csv_data = None
        # Keep the label in sync with the (now empty) selection.
        self.path_lbl["text"] = self._path_label_text()
        self.reset_path_btn["state"] = "disabled"
        self.result_lbl["text"] = ""

    def select_csv(self):
        """Select a CSV file to load

        Cancelling the dialog leaves the current selection untouched. The new
        path and data are only stored once loading and preprocessing succeeded.
        """
        accepted_formats = [("CSV files", "*.csv"), ("All files", "*.*")]
        selected = fd.askopenfilename(
            title="Select CSV file",
            filetypes=accepted_formats,
            initialdir=os.path.join(os.pardir, "data"),
        )
        # A cancelled dialog yields an empty value ("" or, on some Tk builds, ()).
        if not selected:
            return

        original_data = pd.read_csv(selected, sep=DELIMITER)
        preprocessing = TunedPreprocessing(original_data)
        preprocessing.process()
        bounded = BoundOutPutPreprocessing(preprocessing.processed_data)
        bounded.process()

        self._path = selected
        self._original_data = original_data
        self._csv_data = bounded.processed_data
        self.path_lbl["text"] = self._path_label_text()
        self.reset_path_btn["state"] = "normal"
        # While a file is selected the manual form is not used.
        self.set_states("disabled")

    def set_states(self, state: str = "disabled"):
        """Set the states of the comboboxes"""
        self.left_builder.set_states(state)
        self.right_builder.set_states(state)

    def _save_predictions_csv(self, predictions: "list[int] | pd.DataFrame"):
        """Save the predictions to a CSV file

        Args:
            predictions (list[int] | pd.DataFrame): The predictions to save.
                A DataFrame (as returned by pgmpy's ``predict``) contributes
                its ``G3`` column, or its first column if ``G3`` is missing.
        """
        # `_path` is an empty string (never None) when no file is selected.
        if not self._path:
            print("No CSV file selected")
            return

        if isinstance(predictions, pd.DataFrame):
            # Passing a DataFrame together with `columns=[...]` would re-select
            # a non-existent column and yield NaN only, so pick the column
            # explicitly and rename it.
            column = predictions["G3"] if "G3" in predictions.columns else predictions.iloc[:, 0]
            df = column.rename("Predicted Grade (G3)").to_frame()
        else:
            df = pd.DataFrame(predictions, columns=["Predicted Grade (G3)"])

        df = pd.concat([self._original_data, df], axis=1, join="inner")
        df.to_csv(self._predictions_path(), index=False, sep=DELIMITER)

    def predict(self):
        """Predict the grades of the students"""
        if self._path:
            # Batch mode: predict for every row of the loaded CSV file.
            if 'G3' in self._csv_data.columns:
                print("G3 column found. No need to predict.")
                return

            predictions = self._model.predict(self._csv_data, stochastic=False, n_jobs=NUMBER_CORES)
            self._save_predictions_csv(predictions)
            self.result_lbl["text"] = f"Predictions saved to {self._predictions_path()}"
        else:
            # Manual mode: use whatever the user selected in the form.
            selected = {**self.left_builder.get_values(), **self.right_builder.get_values()}
            data = {key: value for key, value in selected.items() if value is not None}

            if not data:
                print("No data to predict")
                return

            df = pd.DataFrame(data)
            pre = TunedPreprocessing(df)
            pre.process()
            df = pre.processed_data
            pre = BoundOutPutPreprocessing(df)
            pre.process()
            df = pre.processed_data
            prediction = self.fast_prediction(self._model, df)
            self.result_lbl["text"] = f"Predicted grade (G3): {prediction.loc[0, 'G3']}"

    def map_to_grade(self, grade_int: int) -> str:
        """Map the integer to a grade as a string

        Args:
            grade_int (int): The integer to map

        Returns:
            str: The mapped grade
        """
        # 0 and 12 are the open-ended bins; 1..11 map to the grades 7..17.
        if grade_int == 0:
            return '<7'
        elif grade_int == 12:
            return '>17'
        else:
            return str(grade_int + 6)

    def fast_prediction(self, model: BayesianNetwork, data: pd.DataFrame) -> pd.DataFrame:
        """Fast method to predict the grades of the students

        Args:
            model (BayesianNetwork): The model to use
            data (pd.DataFrame): The data to predict

        Returns:
            pd.DataFrame: One row per input row with the predicted grade
                (``G3``) and the probability of that grade (``likelihood``)
        """
        pred = {'G3': [], 'likelihood': []}
        length = len(data.index)

        # The inference engine does not depend on the row, so build it once.
        elimination = VariableElimination(model)

        for index, row in data.iterrows():
            print(f"Predicting {index}/{length}", end='\r')
            # Every known column except the target itself is used as evidence.
            evidence = {k: v for k, v in row.items() if k != 'G3'}
            estimation = elimination.query(variables=['G3'], evidence=evidence, joint=True)
            prediction = estimation.values
            pred['G3'].append(self.map_to_grade(prediction.argmax()))
            pred['likelihood'].append(prediction.max())
        return pd.DataFrame(pred)

    def map_cmbox_text_values(self, text: str, settings: dict) -> Any:
        """Map the text of the combobox to the values

        Args:
            text (str): The text to map
            settings (dict): The settings to use; reads the ``"text"`` and
                ``"map_values"`` entries, matched by position

        Returns:
            Any: The mapped value, or None if the text is empty, a single
                space, or not one of the known texts
        """
        if not text or text not in settings["text"] or text == " ":
            return None
        return settings["map_values"][settings["text"].index(text)]

## 4. Start App

In [12]:
if __name__ == "__main__":
    # Constructing App builds the UI and creates the model (see App.__init__).
    app = App()
    # Hand control to the Tk event loop.
    app.mainloop()