In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline


def load_data(path="ResearchInformation3.csv"):
    """Load the survey CSV and return numeric features plus the target.

    Categorical answers are converted to numbers: hour ranges become a
    representative number of hours, attendance ranges become a
    representative percentage, and Yes/No answers become 1/0. A label that
    is not present in the relevant mapping becomes NaN (that is how
    ``Series.map`` treats unmatched keys).

    Args:
        path: Location of the CSV file. Defaults to the file name that was
            previously hard-coded, so existing ``load_data()`` calls keep
            working.

    Returns:
        A ``(X, y)`` tuple where ``X`` is a DataFrame holding the ten
        feature columns and ``y`` is the ``"Overall"`` column.
    """
    data = pd.read_csv(path)

    feature_columns = [
        "Preparation", "Computer", "Gaming", "HSC", "SSC",
        "Attendance", "Job", "English", "Extra", "Last",
    ]
    X = data[feature_columns].copy()
    y = data["Overall"]

    # Preparation and Gaming use the same hour buckets. The lowest bucket was
    # previously spelled "0-1 Hours" for one column and "0-1 Hour" for the
    # other; accepting both spellings keeps a mismatch from silently turning
    # a whole category into NaN.
    hours_map = {
        "0-1 Hour": 0.5,
        "0-1 Hours": 0.5,
        "2-3 Hours": 2.5,
        "More than 3 Hours": 4.0,
    }
    yes_no_map = {
        "Yes": 1,
        "No": 0,
    }
    att_map = {
        "80%-100%": 90,
        "Below 40%": 30,
        "60%-79%": 70,
        "40%-59%": 50,
    }

    X["Preparation"] = X["Preparation"].map(hours_map)
    X["Gaming"] = X["Gaming"].map(hours_map)
    X["Attendance"] = X["Attendance"].map(att_map)
    X["Job"] = X["Job"].map(yes_no_map)
    X["Extra"] = X["Extra"].map(yes_no_map)

    return X, y


def pre_process(X, y, test_size=0.2, random_state=42):
    """Split the features and target into train and test subsets.

    Args:
        X: Feature table.
        y: Target values aligned with ``X``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        The result of ``train_test_split(X, y, ...)``, unchanged.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    return train_test_split(X, y, **split_options)

def build_pipeline():
    """Create the unfitted modelling pipeline.

    The pipeline has two steps: ``"imputer"``, a ``SimpleImputer`` using the
    ``"mean"`` strategy, followed by ``"model"``, a ``LinearRegression``.

    Returns:
        A new ``Pipeline`` instance.
    """
    fill_missing = SimpleImputer(strategy="mean")
    regressor = LinearRegression()
    return Pipeline(steps=[("imputer", fill_missing), ("model", regressor)])

def evaluate(model, X_test, y_test):
    """Print and return regression metrics for ``model`` on the test data.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Features to predict on.
        y_test: True target values matching ``X_test``.

    Returns:
        A dict with the keys ``"MAE"``, ``"MSE"``, ``"RMSE"`` and ``"R2"``.
        Each metric is also printed as ``"<name>: <value>"``.
    """
    predicted = model.predict(X_test)

    mse = mean_squared_error(y_test, predicted)
    metrics = {
        "MAE": mean_absolute_error(y_test, predicted),
        "MSE": mse,
        # The square root of the MSE is the root mean squared error; the
        # label used to read "RMAE", which does not name this quantity.
        "RMSE": np.sqrt(mse),
        "R2": r2_score(y_test, predicted),
    }

    for name, value in metrics.items():
        print(f"{name}: {value}")

    return metrics


# Script entry point: load the data, split it, fit the pipeline on the
# training split and print the metrics for the held-out split.
if __name__ == "__main__":
    
    # X holds the encoded feature columns, y the "Overall" target.
    X, y = load_data()
    # Uses pre_process defaults (test_size=0.2, random_state=42).
    X_train, X_test, y_train, y_test = pre_process(X, y)
    
    # Fitting the whole pipeline means the imputer is fit on the training
    # split only, before the regression model is fit.
    pipe = build_pipeline()
    pipe.fit(X_train,y_train)
    evaluate(pipe, X_test, y_test)
    
    

MAE: 0.14478198670508077
MSE: 0.040523021195654985
RMAE: 0.20130330646975222
R2: 0.8914541344665041
