In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, StandardScaler

# Read the student-performance table from disk, then preview its first ten rows.
dataset_path = "Student_Performance.csv"
data = pd.read_csv(dataset_path)
data.head(n=10)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0
5,3,78,No,9,6,61.0
6,7,73,Yes,5,6,63.0
7,8,45,Yes,4,6,42.0
8,5,77,No,8,2,61.0
9,4,89,No,4,0,69.0


In [2]:
# Columns fed to the model. Listed once so that the selection of X and the
# ColumnTransformer below can never drift apart.
numeric_features = ['Hours Studied', 'Previous Scores', 'Sleep Hours']

# Tunable settings for the split and the model.
TEST_SIZE = 0.2      # fraction of rows held out for evaluation
RANDOM_STATE = 42    # fixed seed so the train/test split is reproducible
Degree = 2           # degree of the polynomial feature expansion

X = data[numeric_features]
y = data['Performance Index']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Preprocessing: standardise every numeric feature.
# NOTE: the categorical column 'Extracurricular Activities' is not part of the
# model; using it would require adding it to X and giving it its own
# transformer (e.g. OneHotEncoder) in the list below.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
    ])

# Polynomial expansion of the scaled features, followed by a linear regression.
polynomial_features = PolynomialFeatures(Degree)
model = LinearRegression()

# Chain the steps so the exact same transformations are applied at fit and
# predict time, then train on the training split and predict the test split.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('polynomial_features', polynomial_features),
    ('model', model)
])
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Evaluate the regression on the held-out test split using MAE, MSE and R^2.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared (R^2): {r2}')

Mean Absolute Error: 1.7041350163621223
Mean Squared Error: 4.54387525936063
R-squared (R^2): 0.9877386460945705


In [3]:
# Persist the fitted pipeline to disk so it can be reloaded later with
# joblib.load(model_filename) instead of being retrained.
# NOTE: joblib files are pickle-based; only load them from sources you trust.
model_filename = "assignment04_model.pkl"
joblib.dump(pipeline, model_filename)

['assignment04_model.pkl']