# In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the student-performance dataset from the current working directory.
data = pd.read_csv("Student_Performance.csv")

# The numeric predictor columns. The same list selects the feature matrix and
# tells the ColumnTransformer which columns to scale.
numeric_features = ['Hours Studied', 'Previous Scores', 'Sleep Hours']
X = data[numeric_features]
y = data['Performance Index']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Preprocessing step: apply StandardScaler to every numeric feature.
preprocessor = ColumnTransformer(
    transformers=[('num', StandardScaler(), numeric_features)],
)

# Modelling steps: degree-2 polynomial expansion of the scaled features,
# followed by an ordinary least-squares linear regression.
Degree = 2
polynomial_features = PolynomialFeatures(degree=Degree)
model = LinearRegression()

# Chain preprocessing, feature expansion and the regressor so that each step
# is fitted on the training split only, then predict on the held-out split.
pipelines = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('polynomial_features', polynomial_features),
        ('model', model),
    ],
)
y_pred = pipelines.fit(X_train, y_train).predict(X_test)

# Score the held-out predictions with MAE, MSE and R^2.
mae, mse, r2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

# Bundle the fitted pipeline with its metrics so they are persisted together.
model_and_metrics = dict(model=pipelines, mae=mae, mse=mse, r2=r2)

# Serialize the bundle to disk. joblib.dump returns the list of written file
# names, which a notebook displays as the cell output.
import joblib
model_filename = "assignment04_model.pkl"
joblib.dump(model_and_metrics, model_filename)

# Output: ['assignment04_model.pkl']