In [1]:
import pandas as pd
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

#### Fetch the data

In [2]:
# Fetch the dataset with UCI repository id 320
student_performance = fetch_ucirepo(id=320)

# Column names of the fetched table (kept available for later cells)
headers = student_performance.data['headers']

# `data.features` and `data.targets` are already labelled DataFrames, so use
# them directly instead of re-wrapping them with positional header slices:
# pd.DataFrame(df, columns=...) reindexes by name and would silently yield
# all-NaN columns if the header order ever differed from the assumed layout.
df_targets = student_performance.data['targets'].copy()

# Use G3 as the target; `assign` returns a new frame, so the fetched
# features object itself is left untouched.
df_features = student_performance.data['features'].assign(target=df_targets["G3"])

In [3]:
# Preview the first five rows (features plus the appended target column)
df_features.head(n=5)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,target
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,no,no,4,3,4,1,1,3,4,11
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,yes,no,5,3,3,1,1,3,2,11
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,yes,no,4,3,2,2,3,3,6,12
3,GP,F,15,U,GT3,T,4,2,health,services,...,yes,yes,3,2,2,1,1,5,0,14
4,GP,F,16,U,GT3,T,3,3,other,other,...,no,no,4,3,2,1,2,5,0,13


#### Split features and target

In [4]:
# Split features and target
X = df_features.drop(columns='target')
y = df_features['target']

# Select only numerical features.
# `include='number'` states the intent directly; excluding only 'object'
# would let any other non-numeric dtype (string, category, bool, datetime)
# through to the scaler.
X = X.select_dtypes(include='number')

#### Standardize the data

In [5]:
# Standardize numerical features.
# The scaler's mean/std are estimated from the training rows only, so the
# held-out rows do not leak into preprocessing. Calling train_test_split with
# the same test_size and random_state as the split cell selects the same rows,
# because the partition depends only on the number of samples and the seed.
# Keep these two arguments in sync with the split cell.
X_fit_rows = train_test_split(X, test_size=0.2, random_state=42)[0]

scaler = StandardScaler()
scaler.fit(X_fit_rows)

# Apply the training-set statistics to every row; the split cell then
# partitions this scaled array as before.
X = scaler.transform(X)

#### Split the data

In [6]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

#### Initializing the metrics

In [7]:
# Placeholders for each model's test-set metrics; the model cells overwrite them.
# NaN is used instead of 0 so that a skipped or failed model cell cannot be
# mistaken for a real score (an MSE of 0 would read as a perfect fit).
# NaN still formats cleanly with the ':.6f' spec used in the report cell.
mse_lr, r2_lr = float("nan"), float("nan")
mse_poly, r2_poly = float("nan"), float("nan")
mse_dt, r2_dt = float("nan"), float("nan")

#### Linear Regression model

In [8]:
# Linear Regression: fit on the training split, evaluate on the test split
model_lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = model_lr.predict(X_test)

# Test-set metrics for the final comparison
r2_lr = r2_score(y_test, y_pred_lr)
mse_lr = mean_squared_error(y_test, y_pred_lr)

#### Polynomial Regression model

In [9]:
# Polynomial Regression (degree=2)
# Learn the degree-2 feature expansion from the training split, then apply
# the same expansion to both splits.
poly_features = PolynomialFeatures(degree=2).fit(X_train)
X_train_poly = poly_features.transform(X_train)
X_test_poly = poly_features.transform(X_test)

# A plain linear model on the expanded features
model_poly = LinearRegression().fit(X_train_poly, y_train)
y_pred_poly = model_poly.predict(X_test_poly)

# Test-set metrics for the final comparison
r2_poly = r2_score(y_test, y_pred_poly)
mse_poly = mean_squared_error(y_test, y_pred_poly)

#### Decision Tree model

In [10]:
# Decision Tree Regression (seeded so repeated runs build the same tree)
model_dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = model_dt.predict(X_test)

# Test-set metrics for the final comparison
r2_dt = r2_score(y_test, y_pred_dt)
mse_dt = mean_squared_error(y_test, y_pred_dt)

In [11]:
# Print the evaluation metrics, one line per model, in the order they were trained
for label, mse, r2 in (
    ("Linear Regression", mse_lr, r2_lr),
    ("Polynomial Regression (degree=2)", mse_poly, r2_poly),
    ("Decision Tree Regression", mse_dt, r2_dt),
):
    print(f"{label}: Mean Squared Error: {mse:.6f}, R-squared: {r2:.6f}")

Linear Regression: Mean Squared Error: 8.046443, R-squared: 0.174869
Polynomial Regression (degree=2): Mean Squared Error: 10.021868, R-squared: -0.027703
Decision Tree Regression: Mean Squared Error: 17.300000, R-squared: -0.774047
