Title: Understanding Regression Metrics

Task 1: Calculate MAE and MSE on test predictions and compare errors.

In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np

# Toy regression problem: six samples with a single integer feature
X = np.arange(1, 7).reshape(-1, 1)
y = np.array([3, 4, 2, 5, 6, 7])

# Hold out part of the data for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Fit ordinary least squares on the training portion (fit() returns the estimator)
model = LinearRegression().fit(X_train, y_train)

# Score the held-out samples
y_pred = model.predict(X_test)

# MAE averages the absolute errors; MSE averages the squared errors,
# so MSE penalises large misses more heavily than MAE does
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae:.3f}")
print(f"Mean Squared Error (MSE): {mse:.3f}")


Mean Absolute Error (MAE): 3.300
Mean Squared Error (MSE): 10.980


Task 2: Evaluate the R² score on datasets with different noise levels and discuss its significance.

In [5]:
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np

# Build two targets that share the same linear trend (y = 2x + 1) but differ
# in noise level. Seeding the global NumPy generator makes both reproducible.
np.random.seed(42)
X = np.linspace(0, 10, num=100)[:, np.newaxis]

# Dataset 1: Gaussian noise with standard deviation 1 (drawn first)
y1 = 2 * X.ravel() + 1 + np.random.normal(loc=0, scale=1, size=100)

# Dataset 2: Gaussian noise with standard deviation 5 (drawn second)
y2 = 2 * X.ravel() + 1 + np.random.normal(loc=0, scale=5, size=100)

def evaluate_r2(X, y, *, test_size=0.3, random_state=42):
    """Fit a linear regression on a train split and return R² on the test split.

    Args:
        X: Feature matrix, passed to ``train_test_split`` and the model.
        y: Target values aligned with ``X``.
        test_size: Size of the held-out portion, forwarded to
            ``train_test_split``. Defaults to 0.3.
        random_state: Seed forwarded to ``train_test_split`` so the split
            is reproducible. Defaults to 42.

    Returns:
        The R² score of the fitted model on the held-out samples.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = LinearRegression()
    model.fit(X_train, y_train)
    # Score only on samples the model has not seen during fitting
    return r2_score(y_test, model.predict(X_test))

# Score the same model and split procedure on each target; only the noise differs
r2_dataset1, r2_dataset2 = [evaluate_r2(X, target) for target in (y1, y2)]

# Report both scores side by side so the effect of noise on R² is visible
print(f"R² Score (Low noise dataset): {r2_dataset1:.3f}")
print(f"R² Score (High noise dataset): {r2_dataset2:.3f}")


R² Score (Low noise dataset): 0.984
R² Score (High noise dataset): 0.620


Task 3: Use a sample dataset, compute all three metrics, and deduce model performance.

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Load the diabetes regression dataset bundled with scikit-learn
data = load_diabetes()
X = data.data
y = data.target

# Train-test split (fixed seed so the reported numbers are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on the held-out samples
y_pred = model.predict(X_test)

# Compute the three requested metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# MSE is in squared target units; its square root (RMSE) is in the same
# units as y and can be compared directly with MAE.
rmse = np.sqrt(mse)

# Reference point for deducing performance: the error of always predicting
# the training-set mean. A useful model should have a clearly lower MAE.
baseline_pred = np.full(y_test.shape, y_train.mean())
baseline_mae = mean_absolute_error(y_test, baseline_pred)

# Print results
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R² Score: {r2:.3f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Baseline MAE (predict training mean): {baseline_mae:.2f}")
