Title: Understanding Regression Metrics

Task 1: Calculate MAE and MSE on test predictions and compare errors.

In [1]:
# Fit a linear regression on synthetic data and report MAE and MSE on the held-out test split.
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression

# Synthetic regression problem: 100 samples, 5 features, low noise (noise=0.1).
# The fixed random_state keeps the generated data reproducible across runs.
X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42)

# Hold out 20% of the rows so the metrics are measured on data the model never saw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# MAE averages the absolute errors; MSE averages the squared errors,
# so MSE weights large misses more heavily than MAE does.
mae, mse = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
)

print(
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    sep="\n",
)


Mean Absolute Error (MAE): 0.08437614476432091
Mean Squared Error (MSE): 0.011344800318058875


Task 2: Evaluate the R² score on datasets with different noise levels and discuss its significance.

In [2]:
# Compare the test-set R² of a linear regression on a low-noise and a high-noise synthetic dataset.
from sklearn.metrics import r2_score

# The two datasets are generated with identical arguments except for `noise`,
# and both are split with the same settings.
synthetic_kwargs = {"n_samples": 100, "n_features": 5, "random_state": 42}
split_kwargs = {"test_size": 0.2, "random_state": 42}

# Dataset 1: low noise. Dataset 2: high noise.
X1, y1 = make_regression(noise=0.1, **synthetic_kwargs)
X2, y2 = make_regression(noise=20, **synthetic_kwargs)

X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, **split_kwargs)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, **split_kwargs)

# One independent linear model per dataset.
model1 = LinearRegression()
model1.fit(X_train1, y_train1)
model2 = LinearRegression()
model2.fit(X_train2, y_train2)

# Score each model on its own held-out split.
y_pred1, y_pred2 = model1.predict(X_test1), model2.predict(X_test2)
r2_1, r2_2 = r2_score(y_test1, y_pred1), r2_score(y_test2, y_pred2)

for dataset_number, r2_value in enumerate((r2_1, r2_2), start=1):
    print(f"R² Score for Dataset {dataset_number}: {r2_value}")


R² Score for Dataset 1: 0.9999994350808352
R² Score for Dataset 2: 0.9781176327564886


Task 3: Use a sample dataset, compute all three metrics, and deduce model performance.

In [4]:
# Fit a linear regression on the California housing dataset and report MAE, MSE and R² on the test split.
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the California housing dataset and separate features from the target.
data = fetch_california_housing()
X, y = data.data, data.target

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# All three metrics are computed on the same held-out predictions.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

for metric_label, metric_value in (
    ("Mean Absolute Error (MAE)", mae),
    ("Mean Squared Error (MSE)", mse),
    ("R² Score", r2),
):
    print(f"{metric_label}: {metric_value}")

# Map R² to a verdict: above 0.7 is "well", above 0.4 is "moderately",
# and everything else is "doesn't fit well".
verdict = (
    "The model fits the data well, with a high R² score."
    if r2 > 0.7
    else "The model fits the data moderately, with a reasonable R² score."
    if r2 > 0.4
    else "The model doesn't fit the data well, with a low R² score."
)
print(verdict)



Mean Absolute Error (MAE): 0.533200130495698
Mean Squared Error (MSE): 0.5558915986952422
R² Score: 0.5757877060324524
The model fits the data moderately, with a reasonable R² score.
