# Q6: Mean Squared Error and R² Evaluation for the Linear Regression Model

In [1]:
import numpy as np
import pandas as pd

# Create the training dataset
data = {
    'ID': [1, 2, 3, 4, 5, 6, 7, 8],
    'Age': [35, 28, 45, 31, 52, 29, 42, 33],
    'CreditScore': [720, 650, 750, 600, 780, 630, 710, 640],
    'Education': [16, 14, None, 12, 18, 14, 16, 12]
}

df = pd.DataFrame(data)

# Impute missing Education values using mean imputation
education_mean = df['Education'].mean()
df['Education'].fillna(education_mean, inplace=True)

print("Training Data (after imputation):")
print(df)

# Construct the design matrix X (including an intercept term)
X = np.column_stack((np.ones(len(df)), df['Age'].values, df['Education'].values))
y = df['CreditScore'].values.reshape(-1, 1)

# Compute the regression coefficients using the normal equation
beta = np.linalg.inv(X.T @ X) @ (X.T @ y)
print("Coefficients (beta):")
print(beta)

# Compute predictions for all training records
y_pred = X @ beta

# Calculate the Mean Squared Error (MSE)
mse = np.mean((y - y_pred) ** 2)
print(f"Mean Squared Error (MSE): {mse:.2f}")

# Calculate R² value
y_mean = np.mean(y)
sst = np.sum((y - y_mean) ** 2)  # Total Sum of Squares
ssr = np.sum((y - y_pred) ** 2)  # Residual Sum of Squares
r2 = 1 - ssr/sst
print(f"R² Value: {r2:.4f}")

Training Data (after imputation):
   ID  Age  CreditScore  Education
0   1   35          720  16.000000
1   2   28          650  14.000000
2   3   45          750  14.571429
3   4   31          600  12.000000
4   5   52          780  18.000000
5   6   29          630  14.000000
6   7   42          710  16.000000
7   8   33          640  12.000000
Coefficients (beta):
[[327.32340185]
 [  4.24933015]
 [ 13.79292002]]
Mean Squared Error (MSE): 370.77
R² Value: 0.8963


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Education'].fillna(education_mean, inplace=True)
