# Q5: Multiple Linear Regression for Predicting CreditScore

In [1]:
import numpy as np
import pandas as pd

# Create the training dataset
data = {
    'ID': [1, 2, 3, 4, 5, 6, 7, 8],
    'Age': [35, 28, 45, 31, 52, 29, 42, 33],
    'CreditScore': [720, 650, 750, 600, 780, 630, 710, 640],
    'Education': [16, 14, None, 12, 18, 14, 16, 12]
}

df = pd.DataFrame(data)

# Impute missing Education values using mean imputation
education_mean = df['Education'].mean()  
df['Education'].fillna(education_mean, inplace=True)

print("Training Data (after imputation):")
print(df)

# Construct the design matrix X with an intercept
X = np.column_stack((np.ones(len(df)), df['Age'].values, df['Education'].values))
y = df['CreditScore'].values.reshape(-1, 1)

# Compute the normal equation solution: beta = (X^T X)^{-1} X^T y
beta = np.linalg.inv(X.T @ X) @ (X.T @ y)

print("Coefficients (beta):")
print(beta)

# Interpretation:
# beta[0] is the intercept: predicted CreditScore when Age and Education are zero
# beta[1] is the coefficient for Age: expected change in CreditScore per unit increase in Age, holding Education constant
# beta[2] is the coefficient for Education: expected change in CreditScore per unit increase in Education, holding Age constant

Training Data (after imputation):
   ID  Age  CreditScore  Education
0   1   35          720  16.000000
1   2   28          650  14.000000
2   3   45          750  14.571429
3   4   31          600  12.000000
4   5   52          780  18.000000
5   6   29          630  14.000000
6   7   42          710  16.000000
7   8   33          640  12.000000
Coefficients (beta):
[[327.32340185]
 [  4.24933015]
 [ 13.79292002]]


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Education'].fillna(education_mean, inplace=True)
