In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score


In [13]:
# Load the housing table. The frame must be bound to `df` because the two
# lines below read from `df`; binding it to another name only works when a
# stale `df` happens to be left over in the kernel.
df = pd.read_csv('USA_Housing.csv')

# Every column except 'Price' is used as a predictor; 'Price' is the target.
X = df.drop('Price', axis=1).values
y = df['Price'].values


In [15]:
# Standardise each feature column of X.
# NOTE(review): the scaler is fitted on every row of X before any fold split,
# so rows later used as held-out folds contribute to the fitted statistics.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)


In [17]:
# Five shuffled folds; the fixed seed keeps fold membership the same across runs.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)


In [19]:
# Fit ordinary least squares on each training fold, score it on the held-out
# fold, and remember the coefficients from the best-scoring fold.
best_r2_score = -np.inf
best_beta_matrix = None

# Per-fold R² values, kept so the spread across folds can be inspected.
r2_scores = []

for train_index, test_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Prepend a column of ones so the first coefficient acts as the intercept.
    X_train_b = np.c_[np.ones((X_train.shape[0], 1)), X_train]

    # Solve the least-squares problem directly rather than forming and
    # inverting X^T X: same solution when X^T X is invertible, but better
    # conditioned and it still returns a (minimum-norm) answer when features
    # are collinear, where np.linalg.inv would raise or lose precision.
    beta_matrix, *_ = np.linalg.lstsq(X_train_b, y_train, rcond=None)

    X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]
    y_pred = X_test_b @ beta_matrix

    r2 = r2_score(y_test, y_pred)
    r2_scores.append(r2)

    # Strict '>' keeps the earliest fold when two folds tie.
    if r2 > best_r2_score:
        best_r2_score = r2
        best_beta_matrix = beta_matrix

    print(f"Iteration R2 Score: {r2}")

print(f"Best R2 Score: {best_r2_score}")
print(f"Best Beta Matrix: \n{best_beta_matrix}")


Iteration R2 Score: 0.9179971706985147
Iteration R2 Score: 0.9145677884802819
Iteration R2 Score: 0.9116116385364479
Iteration R2 Score: 0.9193091764960817
Iteration R2 Score: 0.9243869413350317
Best R2 Score: 0.9243869413350317
Best Beta Matrix: 
[1.23161736e+06 2.30225051e+05 1.63956839e+05 1.21115120e+05
 7.83467170e+02 1.50662447e+05]


In [21]:
# Score the best cross-validation coefficients on a 30% split of the data.
# NOTE(review): best_beta_matrix was fitted on a training fold (80% of the
# rows) of this same X_scaled, so this 30% split necessarily shares rows with
# its training data; the score below is not an independent hold-out estimate.
X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42
)

# Add the leading column of ones that pairs with the intercept coefficient.
X_train_full_b = np.hstack([np.ones((X_train_full.shape[0], 1)), X_train_full])
X_test_full_b = np.hstack([np.ones((X_test_full.shape[0], 1)), X_test_full])

y_test_pred_full = X_test_full_b.dot(best_beta_matrix)
final_r2_score = r2_score(y_test_full, y_test_pred_full)
print(f"Final R2 Score on the 30% Test Data: {final_r2_score}")


Final R2 Score on the 30% Test Data: 0.9147458156636434


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score


In [3]:
# Read the housing table from disk.
df = pd.read_csv('USA_Housing.csv')

# 'Price' is the regression target; all remaining columns are predictors.
y = df['Price'].values
X = df.drop(columns='Price').values


In [5]:

# Two-stage split. Stage one keeps 56% of the rows for training and sets 44%
# aside; stage two divides that 44% into validation (32% of it, about 14% of
# all rows) and test (68% of it, about 30% of all rows).
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.44,
    random_state=42,
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.68,
    random_state=42,
)


In [7]:

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to all three splits so validation and test rows do
# not influence the statistics.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_val_scaled, X_test_scaled = scaler.transform(X_val), scaler.transform(X_test)


In [9]:
def gradient_descent(X, y, learning_rate, iterations):
    """Fit linear-regression coefficients with batch gradient descent.

    Starting from an all-zero coefficient vector, repeatedly steps against
    the gradient ``(1 / m) * X.T @ (X @ beta - y)`` of the half mean squared
    error, using every row of ``X`` on each step.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. No intercept column is added here; include one in
        ``X`` if an intercept is wanted.
    y : array-like of shape (m,) or (m, 1)
        Target values. A column vector is flattened to one dimension.
    learning_rate : float
        Step size applied to the gradient on every iteration.
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The coefficient vector after ``iterations`` updates.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional, has no rows, or ``y`` does not
        contain exactly one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so an (m, 1) target cannot broadcast against the (m,) predictions.
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one row")
    # Without this check a length-1 y would silently broadcast over all rows.
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} values but X has {m} rows"
        )

    beta = np.zeros(n)
    # Loop-invariant: scale the transposed design matrix once, not per step.
    scaled_Xt = (1 / m) * X.T
    for _ in range(iterations):
        residual = X @ beta - y
        beta -= learning_rate * (scaled_Xt @ residual)
    return beta


In [11]:
# Candidate step sizes; each is trained for the same number of iterations and
# the one with the highest validation R² is kept.
learning_rates = [0.001, 0.01, 0.1, 1]
iterations = 1000
best_r2_val = -np.inf
best_beta = None
# Initialised up front so the name exists even if no candidate is selected.
best_learning_rate = None

# Prepend a column of ones to each split so the first coefficient is the intercept.
X_train_scaled_b = np.c_[np.ones((X_train_scaled.shape[0], 1)), X_train_scaled]
X_val_scaled_b = np.c_[np.ones((X_val_scaled.shape[0], 1)), X_val_scaled]
X_test_scaled_b = np.c_[np.ones((X_test_scaled.shape[0], 1)), X_test_scaled]

for lr in learning_rates:
    print(f"Training with learning rate: {lr}")

    beta = gradient_descent(X_train_scaled_b, y_train, learning_rate=lr, iterations=iterations)

    # A step size that is too large can blow the coefficients up to inf/nan;
    # such a run cannot be scored meaningfully, so skip it instead of failing.
    if not np.all(np.isfinite(beta)):
        print(f"Learning Rate: {lr} diverged (non-finite coefficients); skipping")
        continue

    y_val_pred = X_val_scaled_b @ beta
    r2_val = r2_score(y_val, y_val_pred)

    # Test R² is reported for information only; selection uses validation R².
    y_test_pred = X_test_scaled_b @ beta
    r2_test = r2_score(y_test, y_test_pred)

    print(f"Learning Rate: {lr}, R² Validation: {r2_val}, R² Test: {r2_test}")

    # Strict '>' keeps the earliest candidate when two rates tie.
    if r2_val > best_r2_val:
        best_r2_val = r2_val
        best_beta = beta
        best_learning_rate = lr

if best_beta is None:
    raise RuntimeError("No learning rate produced a usable model")


Training with learning rate: 0.001
Learning Rate: 0.001, R² Validation: -0.9915340004162254, R² Test: -0.9243736786923582
Training with learning rate: 0.01
Learning Rate: 0.01, R² Validation: 0.9202047028645081, R² Test: 0.9133296021779286
Training with learning rate: 0.1
Learning Rate: 0.1, R² Validation: 0.9202207766800662, R² Test: 0.9133419747998835
Training with learning rate: 1
Learning Rate: 1, R² Validation: 0.9202207766800662, R² Test: 0.9133419747998835


In [13]:
# Re-score the coefficients chosen on the validation split against the test split.
y_test_pred_best = X_test_scaled_b.dot(best_beta)
final_r2_test = r2_score(y_test, y_test_pred_best)

# Summary of the selected model, one item per line.
print(
    f"Best Learning Rate: {best_learning_rate}",
    f"Final R² Score on the Test Set: {final_r2_test}",
    f"Best Beta Coefficients: \n{best_beta}",
    sep="\n",
)


Best Learning Rate: 0.1
Final R² Score on the Test Set: 0.9133419747998835
Best Beta Coefficients: 
[1225106.34781021  231827.54854547  166006.22902472  120763.07797071
    2922.26769971  152609.02782229]
