In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score


csv_file = "C:/Users/predator/Downloads/USA_Housing.csv"
df = pd.read_csv(csv_file)


df.head()


Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,79545.45857,5.682861,7.009188,4.09,23086.8005,1059034.0
1,79248.64245,6.0029,6.730821,3.09,40173.07217,1505891.0
2,61287.06718,5.86589,8.512727,5.13,36882.1594,1058988.0
3,63345.24005,7.188236,5.586729,3.26,34310.24283,1260617.0
4,59982.19723,5.040555,7.839388,4.23,26354.10947,630943.5


In [2]:



X = df.drop(columns='Price')  
y = df['Price']  


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.44, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.68, random_state=42)


learning_rates = [0.001, 0.01, 0.1, 1]
iterations = 1000


def gradient_descent(X, y, learning_rate, iterations):
    m = X.shape[0]  
    n = X.shape[1]  
    beta = np.zeros(n) 
    cost_history = []

    for i in range(iterations):
        
        y_pred = X.dot(beta)
        
        
        error = y_pred - y
        
        
        gradient = (1/m) * X.T.dot(error)
        
        
        beta = beta - learning_rate * gradient

        
        cost = (1/(2*m)) * np.sum(error ** 2)
        cost_history.append(cost)
    
    return beta, cost_history


results = {}

for lr in learning_rates:

    beta, cost_history = gradient_descent(X_train, y_train, lr, iterations)
    

    y_val_pred = X_val.dot(beta)
    y_test_pred = X_test.dot(beta)
    

    r2_val = r2_score(y_val, y_val_pred)
    r2_test = r2_score(y_test, y_test_pred)
    

    results[lr] = {
        'beta': beta,
        'r2_val': r2_val,
        'r2_test': r2_test
    }


for lr, res in results.items():
    print(f"Learning Rate: {lr}")
    print(f"R² on Validation Set: {res['r2_val']}")
    print(f"R² on Test Set: {res['r2_test']}")
    print(f"Beta Coefficients: {res['beta']}")
    print('-'*40)


best_lr = max(results, key=lambda lr: results[lr]['r2_val'])
best_beta = results[best_lr]['beta']

print(f"Best Learning Rate: {best_lr}")
print(f"Best Beta Coefficients: {best_beta}")


Learning Rate: 0.001
R² on Validation Set: -12.130170603815042
R² on Test Set: -11.939483130222705
Beta Coefficients: [134601.31121822 106469.49612607  57366.26933387  21000.64655586
  85368.90280971]
----------------------------------------
Learning Rate: 0.01
R² on Validation Set: -11.805959841246487
R² on Test Set: -11.793587277875929
Beta Coefficients: [213116.338435   168085.28260102  96447.21089248   2217.37778004
 136600.45515858]
----------------------------------------
Learning Rate: 0.1
R² on Validation Set: -11.805747484790814
R² on Test Set: -11.793491787299299
Beta Coefficients: [213136.53934291 168106.98501703  96703.42031391   1952.55683139
 136600.18554736]
----------------------------------------
Learning Rate: 1
R² on Validation Set: -11.805747484790816
R² on Test Set: -11.793491787299299
Beta Coefficients: [213136.53934291 168106.98501703  96703.42031391   1952.55683139
 136600.18554736]
----------------------------------------
Best Learning Rate: 0.1
Best Beta Coeff