In [None]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler


# Location of the housing CSV. The USA_HOUSING_CSV environment variable
# overrides it so the notebook can run on machines other than the author's;
# the fallback is the original hard-coded path.
csv_file = os.environ.get(
    "USA_HOUSING_CSV", "C:/Users/predator/Downloads/USA_Housing.csv"
)
df = pd.read_csv(csv_file)

# Preview the first rows as the cell's displayed output.
df.head()


In [None]:



# Features are every numeric column except the target; `Price` is the target.
# NOTE(review): select_dtypes drops any non-numeric column (StandardScaler
# cannot standardise text) -- confirm no dropped column is needed as a feature.
X = df.drop(columns='Price').select_dtypes(include='number')
y = df['Price']

# Split BEFORE scaling so that validation/test rows never influence the
# scaler's mean and variance (fitting on the full data leaks information).
# 56% train; the remaining 44% is divided 32% / 68%, i.e. roughly 14% of all
# rows for validation and 30% for test.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.44, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.68, random_state=42)

# Standardisation statistics come from the training rows only.
scaler = StandardScaler().fit(X_train)


def _with_intercept(features):
    """Standardise `features` with the train-fitted scaler and prepend a 1s column.

    The leading column of ones lets the first coefficient act as the intercept.
    Without it, X.dot(beta) is forced through zero, which cannot fit an
    uncentred target.
    """
    scaled = scaler.transform(features)
    return np.hstack([np.ones((scaled.shape[0], 1)), scaled])


X_train = _with_intercept(X_train)
X_val = _with_intercept(X_val)
X_test = _with_intercept(X_test)
# Full design matrix under the same transformation, kept under its original
# name in case other cells reference it.
X_scaled = _with_intercept(X)

# Hyper-parameters for the gradient-descent sweep.
learning_rates = [0.001, 0.01, 0.1, 1]
iterations = 1000


def gradient_descent(X, y, learning_rate, iterations):
    """Fit linear-regression coefficients with batch gradient descent.

    Starting from ``beta = 0``, repeatedly steps against the gradient of the
    half mean squared error ``(1 / (2 * m)) * sum((X.dot(beta) - y) ** 2)``.
    No intercept is added here: predictions are exactly ``X.dot(beta)``, so a
    caller that wants an intercept must supply a column of ones in ``X``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix. Converted to a float ndarray, so pandas objects are
        used positionally rather than aligned by index.
    y : array-like of shape (m,) or (m, 1)
        Target values. Flattened to 1-D so a column vector cannot broadcast
        against the predictions into an (m, m) matrix.
    learning_rate : float
        Step size applied to the gradient.
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    beta : numpy.ndarray of shape (n,)
        Coefficients after the final update.
    cost_history : list
        One cost value per iteration, each evaluated with the coefficients as
        they were *before* that iteration's update.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no rows, or its row count differs from the
        length of ``y``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != m:
        raise ValueError(f"X has {m} samples but y has {y.shape[0]}")

    beta = np.zeros(n)
    cost_history = []

    for _ in range(iterations):
        # Residuals of the current coefficients.
        error = X.dot(beta) - y

        # Gradient of the half-MSE cost with respect to beta.
        gradient = (1/m) * X.T.dot(error)
        beta = beta - learning_rate * gradient

        # Cost of the coefficients used to compute `error` (pre-update).
        cost = (1/(2*m)) * np.sum(error ** 2)
        cost_history.append(cost)

    return beta, cost_history


# Train one model per candidate learning rate and score it on the validation
# and test splits. `results` maps learning rate -> fitted coefficients, R²
# scores and the per-iteration training cost.
results = {}

for lr in learning_rates:

    beta, cost_history = gradient_descent(X_train, y_train, lr, iterations)

    y_val_pred = X_val.dot(beta)
    y_test_pred = X_test.dot(beta)

    if np.all(np.isfinite(y_val_pred)) and np.all(np.isfinite(y_test_pred)):
        r2_val = r2_score(y_val, y_val_pred)
        r2_test = r2_score(y_test, y_test_pred)
    else:
        # The run diverged (step size too large). r2_score rejects NaN/inf
        # input, which would abort the whole sweep, so record the worst
        # possible score instead; -inf can never win the selection below
        # unless every learning rate diverged.
        r2_val = r2_test = float('-inf')

    results[lr] = {
        'beta': beta,
        'r2_val': r2_val,
        'r2_test': r2_test,
        # Kept so convergence can be inspected or plotted afterwards.
        'cost_history': cost_history,
    }


# Report every candidate.
for lr, res in results.items():
    print(f"Learning Rate: {lr}")
    print(f"R² on Validation Set: {res['r2_val']}")
    print(f"R² on Test Set: {res['r2_test']}")
    print(f"Beta Coefficients: {res['beta']}")
    print('-'*40)


# Model selection uses the validation score only; the test score is reported
# above but plays no part in the choice.
best_lr = max(results, key=lambda lr: results[lr]['r2_val'])
best_beta = results[best_lr]['beta']

print(f"Best Learning Rate: {best_lr}")
print(f"Best Beta Coefficients: {best_beta}")
