In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing

In [None]:
# Load the dataset.
# The returned object exposes `.data`, `.feature_names` and `.target`, which
# the following cell assembles into a single DataFrame.
california = fetch_california_housing()

In [None]:
# Build the feature table and append the regression target as the last column.
df = pd.DataFrame(
    data=california.data,
    columns=california.feature_names,
).assign(MedHouseVal=california.target)

# Preview the first rows to sanity-check the columns.
print(df.head())


   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  MedHouseVal  
0    -122.23        4.526  
1    -122.22        3.585  
2    -122.24        3.521  
3    -122.25        3.413  
4    -122.25        3.422  


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the target column from the predictors.
y = df['MedHouseVal']
X = df.drop(columns=['MedHouseVal'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features: learn the scaling statistics from the training
# split only, then apply that same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Define a common tolerance
tolerance = 0.10


def calculate_accuracy(y_true, y_pred, tolerance):
    """Return the percentage of predictions within a relative tolerance.

    A prediction counts as a hit when ``|(y_true - y_pred) / y_true|`` is
    strictly below ``tolerance``.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, same length as ``y_true``.
        tolerance: Maximum relative error as a fraction (0.10 means 10%).

    Returns:
        float: Share of hits, in percent.
    """
    # Convert up front so lists, arrays and pandas Series are compared
    # position by position in the same way.
    observed = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    relative_error = np.abs((observed - predicted) / observed)
    return float(np.mean(relative_error < tolerance) * 100)


# Calculate and print metrics for each model
def print_metrics(model_name, y_true, y_pred):
    """Print MSE, R^2, MAE, RMSE and tolerance accuracy for one model.

    Args:
        model_name: Label used as the prefix of every printed line.
        y_true: Observed target values.
        y_pred: Predicted values, same length as ``y_true``.

    The accuracy line uses the module-level ``tolerance``.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    accuracy = calculate_accuracy(y_true, y_pred, tolerance)
    print(f'{model_name} MSE: {mse}')
    print(f'{model_name} R^2: {r2}')
    print(f'{model_name} MAE: {mae}')
    print(f'{model_name} RMSE: {rmse}')
    print(f'{model_name} Accuracy (within {tolerance*100}% tolerance): {accuracy:.2f}%')
    print("-" * 30)  # Separator between models

In [None]:
# Initialize and train the model
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predict on the test set and derive every reported metric from these
# predictions. Computing them in this cell means the report cannot pick up
# leftover `mae` / `rmse` / `accuracy` values from another cell.
y_pred_lr = lr.predict(X_test)
mse = mean_squared_error(y_test, y_pred_lr)
r2 = r2_score(y_test, y_pred_lr)
mae = mean_absolute_error(y_test, y_pred_lr)
rmse = np.sqrt(mse)
# Share of test rows whose relative error is below `tolerance`, in percent.
accuracy = np.mean(np.abs((y_test - y_pred_lr) / y_test) < tolerance) * 100

# Evaluate the model
print(f'Linear Regression MSE: {mse}')
print(f'Linear Regression R^2: {r2}')
print(f'Linear Regression MAE: {mae}')
print(f'Linear Regression RMSE: {rmse}')
print(f'Linear Regression Accuracy (within {tolerance*100}% tolerance): {accuracy:.2f}%')


Linear Regression MSE: 0.5558915986952442
Linear Regression R^2: 0.575787706032451
Linear Regression MAE: 0.4461535271317829
Linear Regression RMSE: 0.6575877238850522
Linear Regression Accuracy (within 10.0% tolerance): 30.04%


In [None]:
# Fit a Ridge regression with default hyperparameters on the scaled training data.
ridge = Ridge().fit(X_train, y_train)
ridge  # display the fitted estimator as the cell output



In [None]:
# Predict the target for the held-out test features with the fitted Ridge model.
y_pred_ridge = ridge.predict(X_test)

In [None]:
# Fit a Lasso regression with default hyperparameters on the scaled training data.
# NOTE(review): the recorded evaluation output reports R^2 of about 0 for this
# model; presumably the default regularization strength is too strong for
# these features and shrinks every coefficient to zero - confirm and consider
# tuning `alpha`.
lasso = Lasso().fit(X_train, y_train)
lasso  # display the fitted estimator as the cell output

In [None]:
# Predict the target for the held-out test features with the fitted Lasso model.
y_pred_lasso = lasso.predict(X_test)

In [None]:
# Fit a decision tree on the scaled training data.
# The seed makes the fitted tree, and therefore its metrics, reproducible
# across runs, in line with the seeded train/test split.
tree = DecisionTreeRegressor(random_state=42)
tree.fit(X_train, y_train)


In [None]:
# Predict the target for the held-out test features with the fitted decision tree.
y_pred_tree = tree.predict(X_test)


In [None]:
# Fit a random forest on the scaled training data.
# The seed fixes the bootstrap samples and feature draws so the reported
# metrics are reproducible across runs, in line with the seeded split.
forest = RandomForestRegressor(random_state=42)
forest.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out test features with the fitted random forest.
y_pred_forest = forest.predict(X_test)

In [None]:
# Fit a support vector regressor with default hyperparameters on the scaled training data.
svr = SVR().fit(X_train, y_train)
svr  # display the fitted estimator as the cell output

In [None]:
# Predict the target for the held-out test features with the fitted SVR model.
y_pred_svr = svr.predict(X_test)

In [None]:
# Fit a gradient boosting regressor on the scaled training data.
# The seed keeps the fitted ensemble and its metrics reproducible across
# runs, in line with the seeded train/test split.
gbr = GradientBoostingRegressor(random_state=42)
gbr.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out test features with the fitted gradient boosting model.
y_pred_gbr = gbr.predict(X_test)

In [None]:
# Build the KNN regressor and fit it on the scaled training data in one step.
knn = KNeighborsRegressor(
    n_neighbors=5,  # n_neighbors=5 is the default
).fit(X_train, y_train)
knn  # display the fitted estimator as the cell output

In [None]:
# Predict on the test set with the fitted KNN regressor; the predictions are
# scored in the evaluation cells further down.
y_pred_knn = knn.predict(X_test)

In [None]:

# Initialize and train the AdaBoost regressor with a Decision Tree as the base estimator.
# The base estimator is passed positionally: scikit-learn renamed the keyword
# from `base_estimator` to `estimator` and later removed the old name, so the
# positional form works on both sides of the rename.
ada = AdaBoostRegressor(
    DecisionTreeRegressor(),
    n_estimators=50,  # 50 estimators is the default
    random_state=42,  # reproducible boosting rounds, in line with the seeded split
)
ada.fit(X_train, y_train)




In [None]:
# Predict on the test set with the fitted AdaBoost regressor; the predictions
# are scored in the evaluation cells further down.
y_pred_ada = ada.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Define a tolerance level for "Accuracy" (e.g., 10%)
tolerance = 0.10


# Function to evaluate and print metrics for a regression model
def evaluate_model(model_name, y_test, y_pred, tolerance):
    """Print the regression metrics for one model and return them.

    Args:
        model_name: Label used as the prefix of every printed line.
        y_test: Observed target values.
        y_pred: Predicted values, same length as ``y_test``.
        tolerance: Relative-error threshold as a fraction (0.10 means 10%).

    Returns:
        tuple: ``(mse, r2, mae, rmse, accuracy)``, where ``accuracy`` is the
        percentage of predictions whose relative error is below ``tolerance``.
    """
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    accuracy = np.mean(np.abs((y_test - y_pred) / y_test) < tolerance) * 100

    print(f'{model_name} MSE: {mse}')
    print(f'{model_name} R^2: {r2}')
    print(f'{model_name} MAE: {mae}')
    print(f'{model_name} RMSE: {rmse}')
    print(f'{model_name} Accuracy (within {tolerance*100}% tolerance): {accuracy:.2f}%')
    print('--------------------------------------')

    return mse, r2, mae, rmse, accuracy


# Test-set predictions from every fitted model
y_pred_lr = lr.predict(X_test)         # Linear Regression
y_pred_ridge = ridge.predict(X_test)   # Ridge Regression
y_pred_lasso = lasso.predict(X_test)   # Lasso Regression
y_pred_tree = tree.predict(X_test)     # Decision Tree Regression
y_pred_forest = forest.predict(X_test) # Random Forest Regression
y_pred_svr = svr.predict(X_test)       # Support Vector Regression
y_pred_gbr = gbr.predict(X_test)       # Gradient Boosting Regression
y_pred_knn = knn.predict(X_test)       # KNN Regression
y_pred_ada = ada.predict(X_test)       # AdaBoost Regression

# Evaluate and print metrics for each model. Each label sits next to its
# predictions so the two cannot drift apart.
for label, pred in (
    ('Linear Regression', y_pred_lr),
    ('Ridge Regression', y_pred_ridge),
    ('Lasso Regression', y_pred_lasso),
    ('Decision Tree Regression', y_pred_tree),
    ('Random Forest Regression', y_pred_forest),
    ('Support Vector Regression', y_pred_svr),
    ('Gradient Boosting Regression', y_pred_gbr),
    ('KNN Regression', y_pred_knn),
    ('AdaBoost Regression', y_pred_ada),
):
    evaluate_model(label, y_test, pred, tolerance)



Linear Regression MSE: 0.5558915986952442
Linear Regression R^2: 0.575787706032451
Linear Regression MAE: 0.5332001304956565
Linear Regression RMSE: 0.7455813830127763
Linear Regression Accuracy (within 10.0% tolerance): 23.64%
--------------------------------------
Ridge Regression MSE: 0.5558548589435971
Ridge Regression R^2: 0.5758157428913684
Ridge Regression MAE: 0.5331931195789733
Ridge Regression RMSE: 0.7455567442814779
Ridge Regression Accuracy (within 10.0% tolerance): 23.64%
--------------------------------------
Lasso Regression MSE: 1.3106960720039365
Lasso Regression R^2: -0.00021908714592466794
Lasso Regression MAE: 0.9060685490007149
Lasso Regression RMSE: 1.1448563543099792
Lasso Regression Accuracy (within 10.0% tolerance): 12.96%
--------------------------------------
Decision Tree Regression MSE: 0.49896955015421507
Decision Tree Regression R^2: 0.6192260901447466
Decision Tree Regression MAE: 0.45581096414728683
Decision Tree Regression RMSE: 0.7063777673130823
Dec

In [None]:
tolerance = 0.10


def evaluate_model(model_name, y_test, y_pred, tolerance):
    """Print a metrics report for one regression model and return the values.

    Args:
        model_name: Label used as the prefix of every printed line.
        y_test: Observed target values.
        y_pred: Predicted values, same length as ``y_test``.
        tolerance: Relative-error threshold as a fraction (0.10 means 10%).

    Returns:
        tuple: ``(mse, r2, mae, rmse, accuracy)``; ``accuracy`` is the
        percentage of predictions whose relative error is below ``tolerance``.
    """
    squared_err = mean_squared_error(y_test, y_pred)
    abs_err = mean_absolute_error(y_test, y_pred)
    r_squared = r2_score(y_test, y_pred)
    root_squared_err = np.sqrt(squared_err)

    relative_err = np.abs((y_test - y_pred) / y_test)
    within_tol_pct = np.mean(relative_err < tolerance) * 100

    report_lines = (
        f'{model_name} MSE: {squared_err}',
        f'{model_name} R^2: {r_squared}',
        f'{model_name} MAE: {abs_err}',
        f'{model_name} RMSE: {root_squared_err}',
        f'{model_name} Accuracy (within {tolerance*100}% tolerance): {within_tol_pct:.2f}%',
        '--------------------------------------',
    )
    for report_line in report_lines:
        print(report_line)

    return squared_err, r_squared, abs_err, root_squared_err, within_tol_pct


# Display names of the fitted models, in the same order as `predictions` below.
model_names = [
    'Linear Regression',
    'Ridge Regression',
    'Lasso Regression',
    'Decision Tree Regression',
    'Random Forest Regression',
    'Support Vector Regression',
    'Gradient Boosting Regression',
    'KNN Regression',
    'AdaBoost Regression',
]

# Test-set predictions of each model, in the same order as `model_names`.
predictions = [
    y_pred_lr,
    y_pred_ridge,
    y_pred_lasso,
    y_pred_tree,
    y_pred_forest,
    y_pred_svr,
    y_pred_gbr,
    y_pred_knn,
    y_pred_ada,
]

# Score every model and collect one record per model for the summary table.
results = []
for model_name, y_pred in zip(model_names, predictions):
    mse, r2, mae, rmse, accuracy = evaluate_model(model_name, y_test, y_pred, tolerance)
    record = dict(zip(
        ('Model', 'MSE', 'R^2', 'MAE', 'RMSE', 'Accuracy (%)'),
        (model_name, mse, r2, mae, rmse, accuracy),
    ))
    results.append(record)

# Tabulate the collected records and show the summary.
results_df = pd.DataFrame(results)
print(results_df)


Linear Regression MSE: 0.5558915986952442
Linear Regression R^2: 0.575787706032451
Linear Regression MAE: 0.5332001304956565
Linear Regression RMSE: 0.7455813830127763
Linear Regression Accuracy (within 10.0% tolerance): 23.64%
--------------------------------------
Ridge Regression MSE: 0.5558548589435971
Ridge Regression R^2: 0.5758157428913684
Ridge Regression MAE: 0.5331931195789733
Ridge Regression RMSE: 0.7455567442814779
Ridge Regression Accuracy (within 10.0% tolerance): 23.64%
--------------------------------------
Lasso Regression MSE: 1.3106960720039365
Lasso Regression R^2: -0.00021908714592466794
Lasso Regression MAE: 0.9060685490007149
Lasso Regression RMSE: 1.1448563543099792
Lasso Regression Accuracy (within 10.0% tolerance): 12.96%
--------------------------------------
Decision Tree Regression MSE: 0.49896955015421507
Decision Tree Regression R^2: 0.6192260901447466
Decision Tree Regression MAE: 0.45581096414728683
Decision Tree Regression RMSE: 0.7063777673130823
Dec