In [1]:
 ! pip install scikit-learn



In [2]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [7]:
# Load and preprocess the dataset
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

In [9]:
# check missing values-checking for missing values ensures data completeness
df.isnull().sum()

MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
target        0
dtype: int64

In [11]:
# standardization
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df.drop('target', axis=1))
scaled_df = pd.DataFrame(scaled_features, columns=df.columns[:-1])
scaled_df['target'] = df['target']

In [15]:
# Split features and target variable
X = scaled_df.drop('target', axis=1)
y = scaled_df['target']

In [17]:
# Implement regression algorithms
# 1. Linear Regression
lr_model = LinearRegression()
lr_model.fit(scaled_df.drop('target', axis=1), scaled_df['target'])
print("Linear Regression Coefficients:", lr_model.coef_)
print("Intercept:", lr_model.intercept_)


Linear Regression Coefficients: [ 0.8296193   0.11875165 -0.26552688  0.30569623 -0.004503   -0.03932627
 -0.89988565 -0.870541  ]
Intercept: 2.0685581690891386


In [19]:
# Random Forest model
rf_model = RandomForestRegressor()
rf_model.fit(scaled_df.drop('target', axis=1), scaled_df['target'])
print("Random Forest Feature Importances:", rf_model.feature_importances_)

Random Forest Feature Importances: [0.51981449 0.05300094 0.04641399 0.03009936 0.03082312 0.13646702
 0.09194925 0.09143183]


In [20]:
# Decision Tree model
dt_model = DecisionTreeRegressor()
dt_model.fit(scaled_df.drop('target', axis=1), scaled_df['target'])
print("Feature Importances:", dt_model.feature_importances_)

Feature Importances: [0.52496246 0.05110571 0.05301587 0.0271709  0.03244387 0.13231149
 0.09384651 0.0851432 ]


In [21]:
# Gradient Boosting model
gb_model = GradientBoostingRegressor()
gb_model.fit(scaled_df.drop('target', axis=1), scaled_df['target'])
print("Gradient Boosting Feature Importances:", gb_model.feature_importances_)

Gradient Boosting Feature Importances: [0.6001048  0.03278221 0.01959819 0.00437646 0.00310677 0.12575372
 0.09430189 0.11997596]


In [22]:
# SVR model
svr_model = SVR()
svr_model.fit(scaled_df.drop('target', axis=1), scaled_df['target'])
predictions = svr_model.predict(scaled_df.drop('target', axis=1))
print("SVR Predictions:", predictions[:5])

SVR Predictions: [4.40193096 4.3042361  4.30026265 3.40263268 2.45377066]


In [33]:
# Function to evaluate models
def evaluate_model(model, X, y):
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    mae = mean_absolute_error(y, predictions)
    r2 = r2_score(y, predictions)
    return mse, mae, r2

# Evaluate all models
mse_lr, mae_lr, r2_lr = evaluate_model(lr_model, X, y)
mse_dt, mae_dt, r2_dt = evaluate_model(dt_model, X, y)
mse_rf, mae_rf, r2_rf = evaluate_model(rf_model, X, y)
mse_gb, mae_gb, r2_gb = evaluate_model(gb_model, X, y)
mse_svr, mae_svr, r2_svr = evaluate_model(svr_model, X, y)

# Store evaluation results
results = {
    'Linear Regression': [mse_lr, mae_lr, r2_lr],
    'Decision Tree': [mse_dt, mae_dt, r2_dt],
    'Random Forest': [mse_rf, mae_rf, r2_rf],
    'Gradient Boosting': [mse_gb, mae_gb, r2_gb],
    'SVR': [mse_svr, mae_svr, r2_svr]
}

# Display the results
results_df = pd.DataFrame(results, index=['MSE', 'MAE', 'R2'])
print(results_df)



     Linear Regression  Decision Tree  Random Forest  Gradient Boosting  \
MSE           0.524321   1.063281e-31       0.034269           0.261884   
MAE           0.531164   4.449498e-17       0.119416           0.356249   
R2            0.606233   1.000000e+00       0.974263           0.803324   

          SVR  
MSE  0.332568  
MAE  0.381632  
R2   0.750240  
