In [None]:
import pandas as pd
import numpy as np
# Dataset URL (whitespace-delimited text; read below with header=None)
data_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.data"
# Column names, supplied explicitly because the file is read without a header row
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Load the dataset. `sep=r"\s+"` splits fields on runs of whitespace; it is the
# documented replacement for `delim_whitespace=True`, which is deprecated and
# makes pandas emit a warning on every call.
df = pd.read_csv(data_url, header=None, sep=r"\s+", names=column_names)
print("First 5 rows of the dataset:")
print(df.head())
print("\nDataset Info:")
# info() writes its report directly to stdout, so it is not wrapped in print().
df.info()

First 5 rows of the dataset:
      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296.0   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242.0   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242.0   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222.0   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222.0   

   PTRATIO       B  LSTAT  MEDV  
0     15.3  396.90   4.98  24.0  
1     17.8  396.90   9.14  21.6  
2     17.8  392.83   4.03  34.7  
3     18.7  394.63   2.94  33.4  
4     18.7  396.90   5.33  36.2  

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CRIM     506 non-null    float64
 1   ZN       506 non-null    float64
 2   INDUS    506 non-null    float64
 3   CHAS     506 

  df = pd.read_csv(data_url, header=None, delim_whitespace=True, names=column_names)


In [None]:
from sklearn.model_selection import train_test_split

# Separate the response from the predictors: 'MEDV' is the target,
# every remaining column is used as a feature.
y = df['MEDV']
X = df.drop(columns='MEDV')

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the dimensions of each partition.
for label, part in (
    ("Shape of X_train:", X_train),
    ("Shape of X_test:", X_test),
    ("Shape of y_train:", y_train),
    ("Shape of y_test:", y_test),
):
    print(label, part.shape)

Shape of X_train: (404, 13)
Shape of X_test: (102, 13)
Shape of y_train: (404,)
Shape of y_test: (102,)


In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# All three estimators are built with their default settings and trained on
# the same training split. fit() returns the estimator itself, so construction
# and training are chained into a single expression.

# 1. Linear Regression
lr_model = LinearRegression().fit(X_train, y_train)
print("Linear Regression model trained successfully.")

# 2. Ridge Regression
ridge_model = Ridge().fit(X_train, y_train)
print("Ridge Regression model trained successfully.")

# 3. Lasso Regression
lasso_model = Lasso().fit(X_train, y_train)
print("Lasso Regression model trained successfully.")

Linear Regression model trained successfully.
Ridge Regression model trained successfully.
Lasso Regression model trained successfully.


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


def _evaluate_on_test(fitted_model):
    """Score a fitted model against the held-out test split.

    Reads the notebook-level ``X_test`` and ``y_test``.

    Parameters
    ----------
    fitted_model : object
        An already-trained estimator exposing ``predict``.

    Returns
    -------
    tuple
        ``(predictions, mse, rmse, r2, mae)`` in that order, where ``rmse``
        is the square root of ``mse``.
    """
    predictions = fitted_model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    return predictions, mse, rmse, r2, mae


# One helper call per model replaces three copy-pasted evaluation stanzas.
# Every name the original cell defined is still bound here.

# Predictions and evaluation for Linear Regression
y_pred_lr, mse_lr, rmse_lr, r2_lr, mae_lr = _evaluate_on_test(lr_model)
# Predictions and evaluation for Ridge Regression
y_pred_ridge, mse_ridge, rmse_ridge, r2_ridge, mae_ridge = _evaluate_on_test(ridge_model)
# Predictions and evaluation for Lasso Regression
y_pred_lasso, mse_lasso, rmse_lasso, r2_lasso, mae_lasso = _evaluate_on_test(lasso_model)

In [None]:
# Assemble the comparison table column by column; within each column the
# entries are ordered Linear, Ridge, Lasso to line up with the 'Model' labels.
results_data = dict([
    ('Model', ['Linear Regression', 'Ridge Regression', 'Lasso Regression']),
    ('MSE', [mse_lr, mse_ridge, mse_lasso]),
    ('RMSE', [rmse_lr, rmse_ridge, rmse_lasso]),
    ('R²', [r2_lr, r2_ridge, r2_lasso]),
    ('MAE', [mae_lr, mae_ridge, mae_lasso]),
])

results_df = pd.DataFrame(data=results_data)

# Show the table rounded to four decimals; results_df itself keeps full precision.
print("Model Performance Comparison:")
print(results_df.round(decimals=4))

Model Performance Comparison:
               Model      MSE    RMSE      R²     MAE
0  Linear Regression  24.2911  4.9286  0.6688  3.1891
1   Ridge Regression  24.4772  4.9474  0.6662  3.1329
2   Lasso Regression  24.4095  4.9406  0.6671  3.2535
