In [46]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso,ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [47]:
# Load the dataset
boston_data = pd.read_csv('/Users/mahendra/Documents/Boston.csv')

In [48]:
# Check for missing values
missing_values = boston_data.isnull().sum()
print("Missing Values:\n", missing_values)

# Define a function to handle outliers using the IQR method
def handle_outliers(df, columns):
    for column in columns:
        Q1 = df[column].quantile(0.25)
        Q3 = df[column].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        
        # Cap the outliers
        df[column] = df[column].apply(lambda x: lower_bound if x < lower_bound else x)
        df[column] = df[column].apply(lambda x: upper_bound if x > upper_bound else x)
        
    return df


Missing Values:
 crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
black      0
lstat      0
medv       0
dtype: int64


In [49]:
# Apply the outlier handling function to the dataset
columns_with_outliers = ['crim', 'zn', 'rm', 'black', 'lstat']
boston_data_cleaned = handle_outliers(boston_data.copy(), columns_with_outliers)

# Define the features (X) and target (y)
X = boston_data_cleaned.drop('crim', axis=1)
y = boston_data_cleaned['crim']


In [50]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features for SVM and MLP
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [51]:
# Train and evaluate Linear Regression
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)
y_pred = linear_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Linear Regression - MSE:", mse, "R2:", r2)

Linear Regression - MSE: 1.2897734324233345 R2: 0.8869846524837794


In [52]:
# Train and evaluate Ridge Regression
ridge_reg = Ridge(alpha=1.0)
ridge_reg.fit(X_train, y_train)
y_pred_ridge = ridge_reg.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)
print("Ridge Regression - MSE:", mse_ridge, "R2:", r2_ridge)


Ridge Regression - MSE: 1.2933167332444493 R2: 0.8866741736325441


In [53]:
# Train and evaluate Lasso Regression
lasso_reg = Lasso(alpha=0.1)
lasso_reg.fit(X_train, y_train)
y_pred_lasso = lasso_reg.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)
print("Lasso Regression - MSE:", mse_lasso, "R2:", r2_lasso)

Lasso Regression - MSE: 1.3690951216949356 R2: 0.8800341540061047


In [54]:
# Train and evaluate Elastic Net Regression
elastic_net_reg = ElasticNet(alpha=0.1, l1_ratio=0.5)
elastic_net_reg.fit(X_train, y_train)
y_pred_enet = elastic_net_reg.predict(X_test)
mse_enet = mean_squared_error(y_test, y_pred_enet)
r2_enet = r2_score(y_test, y_pred_enet)
print("Elastic Net Regression - MSE:", mse_enet, "R2:", r2_enet)

Elastic Net Regression - MSE: 1.3466185489222644 R2: 0.8820036454059271


In [55]:
# Train and evaluate Support Vector Machine (SVM)
svm_reg = SVR(kernel='rbf', C=1, gamma=0.1, epsilon=.1)
svm_reg.fit(X_train_scaled, y_train)
y_pred_svm = svm_reg.predict(X_test_scaled)
mse_svm = mean_squared_error(y_test, y_pred_svm)
r2_svm = r2_score(y_test, y_pred_svm)
print("Support Vector Machine (SVM) - MSE:", mse_svm, "R2:", r2_svm)

Support Vector Machine (SVM) - MSE: 1.2000210070983905 R2: 0.8948491357205476


In [56]:
# Train and evaluate Multi-Layer Perceptron (MLP)
mlp_reg = MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=1000, random_state=42)
mlp_reg.fit(X_train_scaled, y_train)
y_pred_mlp = mlp_reg.predict(X_test_scaled)
mse_mlp = mean_squared_error(y_test, y_pred_mlp)
r2_mlp = r2_score(y_test, y_pred_mlp)
print("Multi-Layer Perceptron (MLP) - MSE:", mse_mlp, "R2:", r2_mlp)

Multi-Layer Perceptron (MLP) - MSE: 0.8041741717788742 R2: 0.929534892561403


In [43]:
elastic_net_reg.alpha

0.1

In [28]:
check_df = boston_data_cleaned.head(1).drop('crim',axis=1)

In [29]:
check_df_scaled = scaler.fit_transform(check_df)

In [31]:
linear_reg.predict(check_df)

array([-0.65677258])

In [32]:
ridge_reg.predict(check_df)

array([-0.66164865])

In [33]:
lasso_reg.predict(check_df)

array([-0.78937814])

In [25]:
elastic_net_reg.predict(check_df)

array([-0.72944907])

In [34]:
svm_reg.predict(check_df_scaled)

array([0.73690687])

In [35]:
mlp_reg.predict(check_df_scaled)

array([0.37689467])

In [58]:
import statsmodels.api as sm
import pandas as pd

# Add a constant term to the predictors
X_multiple = sm.add_constant(X)

# Fit a multiple regression model
model_multiple = sm.OLS(y, X_multiple).fit()

# Print the summary of the model
print(model_multiple.summary())


                            OLS Regression Results                            
Dep. Variable:                   crim   R-squared:                       0.895
Model:                            OLS   Adj. R-squared:                  0.892
Method:                 Least Squares   F-statistic:                     321.3
Date:                Fri, 12 Jul 2024   Prob (F-statistic):          1.16e-230
Time:                        22:01:50   Log-Likelihood:                -754.38
No. Observations:                 506   AIC:                             1537.
Df Residuals:                     492   BIC:                             1596.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9882      1.700      0.581      0.5