In [None]:
import pandas as pd
# Absolute location of the Boston CSV that this notebook reads.
file_path = "/content/Boston.csv"

# Parse the CSV into a DataFrame; later cells refer to it as `boston_data`.
boston_data = pd.read_csv(filepath_or_buffer=file_path)

# Show the first five rows (the default for `head`) as the cell output.
boston_data.head(n=5)


Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Binary target: 1 where `crim` is strictly greater than the median of `crim`
# taken over the whole frame, 0 otherwise.
boston_data['crime_above_median'] = (
    boston_data['crim'].gt(boston_data['crim'].median()).astype(int)
)

# Predictors: every column except the `Unnamed: 0` index column, the raw crime
# rate, and the target that was just derived from it.
X = boston_data.drop(columns=['Unnamed: 0', 'crim', 'crime_above_median'])
y = boston_data['crime_above_median']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling parameters from the training rows only, then apply that
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Preview the scaled training matrix, relabelled with the predictor names.
pd.DataFrame(X_train_scaled, columns=X.columns).head()


Unnamed: 0,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat,medv
0,-0.505125,-1.292142,-0.281546,-0.851085,0.145264,-0.365584,1.081628,-0.746179,-1.11279,0.187271,-1.015316,0.606292
1,-0.505125,-0.162083,-0.281546,-0.087967,-0.208401,0.133941,-0.487876,-0.398464,0.150088,-0.21209,-0.053663,-0.193681
2,-0.505125,-0.609489,-0.281546,-0.936828,-0.896237,-1.2669,0.628596,-0.746179,-1.046639,-0.167716,-0.311324,-0.396341
3,-0.505125,-0.43197,-0.281546,-0.165136,-0.543965,-1.429789,0.345133,-0.630274,-0.601625,1.207859,-0.822422,0.008979
4,-0.505125,1.0055,-0.281546,0.194987,-0.556496,0.079645,-0.403892,1.687825,1.557294,0.852872,0.8038,-0.417673


Here, I have created a binary response variable, `crime_above_median`, which takes the value 1 when the crime rate (`crim`) is above the median and 0 when it is at or below the median.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Build the classifier (seed pinned to 42) and fit it on the scaled training
# rows in one step; `fit` returns the estimator itself.
log_reg = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Class predictions for the held-out rows, and the fraction that are correct.
log_reg_pred = log_reg.predict(X_test_scaled)
log_reg_acc = accuracy_score(y_test, log_reg_pred)

# One-row summary table of the headline metric.
model_performance = pd.DataFrame(
    [{'Model': 'Logistic Regression', 'Accuracy': log_reg_acc}],
    columns=['Model', 'Accuracy'],
)

# Per-class precision / recall / F1 as a nested dict, kept for later use.
log_reg_report = classification_report(y_test, log_reg_pred, output_dict=True)

# Rich table first, then the plain-text report under its heading.
display(model_performance)
print(
    "Classification Report for Logistic Regression:",
    classification_report(y_test, log_reg_pred),
    sep="\n",
)

# Cell output: the summary table together with the report dict.
model_performance, log_reg_report


Unnamed: 0,Model,Accuracy
0,Logistic Regression,0.842105


Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.78      0.95      0.86        76
           1       0.93      0.74      0.82        76

    accuracy                           0.84       152
   macro avg       0.86      0.84      0.84       152
weighted avg       0.86      0.84      0.84       152



(                 Model  Accuracy
 0  Logistic Regression  0.842105,
 {'0': {'precision': 0.782608695652174,
   'recall': 0.9473684210526315,
   'f1-score': 0.8571428571428571,
   'support': 76.0},
  '1': {'precision': 0.9333333333333333,
   'recall': 0.7368421052631579,
   'f1-score': 0.8235294117647058,
   'support': 76.0},
  'accuracy': 0.8421052631578947,
  'macro avg': {'precision': 0.8579710144927537,
   'recall': 0.8421052631578947,
   'f1-score': 0.8403361344537814,
   'support': 152.0},
  'weighted avg': {'precision': 0.8579710144927537,
   'recall': 0.8421052631578947,
   'f1-score': 0.8403361344537815,
   'support': 152.0}})

Logistic Regression:

Precision (Class 0): 0.78
Recall (Class 0): 0.95
F1-Score (Class 0): 0.86
Precision (Class 1): 0.93
Recall (Class 1): 0.74
F1-Score (Class 1): 0.82
Overall Accuracy: 0.8421

In [None]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error

# Regression task: predict the raw crime rate (`crim`) from every column except
# the `Unnamed: 0` index column, `crim` itself, and the binary target derived
# from it. This cell needs `crime_above_median` to exist already, so the cell
# that creates it must have been run first.
X_reg = boston_data.drop(['Unnamed: 0', 'crim', 'crime_above_median'], axis=1)
y_reg = boston_data['crim']

# Split the data into training and testing sets (70% training, 30% testing);
# the fixed seed keeps the split repeatable.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.3, random_state=42)

# Standardize the features: the scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler_reg = StandardScaler()
X_train_scaled_reg = scaler_reg.fit_transform(X_train_reg)
X_test_scaled_reg = scaler_reg.transform(X_test_reg)

# Two regularized linear models with fixed (untuned) penalty strengths.
ridge = Ridge(alpha=1.0, random_state=42)  # Regularization strength = 1.0
lasso = Lasso(alpha=0.1, random_state=42)  # Regularization strength = 0.1

# Train both models on the scaled training rows
ridge.fit(X_train_scaled_reg, y_train_reg)
lasso.fit(X_train_scaled_reg, y_train_reg)

# Predict the crime rate for the held-out rows
ridge_pred = ridge.predict(X_test_scaled_reg)
lasso_pred = lasso.predict(X_test_scaled_reg)

# Evaluate the models using Mean Squared Error (MSE) on the test set
ridge_mse = mean_squared_error(y_test_reg, ridge_pred)
lasso_mse = mean_squared_error(y_test_reg, lasso_pred)

# Side-by-side comparison table
regression_performance = pd.DataFrame({
    'Model': ['Ridge Regression', 'Lasso Regression'],
    'Mean Squared Error (MSE)': [ridge_mse, lasso_mse]
})

display(regression_performance)

# Cell output: the two MSE values at full precision. (A bare
# `regression_performance` expression used to sit just above this line; only
# the last expression of a cell is auto-displayed, so it had no effect and was
# removed.)
ridge_mse, lasso_mse


Unnamed: 0,Model,Mean Squared Error (MSE)
0,Ridge Regression,46.726959
1,Lasso Regression,47.314425


(46.72695922739183, 47.31442490271197)

Here, I have used the Mean Squared Error (MSE) on the test set to compare the two models.

Ridge obtained a slightly lower MSE (46.73) than Lasso (47.31), indicating that, at these regularization strengths, Ridge predicts the crime rate (`crim`) slightly better than Lasso.