In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# deprecated the old names; pick whichever name this installation provides so
# the notebook runs on both older and newer matplotlib versions.
plt.style.use(
    'seaborn-v0_8-darkgrid'
    if 'seaborn-v0_8-darkgrid' in plt.style.available
    else 'seaborn-darkgrid'
)

In [None]:
# Load dataset: six pollutant readings are the features, the city 'Type' is the target
data = pd.read_csv('City_types.csv')
X = data[['CO', 'NO2', 'SO2', 'O3', 'PM2.5', 'PM10']]

# Encode the target as 0/1. Series.map yields NaN for any label that is not a
# key of the mapping, so check for that here and name the offending labels
# instead of letting NaN targets reach the model.
y = data['Type'].map({'Industrial': 0, 'Residential': 1})
unmapped_labels = data.loc[y.isna(), 'Type'].unique().tolist()
if unmapped_labels:
    raise ValueError(
        f"Unexpected or missing values in 'Type' column: {unmapped_labels}"
    )

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Baseline model: a Random Forest with default hyperparameters (seeded so the
# result is repeatable), fitted on the training split and scored on the test split.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
predictions = rf.predict(X_test)

baseline_accuracy = accuracy_score(y_test, predictions)
print('Initial Accuracy:', baseline_accuracy)
print(classification_report(y_test, predictions))

In [None]:
# Grid Search CV: try every one of the 3 * 4 * 3 = 36 hyperparameter
# combinations below, each evaluated with 3-fold cross-validation on the
# training split, using all available cores (n_jobs=-1).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)
grid_search = GridSearchCV(rf, param_grid, cv=3, n_jobs=-1).fit(X_train, y_train)
print('Best Grid Search Params:', grid_search.best_params_)

# Score the best estimator found by the search on the held-out test split
best_grid = grid_search.best_estimator_
grid_predictions = best_grid.predict(X_test)
grid_accuracy = accuracy_score(y_test, grid_predictions)
print('Grid Search Accuracy:', grid_accuracy)
print(classification_report(y_test, grid_predictions))

In [None]:
# Randomized Search CV: sample 10 candidate settings (n_iter=10) from the
# space below, each evaluated with 3-fold cross-validation on the training
# split. The fixed random_state makes the sampled candidates repeatable.
param_dist = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    bootstrap=[True, False],
)
random_search = RandomizedSearchCV(
    rf,
    param_dist,
    n_iter=10,
    cv=3,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)
print('Best Random Search Params:', random_search.best_params_)

# Score the best estimator found by the search on the held-out test split
best_random = random_search.best_estimator_
random_predictions = best_random.predict(X_test)
random_accuracy = accuracy_score(y_test, random_predictions)
print('Random Search Accuracy:', random_accuracy)
print(classification_report(y_test, random_predictions))

In [None]:
# Feature importances of the baseline model (`rf`) and of the best
# grid-search model (`best_grid`), labelled with the feature column names.
features = X.columns
feature_importances = rf.feature_importances_
best_feature_importances = best_grid.feature_importances_

# Draw one horizontal bar chart per model: before tuning first, then after.
# plt.show() inside the loop ends each figure before the next one is drawn.
for importances, chart_title in (
    (feature_importances, 'Feature Importances Before Tuning'),
    (best_feature_importances, 'Feature Importances After Tuning (Grid Search)'),
):
    sns.barplot(x=importances, y=features)
    plt.title(chart_title)
    plt.show()