In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import joblib








In [3]:
# Load dataset
data = pd.read_csv('parking_zones.csv')

# Separate features and target variable
X = data.drop('Zone Type', axis=1)
y = data['Zone Type']


# Check class distribution before SMOTE
class_distribution_before = y.value_counts()
print("Class Distribution Before SMOTE:")
print(class_distribution_before)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply SMOTE to oversample the minority class (Safe Zone)
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Check class distribution after SMOTE
class_distribution_after = pd.Series(y_train_resampled).value_counts()
print("\nClass Distribution After SMOTE:")
print(class_distribution_after)


Class Distribution Before SMOTE:
Zone Type
Danger Zone    859
Safe Zone      141
Name: count, dtype: int64

Class Distribution After SMOTE:
Zone Type
Danger Zone    689
Safe Zone      689
Name: count, dtype: int64


In [4]:
# Initialize RandomForestClassifier
rf_clf = RandomForestClassifier(random_state=42)

# Perform grid search for hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(estimator=rf_clf, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train_resampled, y_train_resampled)

# Get the best model from grid search
best_rf_clf = grid_search.best_estimator_

# Predict on test data
y_pred = best_rf_clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

# Save the model
joblib.dump(best_rf_clf, 'parking_zone_model.pkl')
print("Model saved as 'parking_zone_model.pkl'")

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Model Accuracy: 0.98
              precision    recall  f1-score   support

 Danger Zone       0.98      1.00      0.99       170
   Safe Zone       1.00      0.90      0.95        30

    accuracy                           0.98       200
   macro avg       0.99      0.95      0.97       200
weighted avg       0.99      0.98      0.98       200

Model saved as 'parking_zone_model.pkl'
