In [73]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report,r2_score

In [60]:
try:
    bus_df = pd.read_csv('bus_processed_output.csv')  # Assuming pre-processing is already done
except FileNotFoundError:
    print("Bus route file not found!")

In [22]:
try:
    rainfall_df = pd.read_csv('chennai-monthly-rains.csv')  # Make sure this file is pre-processed too
except FileNotFoundError:
    print("Rainfall data file not found!")

In [27]:
total_rainfall = rainfall_df['Total'].mean()  # or use a specific value if needed
bus_df['rainfall_index'] = total_rainfall

In [29]:
X_high_freq = bus_df[['starting_point', 'ending_point', 'via', 'rainfall_index']]  # Using relevant columns
y_high_freq = bus_df['high_frequency_route'] 

In [30]:
X_train_high, X_test_high, y_train_high, y_test_high = train_test_split(X_high_freq, y_high_freq, test_size=0.2, random_state=42)

In [35]:
X_train_high = pd.get_dummies(X_train_high)
X_test_high = pd.get_dummies(X_test_high)


In [36]:
X_train_high, X_test_high = X_train_high.align(X_test_high, join='left', axis=1, fill_value=0)

In [37]:
rf_classifier_high = RandomForestClassifier()
rf_classifier_high.fit(X_train_high, y_train_high)

In [38]:
y_pred_high = rf_classifier_high.predict(X_test_high)
print("High-Frequency Route Classification Report:\n", classification_report(y_test_high, y_pred_high))


High-Frequency Route Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.99      0.85       107
           1       0.67      0.05      0.10        39

    accuracy                           0.74       146
   macro avg       0.70      0.52      0.47       146
weighted avg       0.72      0.74      0.65       146



In [39]:
X_night = bus_df[['starting_point', 'ending_point', 'via', 'rainfall_index']]  # Using relevant columns
y_night = bus_df['night_service_route'] 

In [40]:
X_train_night, X_test_night, y_train_night, y_test_night = train_test_split(X_night, y_night, test_size=0.2, random_state=42)


In [50]:
X_train_night = pd.get_dummies(X_train_night)
X_test_night = pd.get_dummies(X_test_night)

In [51]:
X_train_night, X_test_night = X_train_night.align(X_test_night, join='left', axis=1, fill_value=0)


In [52]:
rf_classifier_night = RandomForestClassifier()
rf_classifier_night.fit(X_train_night, y_train_night)

In [53]:
y_pred_night = rf_classifier_night.predict(X_test_night)
print("Night Service Route Classification Report:\n", classification_report(y_test_night, y_pred_night))

Night Service Route Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.98       140
           1       0.00      0.00      0.00         6

    accuracy                           0.95       146
   macro avg       0.48      0.50      0.49       146
weighted avg       0.92      0.95      0.94       146



In [54]:
X_low_freq = bus_df[['starting_point', 'ending_point', 'via', 'rainfall_index']]  # Using relevant columns
y_low_freq = bus_df['low_frequency_route']

In [55]:
X_train_low, X_test_low, y_train_low, y_test_low = train_test_split(X_low_freq, y_low_freq, test_size=0.2, random_state=42)


In [56]:
X_train_low = pd.get_dummies(X_train_low)
X_test_low = pd.get_dummies(X_test_low)

In [57]:
X_train_low, X_test_low = X_train_low.align(X_test_low, join='left', axis=1, fill_value=0)


In [58]:
rf_classifier_low = RandomForestClassifier()
rf_classifier_low.fit(X_train_low, y_train_low)

In [59]:
y_pred_low = rf_classifier_low.predict(X_test_low)
print("Low-Frequency Route Classification Report:\n", classification_report(y_test_low, y_pred_low))

Low-Frequency Route Classification Report:
               precision    recall  f1-score   support

           0       0.82      1.00      0.90       119
           1       0.00      0.00      0.00        27

    accuracy                           0.82       146
   macro avg       0.41      0.50      0.45       146
weighted avg       0.66      0.82      0.73       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [61]:
X_fleet = bus_df[['starting_point', 'ending_point', 'via', 'rainfall_index','high_frequency_route','night_service_route','low_frequency_route']]
y_fleet = bus_df['bus_count']

In [62]:
X_train_fleet, X_test_fleet, y_train_fleet, y_test_fleet = train_test_split(X_fleet, y_fleet, test_size=0.2, random_state=42)


In [64]:
X_train_fleet = pd.get_dummies(X_train_fleet)
X_test_fleet = pd.get_dummies(X_test_fleet)

In [65]:
X_train_fleet, X_test_fleet = X_train_fleet.align(X_test_fleet, join='left', axis=1, fill_value=0)


In [66]:
rf_regressor_fleet = RandomForestRegressor()
rf_regressor_fleet.fit(X_train_fleet, y_train_fleet)

In [67]:
y_pred_fleet = rf_regressor_fleet.predict(X_test_fleet)

In [74]:
mse_fleet = mean_squared_error(y_test_fleet, y_pred_fleet)
r2_fleet = r2_score(y_test_fleet, y_pred_fleet)
print("Fleet Management Model Mean Squared Error:", mse_fleet)
print("Fleet Management Model R^2 Score:", r2_fleet)

Fleet Management Model Mean Squared Error: 11.77459109589041
Fleet Management Model R^2 Score: -0.05561474319072679
