In [1]:
import pandas as pd

# -------------------------------
# STEP 1 - Load all datasets
# -------------------------------
# Three raw CSV inputs, all read relative to the notebook's working directory.
weather = pd.read_csv("../data/bangalore_weather_clean.csv")
passenger = pd.read_csv("../data/passenger_demand.csv")
traffic = pd.read_csv("../data/traffic_data.csv")

print("âœ… Files loaded successfully!")

# -------------------------------
# STEP 2 - Preprocess: build comparable join keys
# -------------------------------
# Each frame gets a datetime 'Date' column so the merges below compare
# like with like. For weather the date is assembled from the Year/Month/Day
# columns only, so it carries no time-of-day component.
# `assign`/`rename` return new frames (no `inplace=True`), which keeps this
# cell safe to re-run.
weather = weather.assign(Date=pd.to_datetime(weather[['Year', 'Month', 'Day']]))
passenger = (
    passenger
    .assign(Date=pd.to_datetime(passenger['Date']))
    # Give the passenger-side weather label a more descriptive name.
    .rename(columns={'Weather': 'Weather_Condition'})
)
traffic = traffic.assign(Date=pd.to_datetime(traffic['Date']))

# -------------------------------
# STEP 3 - Merge datasets
# -------------------------------
# Left joins keep every passenger row. Non-key columns that exist on both
# sides receive pandas' default '_x' (left) / '_y' (right) suffixes.
# NOTE(review): those suffixed names end up in the saved CSV; renaming them
# would be clearer but would change the output schema, so it is left as is.

# Passenger + weather, keyed on calendar date.
merged_pw = pd.merge(passenger, weather, how='left', on='Date')

# A left join never drops rows, so a different length means some passenger
# rows matched several weather rows (i.e. 'Date' is not unique in weather)
# and were duplicated. Report it instead of letting it pass silently.
if len(merged_pw) != len(passenger):
    print(f"WARNING: weather merge changed row count from {len(passenger)} "
          f"to {len(merged_pw)}; 'Date' is not unique in weather.")

# Add traffic, keyed on date and time slot.
merged_final = pd.merge(merged_pw, traffic, how='left', on=['Date', 'Time_Slot'])

# Same fan-out check for the traffic join.
if len(merged_final) != len(merged_pw):
    print(f"WARNING: traffic merge changed row count from {len(merged_pw)} "
          f"to {len(merged_final)}; ('Date', 'Time_Slot') is not unique in traffic.")

# -------------------------------
# STEP 4 - Save final dataset
# -------------------------------
# index=False: the default RangeIndex carries no information worth persisting.
merged_final.to_csv("../data/merged_transit_data.csv", index=False)

print("âœ… merged_transit_data.csv created successfully!")
print(f"Rows: {len(merged_final)}")
print("ðŸ“Š Columns:", list(merged_final.columns))
print("\nSample preview:")
print(merged_final.head(10))


âœ… Files loaded successfully!
âœ… merged_transit_data.csv created successfully!
Rows: 1200
ðŸ“Š Columns: ['Date', 'Route_ID', 'Route_Name', 'Time_Slot', 'Weather_Condition', 'Temperature_C_x', 'Rainfall_mm_x', 'Passenger_Count', 'Year', 'Month', 'Day', 'Hour', 'Rainfall_mm_y', 'SolarRadiation_Wm2', 'Temperature_C_y', 'WindSpeed_mps', 'WindDirection_deg', 'Live_Congestion_Level(%)', 'Usual_Congestion_Level(%)', 'Live_Speed_kmph', 'Delay_Minutes_per_10km', 'Accident_Risk_Level']

Sample preview:
        Date Route_ID              Route_Name  Time_Slot Weather_Condition  \
0 2025-01-01       R1  Majestic to Whitefield    Morning            Cloudy   
1 2025-01-01       R1  Majestic to Whitefield  Afternoon             Foggy   
2 2025-01-01       R1  Majestic to Whitefield    Evening             Sunny   
3 2025-01-01       R1  Majestic to Whitefield      Night             Sunny   
4 2025-01-01       R2         Majestic to BTM    Morning             Rainy   
5 2025-01-01       R2         Ma