In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Configuration
# Stations covered by the synthetic platform dataset. The apostrophe in
# "Maharaja's College" is a right single quotation mark written as the escape
# \u2019 so the source stays ASCII and the name cannot be re-corrupted by
# reading or saving this file with the wrong encoding (it previously appeared
# as the mojibake sequence "â€™").
STATIONS = [
    'Aluva', 'Kalamassery', 'Edappally', 'MG Road',
    'Maharaja\u2019s College', 'SN Junction', 'Tripunithura',
]
# Depots covered by the synthetic depot dataset.
DEPOTS = ['Aluva Depot', 'Muttom Yard']
# Travel directions; one platform row is generated per direction.
DIRECTIONS = ['Up', 'Down']
# Platform identifiers a row can be assigned to.
PLATFORMS = [1, 2]
# First simulated day and the number of consecutive days to generate.
START_DATE = datetime(2025, 10, 1)
DAYS = 90

# 1. Platform-Level Data Generation
# Produces one row per (day, station, 15-minute slot, direction). Demand is a
# random base value scaled by station, peak-hour and weather multipliers.
platform_rows = []
# Times of day from 05:30 to 23:00 inclusive, in 15-minute steps.
time_slots = pd.date_range("05:30", "23:00", freq="15min").time

# Stations that receive the 1.5x base-demand multiplier.
high_demand_stations = ('Edappally', 'MG Road', 'Aluva')

platform_columns = [
    'date', 'time_slot', 'station_name', 'platform_id', 'direction',
    'day_type', 'weather', 'special_event', 'entry_count', 'exit_count',
    'platform_crowd_level', 'avg_dwell_time_sec', 'train_capacity_utilization_pct',
]

for day_offset in range(DAYS):
    current_date = START_DATE + timedelta(days=day_offset)
    # weekday() returns 5 for Saturday and 6 for Sunday.
    day_type = 'Weekend' if current_date.weekday() >= 5 else 'Weekday'

    # Weather and the special-event flag are drawn once per day and shared by
    # every row generated for that day.
    weather = np.random.choice(['Clear', 'Rain', 'Heavy Rain'], p=[0.7, 0.2, 0.1])
    special_event = int(np.random.random() < 0.05)  # 5% chance
    # Any non-clear weather raises demand by 20%.
    weather_mult = 1.0 if weather == 'Clear' else 1.2

    for station in STATIONS:
        station_mult = 1.5 if station in high_demand_stations else 1.0

        for ts in time_slots:
            # Peak windows are hours 8-11 and 17-20 inclusive.
            morning_peak = 8 <= ts.hour <= 11
            evening_peak = 17 <= ts.hour <= 20
            peak_mult = 3.0 if (morning_peak or evening_peak) else 1.0

            for direction in DIRECTIONS:
                # The multiplication order is kept left-to-right so the
                # truncated integer counts match the unrefactored formula.
                base_val = np.random.randint(50, 150)
                scaled_demand = base_val * station_mult * peak_mult * weather_mult
                entry = int(scaled_demand)
                # Exits are the same scaled demand jittered by +/-20%.
                exit_c = int(scaled_demand * np.random.uniform(0.8, 1.2))

                # Utilization treats 400 entries as 100% and is capped there.
                utilization = min(100, (entry / 400) * 100)
                if utilization > 75:
                    crowd = 'High'
                    crowd_dwell_penalty = 20
                elif utilization > 40:
                    crowd = 'Medium'
                    crowd_dwell_penalty = 0
                else:
                    crowd = 'Low'
                    crowd_dwell_penalty = 0
                # 30 s baseline, +20 s when crowded, plus 0-9 s of noise.
                dwell = 30 + crowd_dwell_penalty + np.random.randint(0, 10)
                # The platform is picked at random, independent of direction.
                platform_id = np.random.choice(PLATFORMS)

                platform_rows.append([
                    current_date.date(), ts, station, platform_id,
                    direction, day_type, weather, special_event,
                    entry, exit_c, crowd, dwell, round(utilization, 2),
                ])

platform_df = pd.DataFrame(platform_rows, columns=platform_columns)

# 2. Depot-Level Data Generation
# Produces one row per (day, 15-minute slot, depot) describing how the depot's
# fleet is split between service, maintenance and ready-for-induction, plus
# randomly drawn turnaround time, energy use and fault probability.
depot_rows = []
for d in range(DAYS):
    current_date = START_DATE + timedelta(days=d)
    for ts in time_slots:
        hour = ts.hour
        # Peak status depends only on the time slot, so it is computed once
        # per slot instead of once per depot. Peak hours are 8-11 and 17-20.
        is_peak = (8 <= hour <= 11) or (17 <= hour <= 20)
        # Target share of the fleet in service: more trains run during peaks.
        service_share = 0.8 if is_peak else 0.4
        for depot in DEPOTS:
            total = 25 if depot == 'Muttom Yard' else 10
            # 1-3 trains under maintenance (randint's upper bound is exclusive).
            maint = np.random.randint(1, 4)
            # Trains under maintenance cannot run, so the in-service count is
            # capped at what is left of the fleet. Without the cap a 10-train
            # depot at peak (8 in service) with 3 in maintenance reported
            # 11 trains: the negative remainder was silently clamped to 0.
            in_service = min(int(total * service_share), total - maint)
            # Non-negative by construction; the three categories sum to total.
            ready = total - in_service - maint

            depot_rows.append([
                current_date.date(), ts, depot, total, in_service,
                maint, ready, np.random.randint(15, 25),
                np.random.randint(200, 500), round(np.random.uniform(0.01, 0.05), 4)
            ])

depot_df = pd.DataFrame(depot_rows, columns=[
    'date', 'time_slot', 'depot_name', 'total_trains_available',
    'trains_in_service', 'trains_under_maintenance', 'ready_for_induction',
    'average_turnaround_time_min', 'energy_consumption_kwh', 'fault_probability'
])

# Save files
# Write each dataset to its CSV file (relative path) without the DataFrame
# index column, then report completion.
for frame, csv_name in (
    (platform_df, 'kmrl_platform_data.csv'),
    (depot_df, 'kmrl_depot_data.csv'),
):
    frame.to_csv(csv_name, index=False)

print("Files 'kmrl_platform_data.csv' and 'kmrl_depot_data.csv' generated successfully.")

Files 'kmrl_platform_data.csv' and 'kmrl_depot_data.csv' generated successfully.
