In [4]:
# Import libraries
import pandas as pd
df_cleaned = pd.read_csv('output/Railway_info_cleaned.csv')

# Check data
print("Rows loaded:", len(df_cleaned))
print(df_cleaned.head())

Rows loaded: 11113
   Train_No    Train_Name Source_Station_Name  \
0       107  SWV-MAO-VLNK     SAWANTWADI ROAD   
1       108  VLNK-MAO-SWV         MADGOAN JN.   
2       128  MAO-KOP SPEC         MADGOAN JN.   
3       290  PALACE ON WH   DELHI-SAFDAR JANG   
4       401  BSB BHARATDA          AURANGABAD   

             Destination_Station_Name       days  
0                         MADGOAN JN.   Saturday  
1                     SAWANTWADI ROAD     Friday  
2  CHHATRAPATI SHAHU MAHARAJ TERMINUS     Friday  
3                   DELHI-SAFDAR JANG  Wednesday  
4                        VARANASI JN.   Saturday  


In [11]:
# 3.1 Filter trains that operate on Saturday
saturday_trains = df_cleaned[df_cleaned['days'].str.contains('Sat', case=False, na=False)]
print("\n Trains operating on Saturday:", len(saturday_trains))
print(saturday_trains.head())

# 3.2 Filter trains starting from a specific station (e.g., DELHI)
station_name = 'DELHI'  # change as needed
station_trains = df_cleaned[df_cleaned['Source_Station_Name'] == station_name]
print(f"\n Trains starting from {station_name}:", len(station_trains))
print(station_trains.head())


 Trains operating on Saturday: 1593
    Train_No    Train_Name Source_Station_Name Destination_Station_Name  \
0        107  SWV-MAO-VLNK     SAWANTWADI ROAD              MADGOAN JN.   
4        401  BSB BHARATDA          AURANGABAD             VARANASI JN.   
21      1196  NGP-KRMI SPL      NAGPUR JN.(CR)                  KARMALI   
28      1706   JBP-BDTS SF            JABALPUR          BANDRA TERMINUS   
45      2834  SRC-RJT SF A     SANTRAGACHI JN.                   RAJKOT   

        days  Num_Days Train_Category  
0   Saturday         1        Weekend  
4   Saturday         1        Weekend  
21  Saturday         1        Weekend  
28  Saturday         1        Weekend  
45  Saturday         1        Weekend  

 Trains starting from DELHI: 0
Empty DataFrame
Columns: [Train_No, Train_Name, Source_Station_Name, Destination_Station_Name, days, Num_Days, Train_Category]
Index: []


In [12]:
# 4.1 Group by Source Station & count trains
trains_per_source = df_cleaned.groupby('Source_Station_Name').size().reset_index(name='Train_Count')
print("\n Number of trains per source station:")
print(trains_per_source.head())


 Number of trains per source station:
  Source_Station_Name  Train_Count
0        ABHANPUR JN.            2
1              ABOHAR            1
2            ABU ROAD            1
3        ACHHNERA JN.            1
4            ADILABAD            5


In [13]:
# 4.2 Calculate average trains per day (simple estimate)
# Assumption: total trains divided by 7 days
trains_per_source['Avg_Trains_Per_Day'] = trains_per_source['Train_Count'] / 7
print("\n Average trains per day (simple estimate):")
print(trains_per_source.head())


 Average trains per day (simple estimate):
  Source_Station_Name  Train_Count  Avg_Trains_Per_Day
0        ABHANPUR JN.            2            0.285714
1              ABOHAR            1            0.142857
2            ABU ROAD            1            0.142857
3        ACHHNERA JN.            1            0.142857
4            ADILABAD            5            0.714286


In [14]:
# 4.3 (Optional) More precise: Count actual days per train
df_cleaned['Num_Days'] = df_cleaned['days'].apply(lambda x: len(str(x).split(',')))
avg_trains_per_day_precise = df_cleaned.groupby('Source_Station_Name')['Num_Days'].mean().reset_index(name='Avg_Trains_Per_Day')
print("\n Average trains per day (precise by operating days):")
print(avg_trains_per_day_precise.head())




 Average trains per day (precise by operating days):
  Source_Station_Name  Avg_Trains_Per_Day
0        ABHANPUR JN.                 1.0
1              ABOHAR                 1.0
2            ABU ROAD                 1.0
3        ACHHNERA JN.                 1.0
4            ADILABAD                 1.0


In [15]:
# 5.1 Add new column: Train Category (Weekday or Weekend)
def categorize_days(days):
    days = str(days)
    if 'Sat' in days or 'Sun' in days:
        return 'Weekend'
    else:
        return 'Weekday'

df_cleaned['Train_Category'] = df_cleaned['days'].apply(categorize_days)
print("\n Train Category column added:")
print(df_cleaned[['Source_Station_Name', 'days', 'Train_Category']].head())


 Train Category column added:
  Source_Station_Name       days Train_Category
0     SAWANTWADI ROAD   Saturday        Weekend
1         MADGOAN JN.     Friday        Weekday
2         MADGOAN JN.     Friday        Weekday
3   DELHI-SAFDAR JANG  Wednesday        Weekday
4          AURANGABAD   Saturday        Weekend


In [16]:
df_cleaned.to_csv('output/Railway_info_enriched.csv', index=False)
print("\n Enriched CSV saved as: output/Railway_info_enriched.csv")

print("\n LEVEL 2 COMPLETED SUCCESSFULLY!")


 Enriched CSV saved as: output/Railway_info_enriched.csv

 LEVEL 2 COMPLETED SUCCESSFULLY!
