# In [None]:

#Author : Santosh Kumar Udayakumar
#Function : Build the subgraph between Nybroplan and Ropsten and calculate the elapsed time between the stops.

import pandas as pd
import json
from datetime import timedelta

# Read the master route table (compression is inferred from the .gz suffix)
data = pd.read_csv("routes_master_data.csv.gz")

# Water-mode rows, restricted to the listed Private_Code values
water_routes = data.loc[
    (data['TransportMode'] == 'water')
    & data['Private_Code'].isin([80, 82, 83, 84, 89])
]

# Rows (any transport mode) whose Private_Code is one of the listed values
stops = data.loc[data['Private_Code'].isin([13, 76, 57, 67, 53, 515, 176])]

# Rows of the 'Dalarö' route
route = data.loc[data['Route_Name'] == 'Dalarö']

# Distinct route names present in the filtered water rows
unique_route_names = water_routes['Route_Name'].unique()

# Filtered water rows of the 'Norra Lagnö' route
Norra = water_routes.loc[water_routes['Route_Name'] == 'Norra Lagnö']

# Column names of the table, as a plain list
water_routes_columns = list(water_routes.columns)

# Keep only the columns used by the exports below
routes_df = data[[
    'Route_Name', 'Direction', 'Private_Code', 'StopPlace.Name', 'Order',
    'StopPlace.Latitude', 'StopPlace.Longitude', 'TransportMode',
    'ArrivalTime', 'DepartureTime',
]]

# Write the first 100 rows of the selection to stops.csv
stop_points = routes_df.iloc[:100]
stop_points.to_csv('stops.csv', index=False)

# Keep the first occurrence of every
# (route, direction, code, stop name, order, mode) combination.
# NOTE(review): routes_df is taken from the full table, so water_routes.csv is
# not restricted to TransportMode == 'water' -- confirm this is intended.
updates_routes = routes_df.loc[
    ~routes_df.duplicated(
        subset=['Route_Name', 'Direction', 'Private_Code', 'StopPlace.Name',
                'Order', 'TransportMode'],
        keep='first',
    )
]
updates_routes.to_csv('water_routes.csv', index=False)

# Generate routes JSON: one entry per (Route_Name, Direction) pair, each with
# the list of its stops (order, name, mode, coordinates).
#
# The groups are iterated directly rather than through groupby().apply():
# passing the grouping columns to the apply() callback is deprecated since
# pandas 2.2, so group['Route_Name'] / group['Direction'] are not guaranteed
# to be available there. Iterating always yields the complete sub-frame,
# visits the groups in the same sorted-key order, and simply produces an
# empty list when the table has no rows.
routes_json = [
    {
        "Route_Name": group['Route_Name'].iloc[0],
        "Direction": group['Direction'].iloc[0],
        "Stops": group[['Order', 'StopPlace.Name', 'TransportMode',
                        'StopPlace.Latitude', 'StopPlace.Longitude']]
        .rename(columns={
            'StopPlace.Name': 'StopName',
            'StopPlace.Latitude': 'Latitude',
            'StopPlace.Longitude': 'Longitude',
        })
        .to_dict(orient='records'),
    }
    for _, group in (
        updates_routes
        # A stop is listed once per (route, direction, order, position, name, mode)
        .drop_duplicates(subset=['Route_Name', 'Direction', 'Order', 'StopPlace.Latitude',
                                 'StopPlace.Longitude', 'StopPlace.Name', 'TransportMode'])
        .groupby(['Route_Name', 'Direction'])
    )
]

with open('other_routes.json', 'w') as f:
    json.dump(routes_json, f, indent=4)

# Reassign unique sequential order numbers and generate JSON: each stop is kept
# once per (route, direction, name, position), the rows are sorted by their
# original Order, and Order is then renumbered 1..n within every
# (Route_Name, Direction) pair.
#
# The groups are iterated directly rather than through groupby().apply():
# passing the grouping columns to the apply() callback is deprecated since
# pandas 2.2, so group['Route_Name'] / group['Direction'] are not guaranteed
# to be available there. Iterating always yields the complete sub-frame,
# visits the groups in the same sorted-key order, and simply produces an
# empty list when the table has no rows.
routes_json = [
    {
        "Route_Name": group['Route_Name'].iloc[0],
        "Direction": group['Direction'].iloc[0],
        "Stops": group[['Order', 'StopPlace.Name', 'TransportMode',
                        'StopPlace.Latitude', 'StopPlace.Longitude']]
        .rename(columns={
            'StopPlace.Name': 'StopName',
            'StopPlace.Latitude': 'Latitude',
            'StopPlace.Longitude': 'Longitude',
        })
        .to_dict(orient='records'),
    }
    for _, group in (
        updates_routes
        .drop_duplicates(subset=['Route_Name', 'Direction', 'StopPlace.Name',
                                 'StopPlace.Latitude', 'StopPlace.Longitude'])
        .sort_values(by=['Route_Name', 'Direction', 'Order'])
        # cumcount() is 0-based, hence the +1
        .assign(Order=lambda x: x.groupby(['Route_Name', 'Direction']).cumcount() + 1)
        .groupby(['Route_Name', 'Direction'])
    )
]

with open('water_routes.json', 'w') as f:
    json.dump(routes_json, f, indent=4)

# Load JSON files
#a = json.load(open('other_routes.json'))
#b = json.load(open('water_routes.json'))

# TimeDifference: minutes from ArrivalTime to DepartureTime of the same row.
# NOTE(review): the file header speaks of elapsed time between stops, while
# this value is computed within a single row -- confirm which one is wanted.
stops_data = routes_df.assign(
    TimeDifference=lambda frame: (
        pd.to_datetime(frame['DepartureTime'])
        - pd.to_datetime(frame['ArrivalTime'])
    ).dt.total_seconds() / 60
)

# Switch to the output column names, then attach the stop name found on the
# following row as NeighborStop (the last row gets a missing value).
# NOTE(review): the shift runs over the whole table, so the last stop of one
# route/direction is paired with the first stop of whatever row comes next --
# confirm whether it should be limited to rows of the same route.
output_data = stops_data.rename(columns={
    'StopPlace.Name': 'StopName',
    'Route_Name': 'RouteName',
    'Private_Code': 'LineNumber',
    'TransportMode': 'TravelMode',
    'StopPlace.Latitude': 'StopLatitude',
    'StopPlace.Longitude': 'StopLongitude'
})
output_data['NeighborStop'] = output_data['StopName'].shift(-1)
output_data = output_data[[
    'StopName', 'RouteName', 'LineNumber', 'TravelMode',
    'StopLatitude', 'StopLongitude', 'NeighborStop', 'TimeDifference',
]]

# Serialise as a JSON array with one object per row
output_json = output_data.to_json(orient='records', indent=4)

# Persist the JSON text
with open('output.json', 'w') as f:
    f.write(output_json)

# Echo the same JSON text to stdout
print(output_json)
