<a href="https://colab.research.google.com/github/RemyaVKarthikeyan/AA-Stagecoach-Project/blob/main/File_share_49_Arrival_predictions_corrected_with_Headway_calculation_correction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import pytz
import time

def fetch_arrival_predictions(line_id, stop_point_id, direction):
    """Fetch arrival predictions for one line at one stop from the TfL API.

    Args:
        line_id: Line identifier interpolated into the request URL.
        stop_point_id: Stop point identifier interpolated into the request URL.
        direction: Value sent as the ``direction`` query parameter.

    Returns:
        A ``(DataFrame, station_name)`` tuple:

        * on success, the frame is sorted by expected arrival and carries the
          columns ``Line``, ``Vehicle ID``, ``Stop Point``, ``Direction``,
          ``Expected Arrival (BST)``, ``Expected Arrival (HM)``, ``Gap``,
          ``2_Gap`` and ``Gap_Sq``; ``station_name`` is taken from the first
          prediction;
        * ``(empty DataFrame, None)`` when the API returns no predictions;
        * ``(None, None)`` when the HTTP request fails (the error is printed).

        Despite its name, ``Expected Arrival (BST)`` holds London wall-clock
        time (GMT or BST depending on the date), stored without tzinfo.
        ``Gap`` is the headway in whole minutes to the previous arrival
        (0 for the first row).
    """
    base_url = f"https://api.tfl.gov.uk/Line/{line_id}/Arrivals/{stop_point_id}"
    params = {'direction': direction}
    try:
        # A timeout keeps the caller's polling loop from hanging forever on a
        # stalled connection; timeouts are RequestException subclasses.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None, None

    if not data:
        return pd.DataFrame(), None  # No data available

    station_name = data[0]['stationName']
    predictions = []
    for item in data:
        # The API timestamp is UTC ('Z' suffix). Convert it with the real
        # Europe/London rules instead of a fixed +1h, which is only right
        # while summer time is in force, then drop tzinfo so downstream code
        # keeps working with naive datetimes.
        arrival_time_local = (
            pd.to_datetime(item['expectedArrival'], utc=True)
            .tz_convert('Europe/London')
            .tz_localize(None)
        )
        predictions.append({
            'Line': item['lineName'],
            'Vehicle ID': item['vehicleId'],
            'Stop Point': stop_point_id,
            'Direction': direction,
            'Expected Arrival (BST)': arrival_time_local,
            'Expected Arrival (HM)': arrival_time_local.strftime('%H:%M')
        })

    df = pd.DataFrame(predictions)
    df = df.sort_values(by='Expected Arrival (BST)', ascending=True)
    df['Expected Arrival (BST)'] = pd.to_datetime(df['Expected Arrival (BST)'])  # Convert to datetime
    df['Expected Arrival (HM)'] = pd.to_datetime(df['Expected Arrival (HM)'], format='%H:%M')

    # Headways keep minute granularity but are measured on the full timestamp:
    # the HM column has no real date (always 1900-01-01), so diffing it gives
    # a large negative gap whenever the batch crosses midnight.
    minute_stamps = df['Expected Arrival (BST)'].dt.floor('min')
    df['Gap'] = minute_stamps.diff().fillna(pd.Timedelta(seconds=0)).dt.total_seconds() / 60
    df['2_Gap'] = (df['Gap'] * 2).round(2)
    df['Gap_Sq'] = (df['Gap'] * df['Gap']).round(2)
    return df, station_name

def _merge_prediction(cumulative_df, row):
    """Fold one prediction row into the cumulative table and return the table.

    A vehicle that is not yet recorded is appended. For a known vehicle the
    new prediction is compared with that vehicle's most recent recorded
    arrival: if it falls more than one clock hour later it is treated as a
    new trip and appended, otherwise it replaces that most recent record.
    """
    arrival_col = 'Expected Arrival (BST)'
    update_cols = ['Line', 'Stop Point', 'Direction', 'Expected Arrival (BST)',
                   'Expected Arrival (HM)', 'Gap', '2_Gap', 'Gap_Sq']

    same_vehicle = cumulative_df['Vehicle ID'] == row['Vehicle ID']
    if not same_vehicle.any():
        return pd.concat([cumulative_df, row.to_frame().T], ignore_index=True)

    # Compare against the latest stored trip of this vehicle, not the first
    # one, so an already-recorded later trip is refreshed instead of being
    # appended again on every poll.
    known_times = pd.to_datetime(cumulative_df.loc[same_vehicle, arrival_col])
    latest_label = known_times.idxmax()

    # Truncate both timestamps to the hour and compare them as datetimes.
    # This keeps the "more than one clock hour later" rule but stays correct
    # across midnight, where comparing bare hour numbers (0 vs 23 + 1) fails.
    existing_hour_start = known_times.loc[latest_label].replace(
        minute=0, second=0, microsecond=0)
    new_hour_start = pd.Timestamp(row[arrival_col]).replace(
        minute=0, second=0, microsecond=0)

    if new_hour_start > existing_hour_start + timedelta(hours=1):
        return pd.concat([cumulative_df, row.to_frame().T], ignore_index=True)

    # Overwrite only the latest record; other trips of the vehicle are kept.
    target = cumulative_df.index == latest_label
    cumulative_df.loc[target, update_cols] = row[update_cols].values
    return cumulative_df


def _recompute_headways(cumulative_df):
    """Sort the cumulative table by arrival and rebuild the headway columns."""
    cumulative_df = cumulative_df.sort_values(by='Expected Arrival (BST)', ascending=True)
    cumulative_df['Expected Arrival (BST)'] = pd.to_datetime(cumulative_df['Expected Arrival (BST)'])  # Ensure datetime format
    cumulative_df['Expected Arrival (HM)'] = pd.to_datetime(cumulative_df['Expected Arrival (HM)'], format='%H:%M')

    # Minute-granular gaps taken from the full timestamp, so rows on
    # different dates (or either side of midnight) are spaced correctly.
    minute_stamps = cumulative_df['Expected Arrival (BST)'].dt.floor('min')
    cumulative_df['Gap'] = minute_stamps.diff().fillna(pd.Timedelta(seconds=0)).dt.total_seconds() / 60
    cumulative_df['2_Gap'] = (cumulative_df['Gap'] * 2).round(2)
    cumulative_df['Gap_Sq'] = (cumulative_df['Gap'] * cumulative_df['Gap']).round(2)
    return cumulative_df


def main():
    """Poll arrival predictions every 30 seconds and keep a cumulative table.

    Prompts for the line, stop point and direction, then loops forever:
    fetches the current predictions, merges them into a cumulative DataFrame
    (one record per vehicle trip), recomputes headways, and prints both
    tables plus the number of buses whose expected arrival falls in the
    current London clock hour. Stop it with Ctrl-C.
    """
    line_id = input("Enter Line ID: ")
    stop_point_id = input("Enter Stop Point ID: ")
    direction = input("Enter Direction (inbound/outbound): ")

    cumulative_df = pd.DataFrame(columns=[
        'Line', 'Vehicle ID', 'Stop Point', 'Direction',
        'Expected Arrival (BST)', 'Expected Arrival (HM)',
        'Gap', '2_Gap', 'Gap_Sq'
    ])

    while True:
        arrival_predictions_df, station_name = fetch_arrival_predictions(line_id, stop_point_id, direction)

        if arrival_predictions_df is not None and not arrival_predictions_df.empty:
            for _, row in arrival_predictions_df.iterrows():
                cumulative_df = _merge_prediction(cumulative_df, row)

            # Recalculate headway for cumulative_df
            cumulative_df = _recompute_headways(cumulative_df)

            # Count buses in the current London clock hour. Using a
            # [start, end) window on the full timestamp avoids also counting
            # rows from the same hour on an earlier date.
            now_london = datetime.now(pytz.timezone('Europe/London')).replace(tzinfo=None)
            hour_start = now_london.replace(minute=0, second=0, microsecond=0)
            hour_end = hour_start + timedelta(hours=1)
            arrivals = cumulative_df['Expected Arrival (BST)']
            num_buses_observed = int(((arrivals >= hour_start) & (arrivals < hour_end)).sum())

            print(f"\nArrival Predictions for stop point {stop_point_id} ({station_name}):")
            print(arrival_predictions_df.to_string(index=False))
            print("\nCumulative DataFrame:")
            print(cumulative_df.to_string(index=False))
            print(f"\nNumber of buses observed in the current hour: {num_buses_observed}")
        else:
            print("No arrival predictions available.")

        print("Refreshing data in 30 seconds...\n")
        time.sleep(30)  # Wait for 30 seconds before fetching data again


if __name__ == "__main__":
    main()


Enter Line ID: 141
Enter Stop Point ID: 490015195M
Enter Direction (inbound/outbound): outbound

Arrival Predictions for stop point 490015195M (City Road / Leonard Street):
Line Vehicle ID Stop Point Direction Expected Arrival (BST) Expected Arrival (HM)  Gap  2_Gap  Gap_Sq
 141    LC67AHF 490015195M  outbound    2024-07-17 15:48:36   1900-01-01 15:48:00  0.0    0.0     0.0
 141    LK66GEJ 490015195M  outbound    2024-07-17 15:59:56   1900-01-01 15:59:00 11.0   22.0   121.0
 141    LK66GFJ 490015195M  outbound    2024-07-17 16:03:21   1900-01-01 16:03:00  4.0    8.0    16.0
 141    LK66GEU 490015195M  outbound    2024-07-17 16:06:01   1900-01-01 16:06:00  3.0    6.0     9.0

Cumulative DataFrame:
Line Vehicle ID Stop Point Direction Expected Arrival (BST) Expected Arrival (HM)  Gap  2_Gap  Gap_Sq
 141    LC67AHF 490015195M  outbound    2024-07-17 15:48:36   1900-01-01 15:48:00  0.0    0.0     0.0
 141    LK66GEJ 490015195M  outbound    2024-07-17 15:59:56   1900-01-01 15:59:00 11.0   2

KeyboardInterrupt: 

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time

def fetch_arrival_predictions(line_id, stop_point_id, direction, existing_data):
    """Fetch current predictions from the TfL API and merge them into a table.

    Args:
        line_id: Line identifier interpolated into the request URL.
        stop_point_id: Stop point identifier interpolated into the request URL.
        direction: Value sent as the ``direction`` query parameter.
        existing_data: DataFrame returned by a previous call, or ``None`` on
            the first call. It may be modified in place.

    Returns:
        A ``(DataFrame, station_name)`` tuple. When the request fails (the
        error is printed) or the API returns no predictions, the result is
        ``(existing_data, None)`` with the table unchanged. Otherwise the
        table is sorted by expected arrival and has the columns ``Line``,
        ``Vehicle ID``, ``Stop Point``, ``Direction``,
        ``Expected Arrival (BST)``, ``Expected Arrival (HM)``, ``Gap``,
        ``2_Gap`` and ``Gap_Sq``.

        Despite its name, ``Expected Arrival (BST)`` holds London wall-clock
        time (GMT or BST depending on the date), stored without tzinfo.
        ``Expected Arrival (HM)`` is that timestamp truncated to the minute
        and ``Gap`` is the difference in minutes between consecutive rows
        (0 for the first row).
    """
    arrival_col = 'Expected Arrival (BST)'
    minute_col = 'Expected Arrival (HM)'
    base_url = f"https://api.tfl.gov.uk/Line/{line_id}/Arrivals/{stop_point_id}"
    params = {'direction': direction}
    try:
        # A timeout keeps the polling loop from hanging forever on a stalled
        # connection; timeouts are RequestException subclasses.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return existing_data, None

    if not data:
        return existing_data, None  # No data available

    station_name = data[0]['stationName']

    # One arrival per vehicle within this response: keep the first one seen
    # unless a later entry is more than an hour after it.
    vehicle_arrivals = {}
    for item in data:
        vehicle_id = item['vehicleId']
        # The API timestamp is UTC ('Z' suffix). Convert with the real
        # Europe/London rules instead of a fixed +1h (only right during
        # summer time), then drop tzinfo to keep naive datetimes.
        arrival_time_local = (
            pd.to_datetime(item['expectedArrival'], utc=True)
            .tz_convert('Europe/London')
            .tz_localize(None)
        )
        previous = vehicle_arrivals.get(vehicle_id)
        if previous is None or arrival_time_local > previous + timedelta(hours=1):
            vehicle_arrivals[vehicle_id] = arrival_time_local

    df_new = pd.DataFrame([
        {
            'Line': line_id,
            'Vehicle ID': vehicle_id,
            'Stop Point': stop_point_id,
            'Direction': direction,
            arrival_col: arrival_time_local,
            minute_col: arrival_time_local.floor('min'),
        }
        for vehicle_id, arrival_time_local in vehicle_arrivals.items()
    ])

    if existing_data is None or existing_data.empty:
        existing_data = df_new
    else:
        fresh_positions = []  # rows of df_new to append as new records
        for position, (_, row) in enumerate(df_new.iterrows()):
            same_vehicle = existing_data['Vehicle ID'] == row['Vehicle ID']
            if not same_vehicle.any():
                fresh_positions.append(position)
                continue

            # Compare against the vehicle's latest stored trip, not its first
            # one; otherwise a later trip that is already recorded would be
            # appended again on every poll.
            known_times = pd.to_datetime(existing_data.loc[same_vehicle, arrival_col])
            latest_label = known_times.idxmax()
            if row[arrival_col] > known_times.loc[latest_label] + timedelta(hours=1):
                fresh_positions.append(position)
            else:
                existing_data.at[latest_label, arrival_col] = row[arrival_col]

        # Vehicle IDs are unique within df_new, so appending once after the
        # loop is equivalent to appending row by row.
        if fresh_positions:
            existing_data = pd.concat(
                [existing_data, df_new.iloc[fresh_positions]], ignore_index=True)

    existing_data = existing_data.sort_values(by=arrival_col, ascending=True)
    existing_data[arrival_col] = pd.to_datetime(existing_data[arrival_col])  # Convert to datetime
    # Rebuild the minute column from the full timestamp on every call so all
    # rows share one representation (no mix of 'HH:MM' strings and dated
    # timestamps) and gaps stay correct across midnight.
    existing_data[minute_col] = existing_data[arrival_col].dt.floor('min')
    existing_data['Gap'] = existing_data[minute_col].diff().fillna(pd.Timedelta(seconds=0)).dt.total_seconds() / 60
    existing_data['2_Gap'] = (existing_data['Gap'] * 2).round(2)
    existing_data['Gap_Sq'] = (existing_data['Gap'] * existing_data['Gap']).round(2)

    return existing_data, station_name

if __name__ == "__main__":
    # Ask once for what to monitor; the same query is reused on every poll.
    line_id = input("Enter Line ID: ")
    stop_point_id = input("Enter Stop Point ID: ")
    direction = input("Enter Direction (inbound/outbound): ")

    # The accumulated table is handed back to the fetcher on each iteration.
    existing_data, station_name = None, None

    # Poll until interrupted, pausing 30 seconds between requests.
    while True:
        existing_data, station_name = fetch_arrival_predictions(
            line_id, stop_point_id, direction, existing_data
        )

        if existing_data is None or station_name is None:
            print("No data available or an error occurred.")
        else:
            print(f"Station Name: {station_name}")
            print(existing_data)

        time.sleep(30)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
8   141    LK66GEJ  490015195M  outbound    2024-07-17 15:59:56   
9   141    LK66GFJ  490015195M  outbound    2024-07-17 16:03:21   
10  141    LK66GEU  490015195M  outbound    2024-07-17 16:06:01   

   Expected Arrival (HM)   Gap  2_Gap  Gap_Sq  
0    2024-07-17 14:56:00   0.0    0.0     0.0  
1    2024-07-17 14:59:00   3.0    6.0     9.0  
2    2024-07-17 15:02:00   3.0    6.0     9.0  
3    2024-07-17 15:06:00   4.0    8.0    16.0  
4    2024-07-17 15:20:00  14.0   28.0   196.0  
5    2024-07-17 15:28:00   8.0   16.0    64.0  
6    2024-07-17 15:36:00   8.0   16.0    64.0  
7    2024-07-17 15:50:00  14.0   28.0   196.0  
8    2024-07-17 15:59:00   9.0   18.0    81.0  
9    2024-07-17 16:03:00   4.0    8.0    16.0  
10   2024-07-17 16:06:00   3.0    6.0     9.0  
Station Name: City Road / Leonard Street
   Line Vehicle ID  Stop Point Direction Expected Arrival (BST)  \
0   141    BG14ONV  490015195M  outbound    2024-

KeyboardInterrupt: 