<a href="https://colab.research.google.com/github/RemyaVKarthikeyan/AA-Stagecoach-Project/blob/main/File_share_18_QSI_points%2C_Timetable_%2C_Scheduled_buses_of_given_LineId.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import requests
from difflib import SequenceMatcher
from datetime import datetime
import pytz

# Function to normalize stop names
def normalize_stop_name(name):
    """Return *name* lower-cased with every whitespace run collapsed to one space.

    Leading and trailing whitespace is dropped as a consequence of splitting
    on whitespace and re-joining the resulting words.
    """
    words = name.lower().split()
    return ' '.join(words)

# Function to fetch data from the API
def fetch_data(url, timeout=30):
    """Issue an HTTP GET for *url* and return the decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. ``requests`` applies no timeout by default, so
            without this a stalled connection would block indefinitely.

    Returns:
        The object produced by ``response.json()``.
    """
    response = requests.get(url, timeout=timeout)
    return response.json()

# Function to extract schedule names
def extract_schedule_names(data, schedule_names_dict=None):
    """Recursively collect schedule names and their known journeys.

    Walks nested dicts and lists. Every dict whose ``'$type'`` equals the
    TfL ``Schedule`` type string and that carries both ``'knownJourneys'``
    and ``'name'`` contributes one ``name -> knownJourneys`` entry. A later
    schedule with the same name overwrites an earlier one.

    Args:
        data: Decoded JSON value (dict, list, or scalar) to search.
        schedule_names_dict: Optional dict to accumulate into. When omitted
            a fresh dict is created for this call, so results from one call
            never leak into another.

    Returns:
        The dict mapping schedule name to its ``knownJourneys`` value; this
        is the same object as *schedule_names_dict* when one was supplied.
    """
    # A literal ``{}`` default would be created once and shared by every
    # call, silently carrying schedules over between unrelated payloads.
    if schedule_names_dict is None:
        schedule_names_dict = {}

    if isinstance(data, dict):
        is_schedule = (
            data.get('$type') == "Tfl.Api.Presentation.Entities.Schedule, Tfl.Api.Presentation.Entities"
            and 'knownJourneys' in data
        )
        if is_schedule and 'name' in data:
            schedule_names_dict[data['name']] = data['knownJourneys']
        # Keep descending: schedules may be nested at any depth.
        for value in data.values():
            extract_schedule_names(value, schedule_names_dict)
    elif isinstance(data, list):
        for item in data:
            extract_schedule_names(item, schedule_names_dict)
    return schedule_names_dict

# Function to categorize journeys into hourly slots
def categorize_into_slots(timetable):
    """Group journeys into 24 buckets, one per hour of the day.

    Each journey's ``'hour'`` value is converted with ``int``. Journeys whose
    hour falls outside 0..23 are left out of every bucket.

    Returns:
        A list of 24 lists; index ``h`` holds the journeys for hour ``h`` in
        their original order.
    """
    hourly = [[] for _ in range(24)]
    for entry in timetable:
        hr = int(entry['hour'])
        if hr < 0 or hr >= 24:
            # Outside a single day's range: not assigned to any slot.
            continue
        hourly[hr].append(entry)
    return hourly

# Function to fetch the current day of the week
def get_day_of_week():
    """Return the full weekday name (e.g. 'Monday') for the current time in Europe/London."""
    london_now = datetime.now(pytz.timezone('Europe/London'))
    # %A formats the full weekday name.
    return london_now.strftime('%A')

# Function to retrieve stop names from TfL API and match with Route_Dir_QSI_No
def find_route_details(lineID):
    """Match a line's QSI stops against the TfL route and print this hour's timetable.

    Reads the module-level DataFrame ``df`` (it must provide the
    'Route_Dir_QSI_No' and 'STOP_NAME' columns). Rows whose
    'Route_Dir_QSI_No' looks like ``<lineID>_A<n>`` are paired with the
    'outbound' route sequence and ``<lineID>_B<n>`` rows with the 'inbound'
    one. Each API stop is matched to a row by exact normalized name first,
    then by fuzzy similarity of the '/'-separated name parts. For every
    matched stop the timetable is fetched and the journeys falling in the
    current Europe/London hour are printed.

    Side effects:
        * Upper-cases ``df['Route_Dir_QSI_No']`` and normalizes
          ``df['STOP_NAME']`` in place.
        * Performs HTTP requests against api.tfl.gov.uk.
        * Prints all results to stdout.

    Args:
        lineID: Line identifier as text (e.g. "199"); matched
            case-insensitively, surrounding whitespace is ignored.

    Returns:
        None.
    """
    # Local import: only needed here, to escape the user-supplied line id.
    import re

    # Ensure the 'Route_Dir_QSI_No' column exists
    if 'Route_Dir_QSI_No' not in df.columns:
        print("The 'Route_Dir_QSI_No' column is not present in the provided file.")
        return

    # Upper-case for case-insensitive comparison with the spreadsheet values.
    lineID = lineID.strip().upper()
    if not lineID:
        print("No lineID was provided.")
        return

    df['Route_Dir_QSI_No'] = df['Route_Dir_QSI_No'].str.upper()
    df['STOP_NAME'] = df['STOP_NAME'].apply(normalize_stop_name)

    # Escape the id so characters such as '+' or '.' are matched literally.
    line_token = re.escape(lineID)
    pattern_A = f"^{line_token}_A\\d+$"  # e.g. D7_A1, D7_A12
    pattern_B = f"^{line_token}_B\\d+$"  # e.g. D7_B1, D7_B12

    filtered_df_A = df[df['Route_Dir_QSI_No'].str.match(pattern_A, na=False)]
    filtered_df_B = df[df['Route_Dir_QSI_No'].str.match(pattern_B, na=False)]

    print(f"\n\n\033[1m\033[4mStop names from the Excel file for direction {lineID}_A\033[0m\n")
    print(filtered_df_A[['Route_Dir_QSI_No', 'STOP_NAME']])

    print(f"\n\n\033[1m\033[4mStop names from the Excel file for direction {lineID}_B\033[0m\n")
    print(filtered_df_B[['Route_Dir_QSI_No', 'STOP_NAME']])

    # Fixed column set so that an empty result still has the expected columns.
    result_columns = ['Route_Dir_QSI_No', 'STOP_Name', 'ID']

    def find_fuzzy_match(stop_name_api, filtered_df):
        """Return the first row sharing a similar '/'-separated name part, else None."""
        # Empty parts are dropped: two empty strings compare as a perfect
        # match and would pair unrelated stops.
        api_parts = [p.strip() for p in stop_name_api.split('/') if p.strip()]
        for _, row in filtered_df.iterrows():
            df_parts = [p.strip() for p in row['STOP_NAME'].split('/') if p.strip()]
            for api_part in api_parts:
                for df_part in df_parts:
                    if SequenceMatcher(None, df_part, api_part).ratio() > 0.8:
                        return row
        return None

    def fetch_and_process_route_data(route_type, filtered_df):
        """Fetch one direction's route sequence and pair its stops with *filtered_df* rows."""
        api_url = f"https://api.tfl.gov.uk/Line/{lineID}/Route/Sequence/{route_type}"
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(api_url, timeout=30)

        if response.status_code != 200:
            print(f"Failed to fetch route sequence data from TfL API for {route_type} route. Status code: {response.status_code}")
            return []

        sequences = response.json().get('stopPointSequences') or []
        if not sequences:
            print(f"No stop point sequence returned by TfL API for {route_type} route.")
            return []

        results_list = []
        # Only the first sequence is used, as before.
        # NOTE(review): further sequences (if the API returns any) are ignored — confirm this is intended.
        for stop in sequences[0]['stopPoint']:
            stop_name_api = normalize_stop_name(stop['name'])

            # Prefer an exact name match; fall back to fuzzy part matching.
            exact_rows = filtered_df[filtered_df['STOP_NAME'] == stop_name_api]
            if not exact_rows.empty:
                matched = exact_rows.iloc[0]
            else:
                matched = find_fuzzy_match(stop_name_api, filtered_df)

            if matched is None:
                continue

            results_list.append({
                'Route_Dir_QSI_No': matched['Route_Dir_QSI_No'],
                'STOP_Name': stop['name'],
                'ID': stop['id']
            })

        return results_list

    def remove_partial_matches(exact_df, matched_df):
        """Drop fuzzy matches for a QSI point when an exact-name match exists for it."""
        if matched_df.empty:
            return matched_df
        for _, row in exact_df.iterrows():
            same_qsi = matched_df['Route_Dir_QSI_No'] == row['Route_Dir_QSI_No']
            same_name = matched_df['STOP_Name'].apply(normalize_stop_name) == row['STOP_NAME']
            if (same_qsi & same_name).any():
                # Keep the exact match, discard the other stops tied to this QSI point.
                matched_df = matched_df[~(same_qsi & ~same_name)]
        return matched_df

    def keep_direction(matched_df, pattern):
        """Keep only rows whose 'Route_Dir_QSI_No' matches *pattern*."""
        if matched_df.empty:
            return matched_df
        return matched_df[matched_df['Route_Dir_QSI_No'].str.match(pattern, na=False)]

    # Outbound pairs with the _A points, inbound with the _B points.
    matched_results_df_A = pd.DataFrame(
        fetch_and_process_route_data('outbound', filtered_df_A), columns=result_columns)
    matched_results_df_B = pd.DataFrame(
        fetch_and_process_route_data('inbound', filtered_df_B), columns=result_columns)

    matched_results_df_A = remove_partial_matches(filtered_df_A, matched_results_df_A)
    matched_results_df_B = remove_partial_matches(filtered_df_B, matched_results_df_B)

    print(f"\n\n\033[1m\033[4mQSI stop points for direction {lineID}_A\033[0m\n")
    matched_results_df_A = keep_direction(matched_results_df_A, pattern_A)
    print(matched_results_df_A[['Route_Dir_QSI_No', 'STOP_Name', 'ID']])

    print(f"\n\n\033[1m\033[4mQSI stop points for direction {lineID}_B\033[0m\n")
    matched_results_df_B = keep_direction(matched_results_df_B, pattern_B)
    print(matched_results_df_B[['Route_Dir_QSI_No', 'STOP_Name', 'ID']])

    combined_df = pd.concat([matched_results_df_A, matched_results_df_B], ignore_index=True)

    # Current hour and weekday in London time.
    bst = pytz.timezone('Europe/London')
    current_hour = datetime.now(bst).hour
    day_of_week = get_day_of_week()

    # Candidate schedule names for today, in order of preference. This only
    # depends on the weekday, so it is computed once rather than per stop.
    day_key = day_of_week.lower()
    if day_key in ('monday', 'tuesday', 'wednesday', 'thursday'):
        preferred_schedule_names = ['Mon-Fri Schooldays', 'Monday to Thursday', 'Monday to Friday']
    elif day_key == 'friday':
        preferred_schedule_names = ['Mon-Fri Schooldays', 'Monday to Friday', 'Friday']
    elif day_key == 'saturday':
        preferred_schedule_names = ['Saturday']
    elif day_key == 'sunday':
        preferred_schedule_names = ['Sunday']
    else:
        preferred_schedule_names = [day_of_week]

    # Chosen from the first stop that offers one of the preferred names and
    # then reused for all remaining stops.
    selected_schedule_name = None

    for _, row in combined_df.iterrows():
        stop_point_id = row['ID']
        route_dir_qsi_no = row['Route_Dir_QSI_No']

        if f"{lineID}_A" in route_dir_qsi_no:
            direction = 'outbound'
        elif f"{lineID}_B" in route_dir_qsi_no:
            direction = 'inbound'
        else:
            continue

        url = f'https://api.tfl.gov.uk/Line/{lineID}/Timetable/{stop_point_id}?direction={direction}'
        data = fetch_data(url)

        # Pass a fresh dict so schedules from a previous stop can never be
        # mistaken for this stop's timetable.
        schedule_names_dict = extract_schedule_names(data, {})

        if not selected_schedule_name:
            selected_schedule_name = next(
                (name for name in preferred_schedule_names if name in schedule_names_dict), None)
            if selected_schedule_name:
                print(f"\nToday is {day_of_week}. The selected Schedule name is {selected_schedule_name}.")

        if not selected_schedule_name:
            continue

        timetable = schedule_names_dict.get(selected_schedule_name)
        if timetable is None:
            # This stop's response does not contain the chosen schedule.
            print(f"\nNo '{selected_schedule_name}' schedule found for stop point {stop_point_id}.")
            continue

        journeys_this_hour = categorize_into_slots(timetable)[current_hour]

        print(f"\nNumber of buses scheduled for this hour at stop point {stop_point_id}: {len(journeys_this_hour)}")

        print(f"\nTimetable for stop point {stop_point_id} at hour {current_hour}:")
        for journey in journeys_this_hour:
            journey_hour = str(journey['hour']).zfill(2)
            journey_minute = str(journey['minute']).zfill(2)
            clock = f"{journey_hour}:{journey_minute}"
            print(f"{clock}   {lineID}    {stop_point_id}  {direction.capitalize()}")

# Example usage: load the QSI spreadsheet, prompt for a line, show its details.

# Location of the Excel workbook holding the QSI points (adjust as needed).
file_path = '/content/QSI points.xlsx'
# find_route_details reads this module-level DataFrame.
df = pd.read_excel(io=file_path)

# Prompt the user for the line to look up.
lineID = input("Please enter the lineID: ")

# Print the matched QSI stops and the timetable for the current hour.
find_route_details(lineID=lineID)


Please enter the lineID: 199


[1m[4mStop names from the Excel file for direction 199_A[0m

    Route_Dir_QSI_No                            STOP_NAME
292           199_A1             canada water bus station
293           199_A2         surrey quays shopping centre
294           199_A3            grove street / plough way
295           199_A4                 deptford high street
296           199_A5  greenwich town centre / nelson road
297           199_A6                     lewisham station
298           199_A7                    lewisham hospital


[1m[4mStop names from the Excel file for direction 199_B[0m

    Route_Dir_QSI_No                          STOP_NAME
299           199_B1                       newquay road
300           199_B2  bromley road / lewisham town hall
301           199_B3                  lewisham hospital
302           199_B4               lewisham clock tower
303           199_B5            greenwich church street
304           199_B6               dept