### Reading from CTA API

In [1]:
# Loading the libraries
import pandas as pd
import numpy as np
import os
import re
import datetime
import requests
import json
import urllib3
import urllib.request,urllib.parse,urllib.error
from apscheduler.schedulers.background import BlockingScheduler

In [2]:
# Remember the directory the notebook was started from
key_path=os.getcwd()
# Parse the JSON credentials file that sits next to the notebook
with open("cta_key.json","r") as cta_key_file:
    json_key_cta=json.loads(cta_key_file.read())
# The CTA API key is stored under the "key1" entry of that file
cta_key=json_key_cta['key1']

In [3]:
def create_url():
    """
    Builds the request URL for the CTA train positions endpoint.

    One route is drawn at random from a fixed list of route codes, announced on
    stdout, and combined with the module-level ``cta_key`` into the query string.
    Returns the complete URL as a string.
    """
    # Route codes this project polls
    candidate_routes=["Red","Blue","Brn",'G',"Org","Pink"]
    # Draw the route for this particular call
    chosen_route=np.random.choice(candidate_routes)
    print(f"The route used in the API is {chosen_route}")
    # Encode the API key, the route and the requested output format as a query string
    query_string=urllib.parse.urlencode(
        {'key':str(cta_key),'rt':chosen_route,"outputType":"JSON"}
    )
    # The endpoint already ends with "?", so the query string is appended directly
    endpoint="http://lapi.transitchicago.com/api/1.0/ttpositions.aspx?"
    return endpoint+query_string

In [4]:
def extract_cta_data(cta_json_data):
    """
    This function takes the parsed json response of the train positions API and
    returns a Dataframe of the extracted fields.

    Parameters:
        cta_json_data: dict holding the decoded response. It must contain a 'ctatt'
            entry with 'errCd', and on success 'tmst' and 'route'.

    Returns:
        A Dataframe with one row per train of the first route and the columns
        RUN_NUMBER ... LONGITUDE followed by ROUTE_NAME and TIMESTAMP. Every string
        value is stripped of surrounding spaces and upper-cased. The frame has zero
        rows when the route lists no trains.
        None when 'errCd' is anything other than "0" (the error name is printed).
    """
    # API field name -> output column name; the insertion order fixes the column order
    column_names={"rn":"RUN_NUMBER",
                  "destSt":"DEST_STREET",
                  "destNm":"DEST_NAME",
                  "trDr":"TRAIN_ROUTE_NBR",
                  "nextStaId":"NEXT_STATION_ID",
                  "nextStpId":"NEXT_STOP_ID",
                  "nextStaNm":"NEXT_STATION_NAME",
                  "prdt":"PREDICTION_TS",
                  "arrT":"ARRIVAL_TS",
                  "isApp":"IS_APPROACHING",
                  "isDly":"IS_DELAYED",
                  "lat":"LATITUDE",
                  "lon":"LONGITUDE"}
    cta_payload=cta_json_data['ctatt']
    # An error code other than "0" means the API did not return data
    if cta_payload['errCd']!="0":
        print(f"Error Occurred: {cta_payload['errNm']}")
        return None
    # Only the first route of the response is used
    cta_route=cta_payload['route'][0]
    # A route without a 'train' entry is treated as having no active trains
    cta_train=cta_route.get('train') or []
    # NOTE(review): a lone train may arrive as a single object instead of a list;
    # wrap it so it becomes one row rather than failing on integer indexing
    if isinstance(cta_train,dict):
        cta_train=[cta_train]
    # Build the frame in one go from the real values; a field missing from a train
    # becomes None instead of discarding the whole response
    records=[{field:train.get(field) for field in column_names} for train in cta_train]
    cta_df1=pd.DataFrame(records,columns=list(column_names))
    # Adding new columns to contain the Route name and the response timestamp
    cta_df1["ROUTE_NAME"]=cta_route['@name']
    cta_df1["TIMESTAMP"]=cta_payload['tmst']
    cta_df1=cta_df1.rename(columns=column_names)
    # Normalise every column (not only the first one): strip spaces and upper-case
    # string values, leaving non-string values such as None untouched
    for col in cta_df1.columns:
        cta_df1[col]=cta_df1[col].map(
            lambda value: value.strip().upper() if isinstance(value,str) else value
        )
    return cta_df1

In [5]:
def call_api():
    """
    Runs one poll of the API: builds the URL with create_url(), downloads and parses
    the response with extract_cta_data(), and appends the selected columns (without
    a header row) to cta/cta_api_dump.csv under the module-level key_path.

    Nothing is written when the request fails or when the API reports an error; a
    message is printed instead. Always returns None.
    """
    # Getting the API response
    cta_api_url=create_url()
    try:
        # The context manager closes the connection once the body has been read
        with urllib.request.urlopen(cta_api_url) as cta_url_response:
            cta_url_data=cta_url_response.read()
    # Handling the HTTPErrors, i.e. the server answered with an error status
    except urllib.error.HTTPError as error1:
        print(f"Sorry could not retrieve the train positions from the CTA API (HTTP {error1.code}: {error1.reason})")
        return
    # Handling URLexceptions such as Incorrect URL or Internet connection issues
    except urllib.error.URLError as error2:
        print("Failed to reach the server")
        print(f"Reason: {error2.reason}" )
        return
    # The response is converted to json
    cta_json_data=json.loads(cta_url_data)
    cta_df1=extract_cta_data(cta_json_data)
    # extract_cta_data returns None (after printing the reason) when the API reports
    # an error, so there is nothing to append
    if cta_df1 is None:
        return
    cta_df2=cta_df1[["ROUTE_NAME","RUN_NUMBER","DEST_STREET","DEST_NAME","NEXT_STATION_ID"
                     ,"NEXT_STATION_NAME","PREDICTION_TS","ARRIVAL_TS","IS_DELAYED",
                     "LATITUDE","LONGITUDE"]]
    # Make sure the output folder exists before appending to the dump file
    csv_dump_dir=os.path.join(key_path,"cta")
    os.makedirs(csv_dump_dir,exist_ok=True)
    csv_dump_path=os.path.join(csv_dump_dir,"cta_api_dump.csv")
    cta_df2.to_csv(csv_dump_path,mode="a",index=False,header=False)
    current_time = datetime.datetime.now()
    print(f"Successfully appended the output at {current_time}")

In [8]:
# Run a single poll: fetch the positions for one route and append them to the CSV dump
call_api()

The route used in the API is Pink
Successfully appended the output at 2024-02-23 02:36:23.161200


In [None]:
import time

# Number of seconds to wait between two consecutive polls of the API
POLL_INTERVAL_SECONDS=10

# Keep polling until the kernel is interrupted; each iteration waits first and then
# performs one call_api() cycle
try:
    while True:
        time.sleep(POLL_INTERVAL_SECONDS)
        call_api()
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop, so end without a traceback
    print("Polling stopped")