In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

### First, let's import the stop_times file

In [2]:
# Load the stop_times table; the output below shows one row per (trip, stop) pair.
stop_times = pd.read_csv("stop_times.txt")
# "stop_headsign" is referred to as "destination" in every later cell.
stop_times = stop_times.rename(columns = {"stop_headsign":"destination"})
# Discard three columns that the cells below never read.
stop_times = stop_times.drop(["pickup_type", "drop_off_type", "shape_dist_traveled"], axis=1)
stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,destination
0,7712.y1009.60-1-d12-1.1.O,18:40:00,18:40:00,8240DB000226,1,Sandymount
1,7712.y1009.60-1-d12-1.1.O,18:40:44,18:40:44,8220DB000228,2,Sandymount
2,7712.y1009.60-1-d12-1.1.O,18:41:21,18:41:21,8240DB000229,3,Sandymount
3,7712.y1009.60-1-d12-1.1.O,18:42:20,18:42:20,8240DB000227,4,Sandymount
4,7712.y1009.60-1-d12-1.1.O,18:42:58,18:42:58,8240DB000230,5,Sandymount
...,...,...,...,...,...,...
1874914,6667.y1003.60-H9-b12-1.97.I,08:03:12,08:03:12,8220DB000619,16,Abbey Street
1874915,6667.y1003.60-H9-b12-1.97.I,08:03:50,08:03:50,8220DB000675,17,Abbey Street
1874916,6667.y1003.60-H9-b12-1.97.I,08:05:22,08:05:22,8220DB000620,18,Abbey Street
1874917,6667.y1003.60-H9-b12-1.97.I,08:06:35,08:06:35,8220DB007569,19,Abbey Street


### And the stops file

In [3]:
# Load the stops table (one row per stop) and give the coordinate columns the
# names used by the rest of the notebook.
stops_df = pd.read_csv("stops.txt")
stops_df = stops_df.rename(columns = {'stop_lat': 'latitude', 
                                      "stop_lon": "longitude"})
stops_df

Unnamed: 0,stop_id,stop_name,latitude,longitude
0,8220DB000002,"Parnell Square West, stop 2",53.352244,-6.263723
1,8220DB000003,"Parnell Square West, stop 3",53.352309,-6.263811
2,8220DB000004,"Parnell Square West, stop 4",53.352575,-6.264175
3,8220DB000006,"Parnell Square West, stop 6",53.352749,-6.264454
4,8220DB000007,"Parnell Square West, stop 7",53.352841,-6.264570
...,...,...,...,...
4203,8350DB007461,"Charlesland, stop 7461",53.128932,-6.062803
4204,8350DB007462,"Charlesland, stop 7462",53.128801,-6.062480
4205,8350DB007574,"Southern Cross, stop 7574",53.182348,-6.130064
4206,8350DB007823,"Enniskerry Village, stop 7823",53.194198,-6.170184


### Merging the two files

In [4]:
# Attach each stop's name and coordinates to every stop_times row via stop_id.
# No `how` is given, so pandas performs its default inner join: a stop_times
# row whose stop_id is missing from stops_df would be dropped.
merged_df = pd.merge(stop_times, stops_df, left_on='stop_id', right_on='stop_id')
merged_df

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,destination,stop_name,latitude,longitude
0,7712.y1009.60-1-d12-1.1.O,18:40:00,18:40:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200
1,7728.y1009.60-1-d12-1.1.O,20:00:00,20:00:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200
2,7742.y1009.60-1-d12-1.1.O,19:40:00,19:40:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200
3,7757.y1009.60-1-d12-1.1.O,20:20:00,20:20:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200
4,7769.y1009.60-1-d12-1.1.O,19:20:00,19:20:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200
...,...,...,...,...,...,...,...,...,...
1874914,7119.y1005.60-65-b12-1.263.I,08:50:39,08:50:39,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614
1874915,343.y1003.60-65-b12-1.263.I,06:49:58,06:49:58,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614
1874916,304.y1003.60-65-b12-1.263.I,18:51:31,18:51:31,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614
1874917,349.y1003.60-65-b12-1.263.I,17:54:25,17:54:25,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614


In [5]:
# Look up the Irish-language name ("ainm") that belongs to a row's stop.
def agus_ainm(row, first_list, filtered_list):
    """Return the Irish name paired with ``row['stop_id']``, or ``None``.

    Args:
        row: mapping-like object exposing a ``'stop_id'`` entry.
        first_list: sequence of ``(stop_id, irish_name)`` pairs.
        filtered_list: the stop ids of ``first_list``, in the same order.

    Returns:
        The second element of the first pair whose stop id matches, or
        ``None`` when the stop id does not occur in ``filtered_list``.
    """
    stop_code = row['stop_id']
    if stop_code not in filtered_list:
        return None
    # index() reports the first occurrence, so the earliest pair wins.
    position = filtered_list.index(stop_code)
    return first_list[position][1]

In [6]:
# The Irish stop names are not in the two files loaded above; they come from an
# extra file with more information about each stop.
all_routes_sequences = pd.read_csv("route_seqs.csv")
# Keep only the rows whose Operator is "DB".
db_routes_sequences = all_routes_sequences[all_routes_sequences["Operator"] == "DB"]
# AtcoCode is matched against stop_id below; ShortCommonName_ga is the value
# that ends up in the 'ainm' column.
db_stops_filtered = db_routes_sequences[["AtcoCode", "ShortCommonName_ga"]]

In [7]:
# (stop_id, irish_name) pairs plus the bare list of stop ids. Both names are
# kept because agus_ainm() takes them as arguments.
first_list = [tuple(r) for r in db_stops_filtered.to_numpy()]
filtered_list = [item[0] for item in first_list]

# Build a stop_id -> Irish name dictionary once. Looking a stop up is then a
# constant-time operation instead of two linear scans of `filtered_list` for
# every one of the ~1.9 million rows.
# setdefault keeps the FIRST name seen for a stop id, which is the entry that
# list.index() would have selected.
ainm_by_stop_id = {}
for item in first_list:
    ainm_by_stop_id.setdefault(item[0], item[1])

# Stops without an entry get None, the same value agus_ainm() returns for them.
merged_df['ainm'] = [ainm_by_stop_id.get(stop_code) for stop_code in merged_df['stop_id']]

In [8]:
# Inspect the merged frame now that the 'ainm' column has been added.
merged_df

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm
0,7712.y1009.60-1-d12-1.1.O,18:40:00,18:40:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird
1,7728.y1009.60-1-d12-1.1.O,20:00:00,20:00:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird
2,7742.y1009.60-1-d12-1.1.O,19:40:00,19:40:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird
3,7757.y1009.60-1-d12-1.1.O,20:20:00,20:20:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird
4,7769.y1009.60-1-d12-1.1.O,19:20:00,19:20:00,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird
...,...,...,...,...,...,...,...,...,...,...
1874914,7119.y1005.60-65-b12-1.263.I,08:50:39,08:50:39,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614,Cill an Ardáin
1874915,343.y1003.60-65-b12-1.263.I,06:49:58,06:49:58,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614,Cill an Ardáin
1874916,304.y1003.60-65-b12-1.263.I,18:51:31,18:51:31,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614,Cill an Ardáin
1874917,349.y1003.60-65-b12-1.263.I,17:54:25,17:54:25,8230DB002358,38,Poolbeg St,"Killinarden, stop 2358",53.280704,-6.389614,Cill an Ardáin


### To make the correct modifications to the dataframe, we need to split it into the correct chunks. We only want the longest shape of each route. This will be easier with a column containing the correct line_id for each row

In [9]:
def line_id(row):
    """Return the line identifier embedded in the row's ``shape_id``.

    The identifier is the second ``-``-separated field, e.g. ``"1"`` for
    ``"60-1-d12-1.1.O"``.
    """
    fields = row['shape_id'].split('-')
    return fields[1]


def shape_id(row):
    """Build the shape identifier from the row's ``trip_id``.

    The ``trip_id`` is split on ``.`` and its third, fourth and fifth pieces
    are re-joined with ``.``; for example ``"7712.y1009.60-1-d12-1.1.O"``
    yields ``"60-1-d12-1.1.O"``.
    """
    pieces = row['trip_id'].split('.')
    route_part, variant_part, direction_part = pieces[2], pieces[3], pieces[4]
    return f"{route_part}.{variant_part}.{direction_part}"


def route_direction(row):
    """Translate the last character of ``trip_id`` into a direction label.

    ``"O"`` becomes ``"outbound"`` and ``"I"`` becomes ``"inbound"``; any other
    final character yields ``None``.
    """
    labels = {"O": "outbound", "I": "inbound"}
    final_letter = row['trip_id'][-1]
    return labels.get(final_letter)
    
    
# function for isolating the stop number for each row
def stop_number(row):
    """Return the stop number that ends the row's ``stop_name``.

    The name is split on single spaces and the last token is inspected, so
    ``"Shanard Avenue, stop 226"`` gives ``"226"``.

    Returns:
        The last token (as a string) when it consists only of digits,
        otherwise the placeholder ``"No stop number."``.
    """
    last_token = row['stop_name'].split(' ')[-1]
    # isdigit has to be *called*: the bare method object is always truthy,
    # which made the placeholder branch unreachable.
    if last_token.isdigit():
        return last_token
    return "No stop number."

    
def stop_name(row):
    """Return the part of the row's ``stop_name`` that precedes the first comma.

    A name without a comma is returned unchanged.
    """
    head, *_ = row["stop_name"].split(",")
    return head

In [10]:
# Derive shape_id from trip_id, row by row. line_id below depends on this column.
merged_df["shape_id"] = merged_df.apply(shape_id, axis=1)

In [11]:
# Derive line_id from the shape_id column created in the previous cell.
merged_df["line_id"] = merged_df.apply(line_id, axis=1)

In [12]:
# Label every row "outbound" or "inbound" from the last letter of its trip_id.
merged_df["direction"] = merged_df.apply(route_direction, axis=1)

In [13]:
# Extract the stop number from the still-unmodified stop_name
# (e.g. "Shanard Avenue, stop 226" -> "226").
merged_df["stop_num"] = merged_df.apply(stop_number, axis=1)

In [14]:
# Overwrite stop_name with just the text before the comma. This has to run
# AFTER the stop_num cell above: stop_number() reads the number from the end of
# the original stop_name, which no longer contains it once this cell has run.
merged_df["stop_name"] = merged_df.apply(stop_name, axis=1)
merged_df

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,shape_id,line_id,direction,stop_num
0,7712.y1009.60-1-d12-1.1.O,18:40:00,18:40:00,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226
1,7728.y1009.60-1-d12-1.1.O,20:00:00,20:00:00,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226
2,7742.y1009.60-1-d12-1.1.O,19:40:00,19:40:00,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226
3,7757.y1009.60-1-d12-1.1.O,20:20:00,20:20:00,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226
4,7769.y1009.60-1-d12-1.1.O,19:20:00,19:20:00,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1874914,7119.y1005.60-65-b12-1.263.I,08:50:39,08:50:39,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358
1874915,343.y1003.60-65-b12-1.263.I,06:49:58,06:49:58,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358
1874916,304.y1003.60-65-b12-1.263.I,18:51:31,18:51:31,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358
1874917,349.y1003.60-65-b12-1.263.I,17:54:25,17:54:25,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358


In [15]:
# Every distinct line_id, in order of first appearance in merged_df; the main
# loop further down iterates over this list.
line_list = merged_df["line_id"].unique().tolist()
print(len(line_list))
print(line_list)

102
['1', '16', '16D', '33', '41', '41B', '41C', '41D', '44', '13', '11', '40', '40B', '40D', '122', '38', '38A', '38B', '38D', '53', '15A', '15B', '15D', '47', '56A', '61', '77A', '77X', '27', '120', '25', '25A', '25B', '25D', '26', '4', '66', '66A', '66B', '66E', '67', '7', '7A', '140', '155', '83', '83A', '9', '46A', '46E', '33E', '116', '37', '39', '39A', '70', '145', '33D', '7B', '7D', '32X', '41X', '84X', '142', '118', '25X', '39X', '66X', '67X', '27X', '44B', '14', '27B', '40E', '15', '150', '151', '33X', '51D', '68', '68A', '69', '69X', '123', '65', '65B', '68X', '49', '54A', '130', '27A', '6', 'H1', 'H2', 'H3', '42', '43', 'H9', '79', '79A', '84', '84A']


### We are also going to need an empty dataframe to add to

In [151]:
# Columns of the result table. The loop further down appends one frame per
# line and direction to final_df.
col_names =  ['destination', 'first_departure_schedule', 
              'stops', 'longitudes', 'latitudes', 'names', 
              'id', 'gach_ainm', 'line_id', 'direction']
  
# Start from an empty frame that only carries the column names.
final_df  = pd.DataFrame(columns = col_names)
final_df

Unnamed: 0,destination,first_departure_schedule,stops,longitudes,latitudes,names,id,gach_ainm,line_id,direction


### The following functions will compile all the starting times of each route

In [152]:
def sorting_seconds(time_list):
    """Convert ``HH:MM:SS`` strings to second counts and return them sorted.

    Args:
        time_list: iterable of colon-separated time strings. The fields are
            converted with ``int`` and are not range-checked. A string with
            only two fields is treated as having zero seconds.

    Returns:
        A new list with one integer per input string
        (``hours * 3600 + minutes * 60 + seconds``), in ascending order.
    """
    times_in_seconds = []

    for time in time_list:
        time_units = time.split(':')
        hours = int(time_units[0])
        minutes = int(time_units[1])
        # The seconds are the THIRD field; the previous code multiplied the
        # hours field a second time here instead.
        seconds = int(time_units[2]) if len(time_units) > 2 else 0
        times_in_seconds.append(hours * 3600 + minutes * 60 + seconds)

    times_in_seconds.sort()
    return times_in_seconds

    
def to_timestamp(seconds):
    """Render a second count as an ``HH:MM:00`` string.

    Hours and minutes are zero-padded to at least two digits. The seconds
    field of the result is always the literal ``"00"``: whatever is left of
    the input after removing whole minutes is discarded.
    """
    secs_per_hour = 3600
    secs_per_minute = 60

    whole_hours = int(seconds / secs_per_hour)
    whole_minutes = int((seconds % secs_per_hour) / secs_per_minute)

    return f"{whole_hours:02d}:{whole_minutes:02d}:00"

def sorted_timestamps(times_in_seconds):
    """Format every second count with ``to_timestamp``.

    No sorting happens here: the output follows the order of the input, so the
    result is only sorted when ``times_in_seconds`` already is.
    """
    return [to_timestamp(moment) for moment in times_in_seconds]

def departure_times(df):
    """Return the distinct first-stop departure times of ``df`` as one string.

    Rows whose ``stop_sequence`` equals ``1`` are selected and their distinct
    ``departure_time`` values are ordered by ``sorting_seconds`` and formatted
    by ``sorted_timestamps``. The result joins them with ``", "``; it is the
    empty string when no row matches.
    """
    origin_rows = df[df["stop_sequence"] == 1]
    distinct_departures = origin_rows["departure_time"].unique().tolist()
    ordered_stamps = sorted_timestamps(sorting_seconds(distinct_departures))
    return ", ".join(str(stamp) for stamp in ordered_stamps)

### The following functions make further alterations to the data frame, collapsing each per-stop column into a single comma-separated string

In [153]:
def coordinates(row, df, coordinate):
    """Join every value of ``df[coordinate]`` into one comma-separated string.

    Args:
        row: unused; accepted so the function can be handed to
            ``DataFrame.apply(..., axis=1)``.
        df: frame holding one row per stop.
        coordinate: name of the column to join (``"longitude"`` or
            ``"latitude"`` in this notebook).

    Returns:
        The values converted with ``str`` and joined with ``", "``, in row
        order, or the string ``"None"`` when the column is empty.
    """
    # Keep one value per row. De-duplicating here would drop a coordinate
    # whenever two stops share it, and latitudes and longitudes would be
    # de-duplicated independently, so the lists would no longer line up with
    # each other or with the per-row `stops` and `names` strings.
    values = df[coordinate].tolist()

    if not values:
        return "None"
    return ", ".join(str(value) for value in values)


def gach_ainm(row, df):
    """Join the whole ``ainm`` column of ``df`` into one ``", "``-separated string.

    ``row`` is not used. Every value goes through ``str`` first, so a missing
    name appears in the result as the text of that missing value.
    """
    return ", ".join(map(str, df['ainm'].tolist()))



def names(row, df):
    """Join the whole ``stop_name`` column of ``df`` into one string.

    Args:
        row: unused; accepted so the function can be handed to
            ``DataFrame.apply(..., axis=1)``.
        df: frame whose ``stop_name`` column holds strings.

    Returns:
        The stop names joined with ``", "``, in row order.
    """
    # Only stop_name is read. The earlier, unused lookup of the line_id
    # column has been removed, so the frame no longer needs that column.
    return ", ".join(df["stop_name"].tolist())


def stops(row, df):
    """Join the whole ``stop_num`` column of ``df`` into one string.

    ``row`` is not used. The values are joined with ``", "`` in row order; an
    empty column produces the string ``"None"``.
    """
    stop_numbers = df["stop_num"]
    return ", ".join(stop_numbers) if len(stop_numbers) != 0 else "None"


def create_uniques_id(row):
    """Return ``"<line_id>_<direction>"`` for the row, e.g. ``"1_inbound"``."""
    line, heading = row["line_id"], row["direction"]
    return line + "_" + heading


def modify_df(df):
    """Collapse the per-stop columns of ``df`` into whole-route summary columns.

    For a non-empty frame the columns ``stops``, ``longitudes``, ``latitudes``,
    ``names``, ``gach_ainm`` and ``id`` are added to ``df`` itself. The
    per-stop columns are then removed from the returned frame only.

    Args:
        df: rows of a single shape, one per stop, already in stop order.

    Returns:
        A new frame without ``stop_num``, ``latitude``, ``longitude``,
        ``stop_name``, ``ainm``, ``stop_sequence``, ``departure_time``,
        ``arrival_time`` and ``stop_id``.
    """
    if not df.empty:
        # Each helper ignores its `row` argument and summarises the whole
        # frame, so every row receives the same string. Compute each string
        # once and let pandas broadcast it, rather than recomputing it for
        # every row through DataFrame.apply.
        df['stops'] = stops(None, df)
        df['longitudes'] = coordinates(None, df, "longitude")
        df['latitudes'] = coordinates(None, df, "latitude")
        df['names'] = names(None, df)
        df['gach_ainm'] = gach_ainm(None, df)
        # The id really is derived from each row's own values.
        df['id'] = df.apply(create_uniques_id, axis=1)

    return df.drop(["stop_num", "latitude", "longitude",
                    "stop_name", "ainm", "stop_sequence",
                    "departure_time", "arrival_time", "stop_id"], axis=1)


def make_string(row):
    """Return the row's ``stop_sequence`` as an ``int``.

    Despite the name, no string is produced; the value is used as a numeric
    sort key.
    """
    sequence_value = row["stop_sequence"]
    return int(sequence_value)

def sort_by_sequence(df):
    """Return a copy of ``df`` ordered by ``stop_sequence`` read as an integer.

    The frame passed in is left untouched: it is neither reordered nor given a
    helper column. Callers in this notebook use the return value.

    Args:
        df: frame with a ``stop_sequence`` column whose values ``int`` accepts.

    Returns:
        A new frame with the same columns as ``df``, rows in ascending
        ``stop_sequence`` order.
    """
    # Convert explicitly so that sequence numbers stored as text still sort
    # numerically ("2" before "10").
    sequence_numbers = df["stop_sequence"].map(int).to_numpy()
    # assign() works on a copy, so the temporary 'sort' key never reaches the
    # caller's frame (which is often a slice of merged_df).
    ordered = df.assign(sort=sequence_numbers).sort_values('sort', ascending=True)
    return ordered.drop('sort', axis=1)

### We need to go over every line_id and get the longest shape associated with each 

In [154]:
# One summarised frame per (line, direction) is collected here and concatenated
# onto final_df once, after the loop. DataFrame.append no longer exists in
# pandas 2.x, and growing a frame inside a loop copies it on every iteration.
route_frames = []

for line in line_list:
    temp_df = merged_df[merged_df["line_id"] == line]
    shapes = temp_df["shape_id"].unique().tolist()

    # Split the line's shapes by the direction letter that ends each shape_id.
    # Both lists stay defined after the loop (holding the last line's shapes);
    # a later inspection cell reads inbound_shapes.
    inbound_shapes = [shape for shape in shapes if shape.split('.')[2] == "I"]
    outbound_shapes = [shape for shape in shapes if shape.split('.')[2] == "O"]

    # Inbound is handled before outbound so the rows reach final_df in the
    # same order as before.
    for direction_shapes in (inbound_shapes, outbound_shapes):
        # "Longest" means the shape with the most stops. merged_df has one row
        # per (trip, stop), so counting rows would favour whichever shape has
        # the most trips; counting distinct stop_sequence values does not.
        # Ties keep the first shape encountered.
        longest_shape = ""
        longest_stop_count = 0
        for shape in direction_shapes:
            # Every row of a shape carries the same line_id, so filtering the
            # line's rows is equivalent to filtering all of merged_df.
            stop_count = temp_df.loc[temp_df["shape_id"] == shape, "stop_sequence"].nunique()
            if stop_count > longest_stop_count:
                longest_stop_count = stop_count
                longest_shape = shape

        # A line without shapes in this direction contributes no rows.
        if not longest_shape:
            continue

        # Work on a copy so adding columns never writes into a slice of merged_df.
        longest = temp_df[temp_df["shape_id"] == longest_shape].copy()

        # The schedule has to be computed while every trip is still present,
        # i.e. before the rows are reduced to one per stop.
        longest["first_departure_schedule"] = departure_times(longest)

        longest = longest.drop_duplicates(subset=['stop_sequence'], keep='first')
        longest = sort_by_sequence(longest)
        longest = longest.drop(["shape_id", "trip_id"], axis=1)

        route_frames.append(modify_df(longest))

final_df = pd.concat([final_df] + route_frames)

In [155]:
# Every stop row of a route carries the same summary strings and the same id,
# so keeping the first row per id leaves one row per line and direction.
final_df = final_df.drop_duplicates(subset=["id"], keep='first')

In [156]:
# Preview the first five summarised routes.
final_df.head(5)

Unnamed: 0,destination,first_departure_schedule,stops,longitudes,latitudes,names,id,gach_ainm,line_id,direction
49962,Shanard Road,"06:30:00, 06:42:00, 06:54:00, 07:00:00, 07:06:...","381, 7740, 7741, 387, 388, 389, 393, 371, 391,...","-6.21237419337661, -6.21472966508216, -6.21498...","53.3243237661094, 53.3268757414366, 53.3292794...","St John's Church, Park Avenue, Gilford Road, S...",1_inbound,"Séipeál Eoin, nan, nan, Dumhach Thrá, Páirc Fh...",1,inbound
0,Sandymount,"06:30:00, 06:42:00, 06:54:00, 07:00:00, 07:06:...","226, 228, 229, 227, 230, 231, 1641, 1642, 213,...","-6.26220046436849, -6.25971957291393, -6.25653...","53.391140564198, 53.3918773927815, 53.39139951...","Shanard Avenue, Shanliss Road, Oldtown Road, S...",1_outbound,"Ascail Sheanaird, Br an tSeanleasa, Br an tSea...",1,outbound
1046074,Dublin Airport,"05:30:00, 05:45:00, 06:00:00, 06:15:00, 06:30:...","5171, 2976, 2977, 2978, 2979, 2980, 2981, 2991...","-6.24826056513117, -6.2554595122767, -6.260210...","53.2717359437923, 53.2720572242554, 53.2735737...","Kingston, Grange Hall, Pine Valley, Grange Roa...",16_inbound,"Baile an Rí, Halla na Gráinsí, Gleann na Giúis...",16,inbound
1013778,Ballinteer,"06:00:00, 06:15:00, 06:30:00, 06:45:00, 07:00:...","7347, 3669, 7349, 1631, 1632, 5053, 1633, 1634...","-6.24202072636356, -6.23507978936044, -6.23471...","53.428019654753, 53.4284655392397, 53.42527838...","Zone 15, Maldron Hotel, Radisson Hotel, ALSAA ...",16_outbound,"Aerfort BÁC C 1, Óstán an Maldron, Óstán an Ra...",16,outbound
1014229,Ballinteer,"07:10:00, 07:50:00, 08:30:00","7347, 3669, 7349, 1631, 1632, 5053, 1633, 1634...","-6.24202072636356, -6.23507978936044, -6.23471...","53.428019654753, 53.4284655392397, 53.42527838...","Zone 15, Maldron Hotel, Radisson Hotel, ALSAA ...",16D_outbound,"Aerfort BÁC C 1, Óstán an Maldron, Óstán an Ra...",16D,outbound


In [157]:
#merged_df

In [161]:
# Spot check: list the stop names stored for one route and count them.
# NOTE(review): the variable names mention 37, but the filter below selects the
# id "44_inbound" - confirm which route was meant.
in37 = final_df[final_df["id"]=="44_inbound"]
names37 = in37["names"].tolist()[0]
# The names were joined with ", " when final_df was built, so splitting on the
# same separator recovers the individual stops.
names37 = names37.split(", ")
print(names37)
len(names37)

['Enniskerry Village', 'Enniskerry Village', 'Enniskerry Road', 'St. Mary’s Church', 'Environmental Centre', 'Ashridge Green', 'Monastery House', 'Bramble Hill', 'Killegar House', 'The Scalp Wood', 'Ski Club of Ireland', 'Grange Growers', 'Kiltiernan Village', 'Golden Ball', 'Kilternan NS', 'Kilternan Church', 'Palmerston FC', 'Cairnfort', 'Stepaside Lane', 'Kilgobbin Heights', 'Castle Lodge', 'Ballyogan Road', 'Sandyford Hall', 'Kilgobbin Heights', 'Bearna Park', "St Mary's Church", 'Greenlands', 'Central Bank', 'Clonard Road', 'Ballawley Park', 'Balally Road', 'Balally Drive', 'Dundrum Centre', 'Holy Cross Church', 'Dundrum Luas', 'Rosemount Estate', 'Frankfort', 'Dundrum Hospital', 'Dundrum Business Pk', 'Millmount Terrace', "Luke's Crescent", 'Alexandra College', 'Glenmalure Square', 'Garrynure', 'Norwood Park', 'Hollybank Ave Lower', 'Merton Drive', 'Ranelagh', 'Ranelagh Luas', 'Ranelagh Road', 'Northbrook Road', 'Charlemont Street', 'Harcourt Luas', 'Earlsfort Terrace', 'Dawson S

79

In [110]:
# Spot check of line 37, inbound, taken straight from merged_df.
line37 = merged_df[merged_df["line_id"]=="37"]
inbound37 = line37[line37["direction"]=="inbound"]
# The first row per stop_sequence is kept across ALL inbound shapes of the
# line, so the result can mix stops from different shapes (the shape_id column
# of the output below shows more than one shape).
inbound37 = sort_by_sequence(inbound37.drop_duplicates(subset=["stop_sequence"],  keep="first"))
print(inbound37.shape)
inbound37

(60, 14)


Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,shape_id,line_id,direction,stop_num
1295203,10574.y1009.60-37-d12-1.38.I,23:30:00,23:30:00,8240DB004747,1,Bachelor's Walk,Blanchardstown SC,53.394333,-6.391852,IS Bhaile Bhlainséir,60-37-d12-1.38.I,37,inbound,4747
1302203,4207.y1003.60-37-b12-1.41.I,07:30:55,07:30:55,8240DB001688,2,Wilton Terrace,Oak Lawns,53.376209,-6.364414,Plásóga na Darach,60-37-b12-1.41.I,37,inbound,1688
1296255,10574.y1009.60-37-d12-1.38.I,23:31:55,23:31:55,8240DB001882,3,Bachelor's Walk,Millennium Park,53.392773,-6.398813,Páirc na Mílaoise,60-37-d12-1.38.I,37,inbound,1882
1296737,10574.y1009.60-37-d12-1.38.I,23:32:30,23:32:30,8240DB007379,4,Bachelor's Walk,Mountview Road,53.389748,-6.400505,Radharc an Chnocáin,60-37-d12-1.38.I,37,inbound,7379
1297219,10574.y1009.60-37-d12-1.38.I,23:33:05,23:33:05,8240DB004903,5,Bachelor's Walk,Lohunda Road,53.386664,-6.401733,Bóthar Lohunda,60-37-d12-1.38.I,37,inbound,4903
1297701,10574.y1009.60-37-d12-1.38.I,23:33:33,23:33:33,8240DB007219,6,Bachelor's Walk,Limelawn,53.384251,-6.402741,Motor Park,60-37-d12-1.38.I,37,inbound,7219
1297963,10574.y1009.60-37-d12-1.38.I,23:34:07,23:34:07,8240DB007218,7,Bachelor's Walk,The Courtyard,53.381638,-6.402974,An Clós Cúirte,60-37-d12-1.38.I,37,inbound,7218
1298225,10574.y1009.60-37-d12-1.38.I,23:36:06,23:36:06,8240DB007031,8,Bachelor's Walk,Diswellstown Road,53.374204,-6.401554,Baile an Diosualaigh,60-37-d12-1.38.I,37,inbound,7031
1298487,10574.y1009.60-37-d12-1.38.I,23:36:37,23:36:37,8240DB004897,9,Bachelor's Walk,Luttrellstown Court,53.372157,-6.398354,Cúirt Bhaile Lotrail,60-37-d12-1.38.I,37,inbound,4897
1298749,10574.y1009.60-37-d12-1.38.I,23:37:20,23:37:20,8240DB004898,10,Bachelor's Walk,Castleknock College,53.370276,-6.392789,Col Chaisleán Cnucha,60-37-d12-1.38.I,37,inbound,4898


In [97]:
# Print how many distinct trips use each inbound shape.
# inbound_shapes is whatever the main loop left behind, i.e. the shapes of the
# last line processed (84A in the output below), not a list chosen here.
for shape in inbound_shapes:
    temp = merged_df[merged_df["shape_id"]==shape]
    temp_trips = temp["trip_id"].unique().tolist()
    
    print(shape, "; ", len(temp_trips))

60-84A-d12-1.340.I ;  6
60-84A-d12-1.341.I ;  3
60-84A-b12-1.341.I ;  6
60-84A-b12-1.342.I ;  3


In [45]:
# List the distinct stop names of one specific shape with a running count.
# The names come out in merged_df row order, which is not necessarily the
# order in which the route visits them.
x = merged_df[merged_df["shape_id"]=="60-37-d12-1.40.I"]
count = 0
for stop in x["stop_name"].unique().tolist():
    print(stop)
    count += 1
    print(count)

Nassau Street
1
Kildare Street
2
Leeson St Lower
3
Fitzwilliam Place
4
Baggot Road
5
Our Lady's Church
6
Navan Road Church
7
Arran Quay
8
Four Courts
9
Ormond Quay Upper
10
Bachelors Walk
11
Blanchardstown SC
12
Millennium Park
13
Mountview Road
14
Lohunda Road
15
Limelawn
16
The Courtyard
17
Diswellstown Road
18
Luttrellstown Court
19
Castleknock College
20
Burnell Park Avenue
21
Carpenterstown Road
22
Bramley Walk
23
Sycamore Drive
24
Oaktree Green
25
Maple Glen
26
Oaktree Avenue
27
Castleknock Vale
28
Parklands
29
Ashleigh Green
30
Oak Lawns
31
Castleknock
32
Peck's Lane
33
Deerpark Road
34
Park View
35
Castleknock Gate
36
Old Race Course
37
Ashtown Gate
38
Ashtown Roundabout
39
Kempton
40
Ashtown Grove
41
Cabra Garda Station
42
Skreen Road
43
Ardpatrick Road
44
Blackhorse Avenue
45
Dunard Walk
46
Mc Kee Park
47
Glenbeigh Avenue
48
St David's Terrace
49
Aughrim Street
50
Holy Family Church
51
Aughrim Court
52
Manor Street
53
Stoneybatter
54
Blackhall Place
55
Wilton Terrace
56
Steph

In [24]:
# Build the value for an "id" column from one row.
def create_id(row):
    """Return ``"<shape_id>_<stop_num>"`` for the row, e.g. ``"60-1-d12-1.1.O_226"``."""
    shape, number = row["shape_id"], row["stop_num"]
    return shape + "_" + number

In [25]:
#merged_df["id"] = merged_df.apply(create_id, axis=1)

In [26]:
# merged_df = merged_df.drop(["arrival_time", "departure_time", "trip_id"], axis=1)
# merged_df

Unnamed: 0,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,shape_id,line_id,direction,stop_num,id
0,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226,60-1-d12-1.1.O_226
1,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226,60-1-d12-1.1.O_226
2,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226,60-1-d12-1.1.O_226
3,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226,60-1-d12-1.1.O_226
4,8240DB000226,1,Sandymount,Shanard Avenue,53.391141,-6.262200,Ascail Sheanaird,60-1-d12-1.1.O,1,outbound,226,60-1-d12-1.1.O_226
...,...,...,...,...,...,...,...,...,...,...,...,...
1874914,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358,60-65-b12-1.263.I_2358
1874915,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358,60-65-b12-1.263.I_2358
1874916,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358,60-65-b12-1.263.I_2358
1874917,8230DB002358,38,Poolbeg St,Killinarden,53.280704,-6.389614,Cill an Ardáin,60-65-b12-1.263.I,65,inbound,2358,60-65-b12-1.263.I_2358


In [27]:
# unique_stops = merged_df.drop_duplicates(subset=['stop_num'], keep='first')
# unique_stops = unique_stops[["stop_id",
#                              "latitude",
#                              "longitude",
#                              "stop_name",
#                              "ainm",
#                              "stop_num"]].sort_values(by='stop_id')

# unique_stops

Unnamed: 0,stop_id,latitude,longitude,stop_name,ainm,stop_num
1341704,8220DB000002,53.352244,-6.263723,Parnell Square West,Cg Parnell Thiar,2
246570,8220DB000003,53.352309,-6.263811,Parnell Square West,Cg Parnell Thiar,3
1743370,8220DB000004,53.352575,-6.264175,Parnell Square West,Cg Parnell Thiar,4
977134,8220DB000006,53.352749,-6.264454,Parnell Square West,Cg Parnell Thiar,6
461243,8220DB000007,53.352841,-6.264570,Parnell Square West,Cg Parnell Thiar,7
...,...,...,...,...,...,...
1828114,8350DB007461,53.128932,-6.062803,Charlesland,Acra na mBodach,7461
1828094,8350DB007462,53.128801,-6.062480,Charlesland,Acra na mBodach,7462
787366,8350DB007574,53.182348,-6.130064,Southern Cross,Cros an Deiscirt,7574
1562888,8350DB007823,53.194198,-6.170184,Enniskerry Village,,7823
