In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
stop_times = pd.read_csv("stop_times.txt")
stop_times.head(4)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,5242.y1005.60-1-d12-1.1.O,21:00:00,21:00:00,8240DB000226,1,Sandymount,0,0,0.0
1,5242.y1005.60-1-d12-1.1.O,21:00:38,21:00:38,8220DB000228,2,Sandymount,0,0,267.48
2,5242.y1005.60-1-d12-1.1.O,21:01:11,21:01:11,8240DB000229,3,Sandymount,0,0,483.53
3,5242.y1005.60-1-d12-1.1.O,21:02:02,21:02:02,8240DB000227,4,Sandymount,0,0,834.47


## We want to check the stop sequencing on a given route "shape" and make sure we account for all possible stops per shape. Some may not be included in all listings, but there should be plenty of listings per shape so it is unlikely a shape has skipped a given stop in every journey

### Let's start with one route shape for now

In [3]:
route_155_b12_191_O = stop_times[stop_times["trip_id"].str.contains("60-155-b12-1.91.O")] 
route_155_b12_191_O

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
936448,9646.y1005.60-155-b12-1.91.O,06:00:00,06:00:00,8220DB007698,1,Bray,0,0,0.00
936449,9646.y1005.60-155-b12-1.91.O,06:02:14,06:02:14,8220DB007113,3,Bray,0,0,1075.10
936450,9646.y1005.60-155-b12-1.91.O,06:03:28,06:03:28,8220DB000127,4,Bray,0,0,1471.78
936451,9646.y1005.60-155-b12-1.91.O,06:04:30,06:04:30,8220DB000112,5,Bray,0,0,1800.78
936452,9646.y1005.60-155-b12-1.91.O,06:05:18,06:05:18,8220DB000113,6,Bray,0,0,2233.60
...,...,...,...,...,...,...,...,...,...
948143,9740.y1005.60-155-b12-1.91.O,18:40:40,18:40:40,8350DB004130,76,Bray,0,0,28362.63
948144,9740.y1005.60-155-b12-1.91.O,18:41:09,18:41:09,8350DB004131,77,Bray,0,0,28520.73
948145,9740.y1005.60-155-b12-1.91.O,18:41:56,18:41:56,8350DB007294,78,Bray,0,0,28782.15
948146,9740.y1005.60-155-b12-1.91.O,18:43:41,18:43:41,8350DB004156,79,Bray,0,0,29199.69


### Find all the stop sequence numbers for this shape

In [4]:
stop_seqs = route_155_b12_191_O["stop_sequence"].unique().tolist()
print(stop_seqs)

[1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80]


### what if stop sequence position 3 does not always equal the same stopID?

In [5]:
counter = 0
for num in stop_seqs:    
    first_stop = route_155_b12_191_O[route_155_b12_191_O["stop_sequence"]==num]
    current_stop = first_stop["stop_id"].unique().tolist()
    current_stop.insert(0, num)
    print(current_stop)
    counter += 1
print("Reference: ", counter)

[1, '8220DB007698']
[3, '8220DB007113']
[4, '8220DB000127']
[5, '8220DB000112']
[6, '8220DB000113']
[7, '8220DB000114']
[8, '8220DB000115']
[9, '8220DB000037']
[10, '8220DB000038']
[11, '8220DB000039']
[12, '8220DB000040']
[13, '8220DB000146']
[14, '8220DB000147']
[15, '8220DB000184']
[16, '8220DB000185']
[17, '8220DB000186']
[18, '8220DB000187']
[19, '8220DB000188']
[20, '8220DB000189']
[21, '8220DB000190']
[22, '8220DB000191']
[23, '8220DB000819']
[24, '8220DB000264']
[25, '8220DB006059']
[26, '8220DB000334']
[27, '8220DB000406']
[28, '8220DB000747']
[29, '8220DB000842']
[30, '8220DB000845']
[31, '8220DB000846']
[32, '8220DB000847']
[33, '8220DB000848']
[34, '8220DB002795']
[35, '8220DB000756']
[36, '8220DB000757']
[37, '8220DB000758']
[38, '8220DB000759']
[39, '8220DB000760']
[40, '8220DB000761']
[41, '8220DB000762']
[42, '8220DB000763']
[43, '8250DB002007']
[44, '8250DB002008']
[45, '8250DB002009']
[46, '8250DB002010']
[47, '8250DB000435']
[48, '8250DB007353']
[49, '8250DB004571']


### - So, we can see that this shape never attends certain stops 2 or 60 and we know that each stop sequence always occurs at the same stopID.

### We only need one instance of each of this listing so let's ditch the dupes, and other redundant data

In [6]:
sequence_route155 = route_155_b12_191_O.drop_duplicates(subset=['stop_sequence'], keep='first')
route155 = sequence_route155.drop(['arrival_time',
                                   'departure_time',
                                   'pickup_type', 
                                   'drop_off_type', 
                                   'shape_dist_traveled'], axis=1)
route155.head(5)

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign
936448,9646.y1005.60-155-b12-1.91.O,8220DB007698,1,Bray
936449,9646.y1005.60-155-b12-1.91.O,8220DB007113,3,Bray
936450,9646.y1005.60-155-b12-1.91.O,8220DB000127,4,Bray
936451,9646.y1005.60-155-b12-1.91.O,8220DB000112,5,Bray
936452,9646.y1005.60-155-b12-1.91.O,8220DB000113,6,Bray


## Next step is to match the stop information with this dataframe

In [7]:
stops_df = pd.read_csv("stops.txt")
stops_df

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon
0,8220DB000002,"Parnell Square West, stop 2",53.352244,-6.263723
1,8220DB000003,"Parnell Square West, stop 3",53.352309,-6.263811
2,8220DB000004,"Parnell Square West, stop 4",53.352575,-6.264175
3,8220DB000006,"Parnell Square West, stop 6",53.352749,-6.264454
4,8220DB000007,"Parnell Square West, stop 7",53.352841,-6.264570
...,...,...,...,...
4216,8350DB007461,"Charlesland, stop 7461",53.128932,-6.062803
4217,8350DB007462,"Charlesland, stop 7462",53.128801,-6.062480
4218,8350DB007574,"Southern Cross, stop 7574",53.182348,-6.130064
4219,8350DB007823,"Enniskerry Village, stop 7823",53.194198,-6.170184


### We will merge the stop data from all_stops, with the historical stops made on the route. The most important part here is the correct coordinates are aligned in the routes sequence.

### While we are here, we will append a row with the correct customer facing route number.

In [8]:
merged_df = pd.merge(route155, stops_df, left_on='stop_id', right_on='stop_id')

routes = []
route_num = merged_df["trip_id"].unique().tolist()
for route in route_num:
    route_string = route.split('-')
    num = route_string[1] 
    if num not in routes:
        routes.append(num)

merged_df = merged_df.assign(route_num = num)
merged_df

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign,stop_name,stop_lat,stop_lon,route_num
0,9646.y1005.60-155-b12-1.91.O,8220DB007698,1,Bray,"IKEA, stop 7698",53.406385,-6.274635,155
1,9646.y1005.60-155-b12-1.91.O,8220DB007113,3,Bray,"Northwood Avenue, stop 7113",53.402541,-6.264924,155
2,9646.y1005.60-155-b12-1.91.O,8220DB000127,4,Bray,"Nursing Home, stop 127",53.399108,-6.263680,155
3,9646.y1005.60-155-b12-1.91.O,8220DB000112,5,Bray,"Civic Centre, stop 112",53.396154,-6.263951,155
4,9646.y1005.60-155-b12-1.91.O,8220DB000113,6,Bray,"Trinity Comp School, stop 113",53.392268,-6.263703,155
...,...,...,...,...,...,...,...,...
73,9646.y1005.60-155-b12-1.91.O,8350DB004130,76,Bray,"Castle Street, stop 4130",53.207282,-6.113513,155
74,9646.y1005.60-155-b12-1.91.O,8350DB004131,77,Bray,"Dwyer Park, stop 4131",53.206567,-6.111523,155
75,9646.y1005.60-155-b12-1.91.O,8350DB007294,78,Bray,"Cornerstone Church, stop 7294",53.204724,-6.109254,155
76,9646.y1005.60-155-b12-1.91.O,8350DB004156,79,Bray,"Quinsborough Road, stop 4156",53.204967,-6.103614,155


### Let's reassess all 155 variations, using the same process.

In [9]:
all_155 = stop_times[stop_times["trip_id"].str.contains("60-155-b12")] 
all_155

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
936448,9646.y1005.60-155-b12-1.91.O,06:00:00,06:00:00,8220DB007698,1,Bray,0,0,0.00
936449,9646.y1005.60-155-b12-1.91.O,06:02:14,06:02:14,8220DB007113,3,Bray,0,0,1075.10
936450,9646.y1005.60-155-b12-1.91.O,06:03:28,06:03:28,8220DB000127,4,Bray,0,0,1471.78
936451,9646.y1005.60-155-b12-1.91.O,06:04:30,06:04:30,8220DB000112,5,Bray,0,0,1800.78
936452,9646.y1005.60-155-b12-1.91.O,06:05:18,06:05:18,8220DB000113,6,Bray,0,0,2233.60
...,...,...,...,...,...,...,...,...,...
960458,9733.y1005.60-155-b12-1.94.I,23:30:22,23:30:22,8220DB000909,52,O'Connell St,0,0,18983.20
960459,9733.y1005.60-155-b12-1.94.I,23:30:52,23:30:52,8220DB000786,53,O'Connell St,0,0,19245.82
960460,9733.y1005.60-155-b12-1.94.I,23:32:39,23:32:39,8220DB000792,54,O'Connell St,0,0,20001.79
960461,9733.y1005.60-155-b12-1.94.I,23:36:06,23:36:06,8220DB000319,55,O'Connell St,0,0,20730.92


### How many variations of the route are we working with?

In [10]:
all_155_shapes = []
all_155_trips = all_155["trip_id"].tolist()
for route_id in all_155_trips:
    id_strings = route_id.split('.')
    shape_id = id_strings[2] + '.' + id_strings[3] + '.' + id_strings[4]
    if shape_id not in all_155_shapes:
        all_155_shapes.append(shape_id)

print(all_155_shapes)

['60-155-b12-1.91.O', '60-155-b12-1.92.O', '60-155-b12-1.93.I', '60-155-b12-1.94.I']


### What is the stop sequence for each 155 variation.

In [11]:
for shape in all_155_shapes:  
    print("Current shape displayed: ", shape)
    current_shape = all_155[all_155["trip_id"].str.contains(shape)]
    stop_seqs = current_shape["stop_sequence"].unique().tolist()
    
    counter = 0
    for num in stop_seqs:    
        current_stop = current_shape[current_shape["stop_sequence"]==num]
        all_stops = current_stop["stop_id"].unique().tolist()
        all_stops.insert(0, num)
        counter += 1
        print(all_stops)
    print("Reference: ", counter)

Current shape displayed:  60-155-b12-1.91.O
[1, '8220DB007698']
[3, '8220DB007113']
[4, '8220DB000127']
[5, '8220DB000112']
[6, '8220DB000113']
[7, '8220DB000114']
[8, '8220DB000115']
[9, '8220DB000037']
[10, '8220DB000038']
[11, '8220DB000039']
[12, '8220DB000040']
[13, '8220DB000146']
[14, '8220DB000147']
[15, '8220DB000184']
[16, '8220DB000185']
[17, '8220DB000186']
[18, '8220DB000187']
[19, '8220DB000188']
[20, '8220DB000189']
[21, '8220DB000190']
[22, '8220DB000191']
[23, '8220DB000819']
[24, '8220DB000264']
[25, '8220DB006059']
[26, '8220DB000334']
[27, '8220DB000406']
[28, '8220DB000747']
[29, '8220DB000842']
[30, '8220DB000845']
[31, '8220DB000846']
[32, '8220DB000847']
[33, '8220DB000848']
[34, '8220DB002795']
[35, '8220DB000756']
[36, '8220DB000757']
[37, '8220DB000758']
[38, '8220DB000759']
[39, '8220DB000760']
[40, '8220DB000761']
[41, '8220DB000762']
[42, '8220DB000763']
[43, '8250DB002007']
[44, '8250DB002008']
[45, '8250DB002009']
[46, '8250DB002010']
[47, '8250DB000435'

- ### This process appears to be fine. I will now replicate it for all shapes in the stop_times.txt and we should have a complete set of all recent routes and shapes.

### We need a list of all shapes

In [12]:
all_shapes = []
all_trips = stop_times["trip_id"].tolist()
for route_id in all_trips:
    id_strings = route_id.split('.')
    shape_id = id_strings[2] + '.' + id_strings[3] + '.' + id_strings[4]
    if shape_id not in all_shapes:
        all_shapes.append(shape_id)

### The following cell creates stop-sequenced lists for every "shape"

- note: this cell takes some time to run!!

## Note: choosing an index in the cell below will list the stop sequencing of a shape from the list all_shape_ids. these lists will match on the index.

### Create an empty DF to fill

In [13]:
new_df = stop_times.iloc[0:0]
new_df

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled


### Append to that dataframe with a dataframe for every shape id
#### This cell runs through every shape and eliminates any rows that have replica stop_sequencingfor that shape. It then appends that data to the empty dataframe we just created.
** this cell takes some time to run

In [14]:
for shape_id in all_shapes:
    current_shape_df = stop_times[stop_times["trip_id"].str.contains(shape_id)] 
    no_repeats = current_shape_df.drop_duplicates(subset=['stop_sequence'], keep='first')

    new_df = new_df.append(no_repeats, ignore_index=True)
    
new_df = new_df.drop(['arrival_time',
                      'departure_time',
                      'pickup_type', 
                      'drop_off_type', 
                      'shape_dist_traveled'], axis=1)
new_df

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign
0,5242.y1005.60-1-d12-1.1.O,8240DB000226,1,Sandymount
1,5242.y1005.60-1-d12-1.1.O,8220DB000228,2,Sandymount
2,5242.y1005.60-1-d12-1.1.O,8240DB000229,3,Sandymount
3,5242.y1005.60-1-d12-1.1.O,8240DB000227,4,Sandymount
4,5242.y1005.60-1-d12-1.1.O,8240DB000230,5,Sandymount
...,...,...,...,...
36977,6665.y1003.60-H9-b12-1.97.I,8220DB000619,16,Abbey Street
36978,6665.y1003.60-H9-b12-1.97.I,8220DB000675,17,Abbey Street
36979,6665.y1003.60-H9-b12-1.97.I,8220DB000620,18,Abbey Street
36980,6665.y1003.60-H9-b12-1.97.I,8220DB007569,19,Abbey Street


### And now, merge the parsed data with all the extra stop information we want...

In [15]:
merged_df = pd.merge(new_df, stops_df, left_on='stop_id', right_on='stop_id', how='left')
merged_df

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign,stop_name,stop_lat,stop_lon
0,5242.y1005.60-1-d12-1.1.O,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200
1,5242.y1005.60-1-d12-1.1.O,8220DB000228,2,Sandymount,"Shanliss Road, stop 228",53.391877,-6.259720
2,5242.y1005.60-1-d12-1.1.O,8240DB000229,3,Sandymount,"Oldtown Road, stop 229",53.391400,-6.256536
3,5242.y1005.60-1-d12-1.1.O,8240DB000227,4,Sandymount,"Shanliss Drive, stop 227",53.391144,-6.251345
4,5242.y1005.60-1-d12-1.1.O,8240DB000230,5,Sandymount,"Shanowen Road, stop 230",53.389888,-6.249066
...,...,...,...,...,...,...,...
36977,6665.y1003.60-H9-b12-1.97.I,8220DB000619,16,Abbey Street,"Amiens Street, stop 619",53.354045,-6.247305
36978,6665.y1003.60-H9-b12-1.97.I,8220DB000675,17,Abbey Street,"Buckingham St Lower, stop 675",53.352858,-6.248526
36979,6665.y1003.60-H9-b12-1.97.I,8220DB000620,18,Abbey Street,"Talbot Street, stop 620",53.350961,-6.252780
36980,6665.y1003.60-H9-b12-1.97.I,8220DB007569,19,Abbey Street,"Gardiner Street, stop 7569",53.350451,-6.255970


### There is some data not here (such as Irish names), but located in another document. Let's include that data in our dataframe.

In [16]:
route_sequences_df = pd.read_csv("route_seqs.csv")
db_sequences_df = route_sequences_df[route_sequences_df["Operator"]=="DB"]
print("Number of unique spots in sequences: ", len(db_sequences_df["AtcoCode"].unique().tolist()))
print("Number of unique spots in sequences: ", len(merged_df["stop_id"].unique().tolist()))
db_sequences_df.head(4)

Number of unique spots in sequences:  4207
Number of unique spots in sequences:  4221


Unnamed: 0,ShapeId,Operator,StopSequence,RouteName,RouteDescription,Direction,AtcoCode,PlateCode,Latitude,Longitude,ShortCommonName_en,ShortCommonName_ga,HasPole,HasShelter,CarouselType,FlagData,RouteData
39347,60-1-b12-1.1.O,DB,1,1,Shanard Avenue - St. John's Church,O,8240DB000226,226.0,53.391141,-6.2622,Shanard Avenue,Ascail Sheanaird,Pole,No Shelter,Short Carousel,Circular,"1, 104"
39348,60-1-b12-1.1.O,DB,2,1,Shanard Avenue - St. John's Church,O,8220DB000228,228.0,53.391877,-6.25972,Shanliss Road,Br an tSeanleasa,Unknown,Unknown,Unknown,Unknown,"1, 104"
39349,60-1-b12-1.1.O,DB,3,1,Shanard Avenue - St. John's Church,O,8240DB000229,229.0,53.3914,-6.256536,Oldtown Road,Br an tSeanbhaile,No Pole,Shelter,Shelter Panel,Circular,"1, 104"
39350,60-1-b12-1.1.O,DB,4,1,Shanard Avenue - St. John's Church,O,8240DB000227,227.0,53.391144,-6.251345,Shanliss Drive,Céide an tSeanleasa,Pole,No Shelter,Short Carousel,Circular,"1, 104"


### Unfortunately some stops will be missing their irish names. I will investigate further solutions to this.

### For now, let's filter for the data we want to append to our dataframe.

In [17]:
db_stops_filtered = db_sequences_df[["AtcoCode", "ShortCommonName_ga"]]
db_stops_filtered

Unnamed: 0,AtcoCode,ShortCommonName_ga
39347,8240DB000226,Ascail Sheanaird
39348,8220DB000228,Br an tSeanleasa
39349,8240DB000229,Br an tSeanbhaile
39350,8240DB000227,Céide an tSeanleasa
39351,8240DB000230,Br na Seanabhann
...,...,...
72709,8220DB001357,Sráid an Chiste
72710,8220DB001359,Faiche an Choláiste
72711,8220DB000319,Sráid Westmoreland
72712,8220DB000281,Sd Uí Chonaill Uacht


### And now we need to merge in the irish name and make some other changes

In [18]:
print(merged_df.shape)
merged_df.head(5)

(36982, 7)


Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign,stop_name,stop_lat,stop_lon
0,5242.y1005.60-1-d12-1.1.O,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.2622
1,5242.y1005.60-1-d12-1.1.O,8220DB000228,2,Sandymount,"Shanliss Road, stop 228",53.391877,-6.25972
2,5242.y1005.60-1-d12-1.1.O,8240DB000229,3,Sandymount,"Oldtown Road, stop 229",53.3914,-6.256536
3,5242.y1005.60-1-d12-1.1.O,8240DB000227,4,Sandymount,"Shanliss Drive, stop 227",53.391144,-6.251345
4,5242.y1005.60-1-d12-1.1.O,8240DB000230,5,Sandymount,"Shanowen Road, stop 230",53.389888,-6.249066


In [19]:
my_list = [tuple(r) for r in db_stops_filtered.to_numpy()]
filtered_list = []
for item in my_list:
    filtered_list.append(item[0])

In [20]:
# function to find and append the irish name for the stop
def agus_ainm(row):
    if row['stop_id'] in filtered_list:
        item = my_list[filtered_list.index(row['stop_id'])]
        return item[1]


# function to find and match the row with its correct customer facing route number
def route_finder(row):
    id_strings = row['trip_id'].split('-')
    return id_strings[1]


# function to create id column values
def create_id(row):
    return row['shape_id'] + "_" + row['stop_num']


# function for isolating the stop number for each row
def stop_finder(row):
    stop_string = row['stop_name'].split(' ')
    if stop_string[-1].isdigit:
        return stop_string[-1]
    else:
        return "No stop number."
    
def route_direction(row):
    trip_string = row['trip_id']
    direction = trip_string[-1]
    if direction == "O":
        return "outbound"
    if direction == "I":
        return "inbound"


# and one more to change the trip_id to a shape_id as we have no need of unique trip info here
# this whole process has been about removing duplicated "shape" data, so unique trips don't help
def trip_to_shape_id(row):
    id_string = row['trip_id'].split('.')
    shape_id = id_string[2] + '.' + id_string[3] + '.' + id_string[4]
    return shape_id

In [21]:
print("Merging and altering dateframe, this may take some time...")

merged_df['ainm'] = merged_df.apply(agus_ainm, axis=1)
print("Irish names added successfully.")

Merging and altering dateframe, this may take some time...
Irish names added successfully.


In [22]:
merged_df['route_num'] = merged_df.apply(route_finder, axis=1)
print("Route numbers added successfully.")

Route numbers added successfully.


In [23]:
merged_df['shape_id'] = merged_df.apply(trip_to_shape_id, axis=1)
print("trip id removed and replaced with shape id.")

trip id removed and replaced with shape id.


In [24]:
merged_df['stop_num'] = merged_df.apply(stop_finder, axis=1)
print("stop numbers added successfully.")

stop numbers added successfully.


In [25]:
merged_df["direction"] = merged_df.apply(route_direction, axis=1)
print("Route direction added successfully.")

Route direction added successfully.


In [26]:
merged_df["id"] = merged_df.apply(create_id, axis=1)
print("row id value created successfully.")

print("Data successfully merged. Altering column headers...")

merged_df.rename(columns={"stop_headsign": "destination",
                          "stop_lat": "latitude",
                          "stop_lon": "longitude"}, inplace=True)

merged_df

row id value created successfully.
Data successfully merged. Altering column headers...


Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id
0,5242.y1005.60-1-d12-1.1.O,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird,1,60-1-d12-1.1.O,226,outbound,60-1-d12-1.1.O_226
1,5242.y1005.60-1-d12-1.1.O,8220DB000228,2,Sandymount,"Shanliss Road, stop 228",53.391877,-6.259720,Br an tSeanleasa,1,60-1-d12-1.1.O,228,outbound,60-1-d12-1.1.O_228
2,5242.y1005.60-1-d12-1.1.O,8240DB000229,3,Sandymount,"Oldtown Road, stop 229",53.391400,-6.256536,Br an tSeanbhaile,1,60-1-d12-1.1.O,229,outbound,60-1-d12-1.1.O_229
3,5242.y1005.60-1-d12-1.1.O,8240DB000227,4,Sandymount,"Shanliss Drive, stop 227",53.391144,-6.251345,Céide an tSeanleasa,1,60-1-d12-1.1.O,227,outbound,60-1-d12-1.1.O_227
4,5242.y1005.60-1-d12-1.1.O,8240DB000230,5,Sandymount,"Shanowen Road, stop 230",53.389888,-6.249066,Br na Seanabhann,1,60-1-d12-1.1.O,230,outbound,60-1-d12-1.1.O_230
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36977,6665.y1003.60-H9-b12-1.97.I,8220DB000619,16,Abbey Street,"Amiens Street, stop 619",53.354045,-6.247305,Sráid Amiens,H9,60-H9-b12-1.97.I,619,inbound,60-H9-b12-1.97.I_619
36978,6665.y1003.60-H9-b12-1.97.I,8220DB000675,17,Abbey Street,"Buckingham St Lower, stop 675",53.352858,-6.248526,Sd Buckingham Íocht,H9,60-H9-b12-1.97.I,675,inbound,60-H9-b12-1.97.I_675
36979,6665.y1003.60-H9-b12-1.97.I,8220DB000620,18,Abbey Street,"Talbot Street, stop 620",53.350961,-6.252780,Sráid Thalbóid,H9,60-H9-b12-1.97.I,620,inbound,60-H9-b12-1.97.I_620
36980,6665.y1003.60-H9-b12-1.97.I,8220DB007569,19,Abbey Street,"Gardiner Street, stop 7569",53.350451,-6.255970,Sráid Ghairdinéir,H9,60-H9-b12-1.97.I,7569,inbound,60-H9-b12-1.97.I_7569


In [27]:
nulls = merged_df[merged_df["ainm"].isnull()]
nulls

Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id
36,5242.y1005.60-1-d12-1.1.O,8220DB007738,37,Sandymount,"Gilford Road, stop 7738",53.329199,-6.214437,,1,60-1-d12-1.1.O,7738,outbound,60-1-d12-1.1.O_7738
37,5242.y1005.60-1-d12-1.1.O,8220DB007739,38,Sandymount,"Park Avenue, stop 7739",53.326629,-6.214485,,1,60-1-d12-1.1.O,7739,outbound,60-1-d12-1.1.O_7739
63,5243.y1005.60-1-d12-1.3.I,8220DB007740,2,Shanard Road,"Park Avenue, stop 7740",53.326876,-6.214730,,1,60-1-d12-1.3.I,7740,inbound,60-1-d12-1.3.I_7740
64,5243.y1005.60-1-d12-1.3.I,8220DB007741,3,Shanard Road,"Gilford Road, stop 7741",53.329279,-6.214990,,1,60-1-d12-1.3.I,7741,inbound,60-1-d12-1.3.I_7741
102,7793.y1006.60-1-d12-1.4.I,8220DB007740,2,O'Connell Street,"Park Avenue, stop 7740",53.326876,-6.214730,,1,60-1-d12-1.4.I,7740,inbound,60-1-d12-1.4.I_7740
...,...,...,...,...,...,...,...,...,...,...,...,...,...
34991,10553.y1005.60-7D-b12-1.123.I,8250DB007826,40,Mountjoy Square,"Crofton Road, stop 7826",53.295590,-6.137332,,7D,60-7D-b12-1.123.I,7826,inbound,60-7D-b12-1.123.I_7826
34992,10553.y1005.60-7D-b12-1.123.I,8250DB007825,41,Mountjoy Square,"Clarence Place, stop 7825",53.295153,-6.142722,,7D,60-7D-b12-1.123.I,7825,inbound,60-7D-b12-1.123.I_7825
36776,11947.y1005.60-H2-b12-1.351.O,8240DB004503,44,Malahide,"Hazel Grove, stop 4503",53.419071,-6.140771,,H2,60-H2-b12-1.351.O,4503,outbound,60-H2-b12-1.351.O_4503
36793,11929.y1005.60-H2-b12-1.352.I,8240DB003586,2,Abbey Street,"Malahide, stop 3586",53.451058,-6.151088,,H2,60-H2-b12-1.352.I,3586,inbound,60-H2-b12-1.352.I_3586


### Let's make sure we don't have too many iterations of a route before submitting to the database

In [28]:
# the output of this cell is long, so it is commented out for now
final_trip_ids = merged_df["id"].unique().tolist()
for trip in final_trip_ids:
    test = merged_df[merged_df["id"]==trip]
    #print(test.shape)

In [29]:
merged_df

Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id
0,5242.y1005.60-1-d12-1.1.O,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird,1,60-1-d12-1.1.O,226,outbound,60-1-d12-1.1.O_226
1,5242.y1005.60-1-d12-1.1.O,8220DB000228,2,Sandymount,"Shanliss Road, stop 228",53.391877,-6.259720,Br an tSeanleasa,1,60-1-d12-1.1.O,228,outbound,60-1-d12-1.1.O_228
2,5242.y1005.60-1-d12-1.1.O,8240DB000229,3,Sandymount,"Oldtown Road, stop 229",53.391400,-6.256536,Br an tSeanbhaile,1,60-1-d12-1.1.O,229,outbound,60-1-d12-1.1.O_229
3,5242.y1005.60-1-d12-1.1.O,8240DB000227,4,Sandymount,"Shanliss Drive, stop 227",53.391144,-6.251345,Céide an tSeanleasa,1,60-1-d12-1.1.O,227,outbound,60-1-d12-1.1.O_227
4,5242.y1005.60-1-d12-1.1.O,8240DB000230,5,Sandymount,"Shanowen Road, stop 230",53.389888,-6.249066,Br na Seanabhann,1,60-1-d12-1.1.O,230,outbound,60-1-d12-1.1.O_230
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36977,6665.y1003.60-H9-b12-1.97.I,8220DB000619,16,Abbey Street,"Amiens Street, stop 619",53.354045,-6.247305,Sráid Amiens,H9,60-H9-b12-1.97.I,619,inbound,60-H9-b12-1.97.I_619
36978,6665.y1003.60-H9-b12-1.97.I,8220DB000675,17,Abbey Street,"Buckingham St Lower, stop 675",53.352858,-6.248526,Sd Buckingham Íocht,H9,60-H9-b12-1.97.I,675,inbound,60-H9-b12-1.97.I_675
36979,6665.y1003.60-H9-b12-1.97.I,8220DB000620,18,Abbey Street,"Talbot Street, stop 620",53.350961,-6.252780,Sráid Thalbóid,H9,60-H9-b12-1.97.I,620,inbound,60-H9-b12-1.97.I_620
36980,6665.y1003.60-H9-b12-1.97.I,8220DB007569,19,Abbey Street,"Gardiner Street, stop 7569",53.350451,-6.255970,Sráid Ghairdinéir,H9,60-H9-b12-1.97.I,7569,inbound,60-H9-b12-1.97.I_7569


In [30]:
merged_df[merged_df["stop_num"]=="No stop number."]

Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id


### There are no unusual outputs in the cell above. I am happy to migrate this dataframe to the database

In [31]:
unique_routes = merged_df["route_num"].unique().tolist()
print(unique_routes)

['1', '11', '116', '118', '120', '122', '123', '13', '130', '14', '140', '142', '145', '15', '150', '151', '155', '15A', '15B', '15D', '16', '16D', '25', '25A', '25B', '25D', '25X', '26', '27', '27A', '27B', '27X', '29A', '31', '31A', '31B', '32', '32X', '33', '33D', '33E', '33X', '37', '38', '38A', '38B', '38D', '39', '39A', '39X', '4', '40', '40B', '40D', '40E', '41', '41B', '41C', '41D', '41X', '42', '43', '44', '44B', '46A', '46E', '47', '49', '51D', '53', '54A', '56A', '61', '65', '65B', '66', '66A', '66B', '66E', '66X', '67', '67X', '68', '68A', '68X', '69', '69X', '7', '70', '77A', '77X', '79', '79A', '7A', '7B', '7D', '83', '83A', '84', '84A', '84X', '9', '6', 'H1', 'H2', 'H3', 'H9']


In [32]:
uniques = merged_df.iloc[0:0]
for route in unique_routes:
    current_route_df = merged_df[merged_df["route_num"] == route] 
    no_repeats = current_route_df.drop_duplicates(subset=['stop_num', "direction"], keep='first')

    uniques = uniques.append(no_repeats, ignore_index=True)
    
uniques = uniques.drop(["trip_id", "shape_id", "id"], axis=1)

# Section is for the provision of all unique routes, and all unique stops

### All unique routes

In [36]:
def stops(row, df):
    current_route = row['route_num']
    stops_df = df[df['route_num']==current_route]
    outbound_stops = stops_df[stops_df['direction']=="outbound"]
    inbound_stops = stops_df[stops_df['direction']=="inbound"]
    outbound_stops = outbound_stops["stop_num"].unique().tolist()
    inbound_stops = inbound_stops["stop_num"].unique().tolist()
    
    if row["direction"] == "outbound":
        stops = outbound_stops
    if row["direction"] == "inbound":
        stops = inbound_stops
    
    if len(stops) == 0:
        stops = "None"
    else:
         stops = ", ".join(stops)
    return stops

def name(row, df):
    current_route = row['route_num']
    current = df[df['route_num']==current_route]
    inbound_names = current[current["direction"]=="inbound"]
    outbound_names = current[current["direction"]=="outbound"]
    inbound_names = inbound_names['stop_name'].unique().tolist()
    outbound_names = outbound_names['stop_name'].unique().tolist()
    
    if row["direction"] == "outbound":
        names = outbound_names
    if row["direction"] == "inbound":
        names = inbound_names
    
    if len(names) == 0:
        names = "None"
        
    else:
        names_modified = []
        for item in names:
            names_modified.append(item.split(",")[0])
        names = names_modified
        names = ([str(x) for x in names])
        names = ", ".join(names)
    return names


def coordinates(row, df, coordinate):
    current_route = row['route_num']
    current = df[df['route_num']==current_route]
    inbound_coordinate = current[current["direction"]=="inbound"]
    outbound_coordinate = current[current["direction"]=="outbound"]
    inbound_coordinate = inbound_coordinate[coordinate].unique().tolist()
    outbound_coordinate = outbound_coordinate[coordinate].unique().tolist()
    
    if row["direction"] == "outbound":
        coord = outbound_coordinate
    if row["direction"] == "inbound":
        coord = inbound_coordinate
    
    if len(coord) == 0:
        coord = "None"
    else:
        coord = ([str(x) for x in coord])
        coord = ", ".join(coord)
    return coord

def create_id(row):
    return row["route_num"] + "_" + row["direction"]

In [37]:
shape_ids = merged_df["shape_id"].unique().tolist()
route_nums = merged_df["route_num"].unique().tolist()

In [38]:
unique_routes = merged_df.drop_duplicates(subset=['route_num', "direction", "stop_sequence"], keep='first')
route44 = unique_routes[(unique_routes["route_num"] == "44") & (unique_routes["direction"]=="inbound")]
route44

Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id
11063,10499.y1006.60-44-d12-1.242.I,8350DB007823,1,Dundrum Road,"Enniskerry Village, stop 7823",53.194198,-6.170184,,44,60-44-d12-1.242.I,7823,inbound,60-44-d12-1.242.I_7823
11064,10499.y1006.60-44-d12-1.242.I,8530DB007824,2,Dundrum Road,"Enniskerry Village, stop 7824",53.192558,-6.171137,,44,60-44-d12-1.242.I,7824,inbound,60-44-d12-1.242.I_7824
11065,10499.y1006.60-44-d12-1.242.I,8350DB004114,3,Dundrum Road,"Enniskerry Road, stop 4114",53.192889,-6.172739,Br Áth na Sceire,44,60-44-d12-1.242.I,4114,inbound,60-44-d12-1.242.I_4114
11066,10499.y1006.60-44-d12-1.242.I,8350DB004115,4,Dundrum Road,"St. Mary’s Church, stop 4115",53.193826,-6.174046,Teampall Mhuire,44,60-44-d12-1.242.I,4115,inbound,60-44-d12-1.242.I_4115
11067,10499.y1006.60-44-d12-1.242.I,8350DB004116,5,Dundrum Road,"Environmental Centre, stop 4116",53.198991,-6.173841,Lárionad Comhshaoil,44,60-44-d12-1.242.I,4116,inbound,60-44-d12-1.242.I_4116
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11173,3718.y1005.60-44-d12-1.243.I,8220DB000208,78,DCU,"Glendun Road, stop 208",53.386654,-6.247950,Br Ghleann Doinne,44,60-44-d12-1.243.I,208,inbound,60-44-d12-1.243.I_208
11174,3718.y1005.60-44-d12-1.243.I,8220DB000209,79,DCU,"Larkhill Road, stop 209",53.387271,-6.245910,Cnoc na Fuiseoige,44,60-44-d12-1.243.I,209,inbound,60-44-d12-1.243.I_209
11175,3718.y1005.60-44-d12-1.243.I,8220DB000210,80,DCU,"Crestfield Road, stop 210",53.387594,-6.248318,Gort an Mhullaigh,44,60-44-d12-1.243.I,210,inbound,60-44-d12-1.243.I_210
11176,3718.y1005.60-44-d12-1.243.I,8220DB001643,81,DCU,"St Aidan's School, stop 1643",53.384802,-6.251003,Scoil Naomh Aodhán,44,60-44-d12-1.243.I,1643,inbound,60-44-d12-1.243.I_1643


In [39]:
unique_routes = merged_df.drop_duplicates(subset=['route_num', "direction", "stop_sequence"], keep='first')
unique_routes['stops'] = unique_routes.apply(stops, df=merged_df, axis=1)
unique_routes['longitudes'] = unique_routes.apply(coordinates, df=merged_df, coordinate="longitude", axis=1)
unique_routes['latitudes'] = unique_routes.apply(coordinates, df=merged_df, coordinate="latitude", axis=1)
unique_routes['names'] = unique_routes.apply(name, df=merged_df, axis=1)
unique_routes['id'] = unique_routes.apply(create_id, axis=1)


unique_routes = unique_routes[["id", 
                               "route_num", 
                               "stops",  
                               "latitudes",
                               "longitudes",
                               "direction",
                               "destination",
                               "names"]].sort_values(by='route_num')

In [40]:
unique_routes = unique_routes.drop_duplicates(subset=['id'], keep="first")
unique_routes

Unnamed: 0,id,route_num,stops,latitudes,longitudes,direction,destination,names
0,1_outbound,1,"226, 228, 229, 227, 230, 231, 1641, 1642, 213,...","53.391140564198, 53.391877392781495, 53.391399...","-6.26220046436849, -6.259719572913929, -6.2565...",outbound,Sandymount,"Shanard Avenue, Shanliss Road, Oldtown Road, S..."
80,1_inbound,1,"381, 7740, 7741, 387, 388, 389, 393, 371, 391,...","53.3243237661094, 53.3268757414366, 53.3292794...","-6.21237419337661, -6.21472966508216, -6.21498...",inbound,Shanard Road,"St John's Church, Park Avenue, Gilford Road, S..."
221,11_inbound,11,"449, 450, 3181, 451, 447, 5045, 4460, 453, 454...","53.2763228251657, 53.2750940462784, 53.2767339...","-6.2169273788984, -6.21349966939981, -6.207777...",inbound,St Pappin's Rd,"Blackthorn Road, Heather Road, Carmanhall Road..."
168,11_outbound,11,"6122, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ...","53.3863727059716, 53.3862067871037, 53.3861268...","-6.27430166369757, -6.270910692540951, -6.2678...",outbound,Sandyford B.D.,"St Pappin's Road, Dean Swift Road, Maolbuille ..."
385,116_inbound,116,"2955, 7067, 2956, 2965, 2967, 2968, 2969, 2970...","53.27787196635321, 53.2778338338973, 53.278120...","-6.28953336669198, -6.28875510387515, -6.28616...",inbound,Parnell Sq,"Ballyboden Church, Whitechurch Way, Glenmore P..."
...,...,...,...,...,...,...,...,...
36812,H2_inbound,H2,"3624, 3586, 3587, 3588, 3590, 3589, 3591, 3592...","53.451270736402904, 53.45105809285379, 53.4511...","-6.15147039768367, -6.151088146827759, -6.1451...",inbound,Abbey Street,"St James's Terrace, Malahide, The Old Golf Lin..."
36746,H2_outbound,H2,"7591, 496, 515, 516, 4384, 518, 519, 521, 522,...","53.348890153038795, 53.3494968761366, 53.35353...","-6.25682977423594, -6.2522235786816, -6.248092...",outbound,Malahide,"Abbey Street Lower, Busáras, Portland Row, Nor..."
36926,H3_inbound,H3,"707, 568, 569, 570, 571, 572, 573, 574, 575, 5...","53.37248953133739, 53.373935262296, 53.3755367...","-6.05864990209616, -6.055803553342121, -6.0547...",inbound,Abbey Street,"Howth Summit, Kitestown Road, Thormanby Hill, ..."
36887,H3_outbound,H3,"7591, 496, 515, 516, 4384, 518, 519, 521, 522,...","53.348890153038795, 53.3494968761366, 53.35353...","-6.25682977423594, -6.2522235786816, -6.248092...",outbound,Howth Summit,"Abbey Street Lower, Busáras, Portland Row, Nor..."


In [41]:
import sqlite3
db = sqlite3.connect("db.sqlite3")
unique_routes.to_sql("bus_routes_uniqueroutes", db, if_exists="replace", index=False)

## All unique stops 

In [42]:
unique_stops = merged_df.drop_duplicates(subset=['stop_num'], keep='first')
unique_stops = unique_stops[["stop_id", 
                             "latitude", 
                             "longitude", 
                             "stop_name", 
                             "ainm",
                             "stop_num"]].sort_values(by='stop_id')
unique_stops

Unnamed: 0,stop_id,latitude,longitude,stop_name,ainm,stop_num
8139,8220DB000002,53.352244,-6.263723,"Parnell Square West, stop 2",Cg Parnell Thiar,2
471,8220DB000003,53.352309,-6.263811,"Parnell Square West, stop 3",Cg Parnell Thiar,3
15900,8220DB000004,53.352575,-6.264175,"Parnell Square West, stop 4",Cg Parnell Thiar,4
3843,8220DB000006,53.352749,-6.264454,"Parnell Square West, stop 6",Cg Parnell Thiar,6
1423,8220DB000007,53.352841,-6.264570,"Parnell Square West, stop 7",Cg Parnell Thiar,7
...,...,...,...,...,...,...
18218,8350DB007461,53.128932,-6.062803,"Charlesland, stop 7461",Acra na mBodach,7461
18217,8350DB007462,53.128801,-6.062480,"Charlesland, stop 7462",Acra na mBodach,7462
2969,8350DB007574,53.182348,-6.130064,"Southern Cross, stop 7574",Cros an Deiscirt,7574
11063,8350DB007823,53.194198,-6.170184,"Enniskerry Village, stop 7823",,7823


In [43]:
merged_df

Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id
0,5242.y1005.60-1-d12-1.1.O,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird,1,60-1-d12-1.1.O,226,outbound,60-1-d12-1.1.O_226
1,5242.y1005.60-1-d12-1.1.O,8220DB000228,2,Sandymount,"Shanliss Road, stop 228",53.391877,-6.259720,Br an tSeanleasa,1,60-1-d12-1.1.O,228,outbound,60-1-d12-1.1.O_228
2,5242.y1005.60-1-d12-1.1.O,8240DB000229,3,Sandymount,"Oldtown Road, stop 229",53.391400,-6.256536,Br an tSeanbhaile,1,60-1-d12-1.1.O,229,outbound,60-1-d12-1.1.O_229
3,5242.y1005.60-1-d12-1.1.O,8240DB000227,4,Sandymount,"Shanliss Drive, stop 227",53.391144,-6.251345,Céide an tSeanleasa,1,60-1-d12-1.1.O,227,outbound,60-1-d12-1.1.O_227
4,5242.y1005.60-1-d12-1.1.O,8240DB000230,5,Sandymount,"Shanowen Road, stop 230",53.389888,-6.249066,Br na Seanabhann,1,60-1-d12-1.1.O,230,outbound,60-1-d12-1.1.O_230
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36977,6665.y1003.60-H9-b12-1.97.I,8220DB000619,16,Abbey Street,"Amiens Street, stop 619",53.354045,-6.247305,Sráid Amiens,H9,60-H9-b12-1.97.I,619,inbound,60-H9-b12-1.97.I_619
36978,6665.y1003.60-H9-b12-1.97.I,8220DB000675,17,Abbey Street,"Buckingham St Lower, stop 675",53.352858,-6.248526,Sd Buckingham Íocht,H9,60-H9-b12-1.97.I,675,inbound,60-H9-b12-1.97.I_675
36979,6665.y1003.60-H9-b12-1.97.I,8220DB000620,18,Abbey Street,"Talbot Street, stop 620",53.350961,-6.252780,Sráid Thalbóid,H9,60-H9-b12-1.97.I,620,inbound,60-H9-b12-1.97.I_620
36980,6665.y1003.60-H9-b12-1.97.I,8220DB007569,19,Abbey Street,"Gardiner Street, stop 7569",53.350451,-6.255970,Sráid Ghairdinéir,H9,60-H9-b12-1.97.I,7569,inbound,60-H9-b12-1.97.I_7569


In [44]:
all_shapes = []
all_trips = stop_times["trip_id"].tolist()
for trip_id in all_trips:
    id_strings = trip_id.split('.')
    shape_id = id_strings[2] + '.' + id_strings[3] + '.' + id_strings[4]
    if shape_id not in all_shapes:
        all_shapes.append(shape_id)

In [45]:
unique_routes = merged_df["route_num"].unique().tolist()
filtered_df = merged_df.iloc[0:0]

for db_route in unique_routes:
    current_shape_df = merged_df[merged_df["route_num"] == db_route] 
    no_repeats = current_shape_df.drop_duplicates(subset=['stop_num'], keep='first')

    filtered_df = filtered_df.append(no_repeats, ignore_index=True)
    
filtered_df = filtered_df.drop(["trip_id", "shape_id"], axis=1)
    
filtered_df

Unnamed: 0,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,stop_num,direction,id
0,8240DB000226,1,Sandymount,"Shanard Avenue, stop 226",53.391141,-6.262200,Ascail Sheanaird,1,226,outbound,60-1-d12-1.1.O_226
1,8220DB000228,2,Sandymount,"Shanliss Road, stop 228",53.391877,-6.259720,Br an tSeanleasa,1,228,outbound,60-1-d12-1.1.O_228
2,8240DB000229,3,Sandymount,"Oldtown Road, stop 229",53.391400,-6.256536,Br an tSeanbhaile,1,229,outbound,60-1-d12-1.1.O_229
3,8240DB000227,4,Sandymount,"Shanliss Drive, stop 227",53.391144,-6.251345,Céide an tSeanleasa,1,227,outbound,60-1-d12-1.1.O_227
4,8240DB000230,5,Sandymount,"Shanowen Road, stop 230",53.389888,-6.249066,Br na Seanabhann,1,230,outbound,60-1-d12-1.1.O_230
...,...,...,...,...,...,...,...,...,...,...,...
11069,8220DB000619,16,Abbey Street,"Amiens Street, stop 619",53.354045,-6.247305,Sráid Amiens,H9,619,inbound,60-H9-b12-1.97.I_619
11070,8220DB000675,17,Abbey Street,"Buckingham St Lower, stop 675",53.352858,-6.248526,Sd Buckingham Íocht,H9,675,inbound,60-H9-b12-1.97.I_675
11071,8220DB000620,18,Abbey Street,"Talbot Street, stop 620",53.350961,-6.252780,Sráid Thalbóid,H9,620,inbound,60-H9-b12-1.97.I_620
11072,8220DB007569,19,Abbey Street,"Gardiner Street, stop 7569",53.350451,-6.255970,Sráid Ghairdinéir,H9,7569,inbound,60-H9-b12-1.97.I_7569


In [46]:
route44 = merged_df[(merged_df["route_num"]=="44") & (merged_df["direction"]=="inbound")]
route44.drop_duplicates(subset=['stop_sequence'], keep='first').tail(50)

Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id
11092,10499.y1006.60-44-d12-1.242.I,8250DB002838,30,Dundrum Road,"Ballawley Park, stop 2838",53.279757,-6.232051,Pc Bhaile Amhlaoibh,44,60-44-d12-1.242.I,2838,inbound,60-44-d12-1.242.I_2838
11093,10499.y1006.60-44-d12-1.242.I,8250DB002839,31,Dundrum Road,"Balally Road, stop 2839",53.28287,-6.234067,Br Bhaile Amhlaoibh,44,60-44-d12-1.242.I,2839,inbound,60-44-d12-1.242.I_2839
11094,10499.y1006.60-44-d12-1.242.I,8250DB002840,32,Dundrum Road,"Balally Drive, stop 2840",53.284334,-6.237051,Cde Bhaile Amhlaoibh,44,60-44-d12-1.242.I,2840,inbound,60-44-d12-1.242.I_2840
11095,10499.y1006.60-44-d12-1.242.I,8250DB002841,33,Dundrum Road,"Dundrum Centre, stop 2841",53.287184,-6.240114,Ionad Dhún Droma,44,60-44-d12-1.242.I,2841,inbound,60-44-d12-1.242.I_2841
11096,10499.y1006.60-44-d12-1.242.I,8250DB002865,34,Dundrum Road,"Holy Cross Church, stop 2865",53.289704,-6.24403,Séip na Croise Naofa,44,60-44-d12-1.242.I,2865,inbound,60-44-d12-1.242.I_2865
11097,10499.y1006.60-44-d12-1.242.I,8250DB002866,35,Dundrum Road,"Dundrum Luas, stop 2866",53.29158,-6.245709,Luas Dhún Droma,44,60-44-d12-1.242.I,2866,inbound,60-44-d12-1.242.I_2866
11098,10499.y1006.60-44-d12-1.242.I,8250DB002892,36,Dundrum Road,"Rosemount Estate, stop 2892",53.295354,-6.246199,Eas Chnocán na Rós,44,60-44-d12-1.242.I,2892,inbound,60-44-d12-1.242.I_2892
11135,3718.y1005.60-44-d12-1.243.I,8250DB002893,37,DCU,"Frankfort, stop 2893",53.297607,-6.246632,Frankfort,44,60-44-d12-1.243.I,2893,inbound,60-44-d12-1.243.I_2893
11136,3718.y1005.60-44-d12-1.243.I,8250DB002894,38,DCU,"Dundrum Hospital, stop 2894",53.300932,-6.245955,Ospidéal Dhun Droma,44,60-44-d12-1.243.I,2894,inbound,60-44-d12-1.243.I_2894
11137,3718.y1005.60-44-d12-1.243.I,8250DB002895,39,DCU,"Dundrum Business Pk, stop 2895",53.303043,-6.245284,Pc Gnó Dhún Droma,44,60-44-d12-1.243.I,2895,inbound,60-44-d12-1.243.I_2895


In [47]:
shapes = route44["shape_id"].unique().tolist()

In [48]:
duplicateRowsDF = route44[route44.duplicated(subset=["stop_sequence"])]
duplicateRowsDF.sort_values(by=['stop_num']).head(50)

Unnamed: 0,trip_id,stop_id,stop_sequence,destination,stop_name,latitude,longitude,ainm,route_num,shape_id,stop_num,direction,id
28807,7593.y1005.60-44-b12-1.236.I,8220DB000010,63,DCU,"Parnell Square West, stop 10",53.353392,-6.265389,Cg Parnell Thiar,44,60-44-b12-1.236.I,10,inbound,60-44-b12-1.236.I_10
28800,7593.y1005.60-44-b12-1.236.I,8220DB001074,53,DCU,"Harcourt Luas, stop 1074",53.333795,-6.263977,Luas Fhearchair,44,60-44-b12-1.236.I,1074,inbound,60-44-b12-1.236.I_1074
28808,7593.y1005.60-44-b12-1.236.I,8220DB000012,64,DCU,"Upper Dorset Street, stop 12",53.357108,-6.264382,Sráid Dorset Uacht,44,60-44-b12-1.236.I,12,inbound,60-44-b12-1.236.I_12
28809,7593.y1005.60-44-b12-1.236.I,8220DB000014,65,DCU,"Dorset Street Lower, stop 14",53.358531,-6.262777,Sráid Dorset Íocht,44,60-44-b12-1.236.I,14,inbound,60-44-b12-1.236.I_14
28810,7593.y1005.60-44-b12-1.236.I,8220DB000015,66,DCU,"Innisfallen Parade, stop 15",53.360267,-6.260978,Pd Inis Faithleann,44,60-44-b12-1.236.I,15,inbound,60-44-b12-1.236.I_15
28825,7593.y1005.60-44-b12-1.236.I,8220DB001643,81,DCU,"St Aidan's School, stop 1643",53.384802,-6.251003,Scoil Naomh Aodhán,44,60-44-b12-1.236.I,1643,inbound,60-44-b12-1.236.I_1643
28811,7593.y1005.60-44-b12-1.236.I,8220DB000017,67,DCU,"Drumcondra Rail Stn, stop 17",53.363078,-6.258399,Stáis Dhroim Conrach,44,60-44-b12-1.236.I,17,inbound,60-44-b12-1.236.I_17
28812,7593.y1005.60-44-b12-1.236.I,8220DB000018,68,DCU,"Dargle Road, stop 18",53.365856,-6.255957,Bóthar na Deargaile,44,60-44-b12-1.236.I,18,inbound,60-44-b12-1.236.I_18
28813,7593.y1005.60-44-b12-1.236.I,8220DB000019,69,DCU,"Botanic Avenue, stop 19",53.367235,-6.2556,Asc Gharr na Lus,44,60-44-b12-1.236.I,19,inbound,60-44-b12-1.236.I_19
28817,7593.y1005.60-44-b12-1.236.I,8220DB000203,73,DCU,"Whitehall College, stop 203",53.376393,-6.25001,Col an Halla Bháin,44,60-44-b12-1.236.I,203,inbound,60-44-b12-1.236.I_203


In [49]:
count = 0
for shape in shapes:   
    shape_df = merged_df[(merged_df["shape_id"]==shape) & (merged_df["direction"]=="outbound")]
    pd.set_option('display.max_rows', shape1.shape[0]+1)
    print(shape, " ", shape_df.shape)
    count += 1

NameError: name 'shape1' is not defined

In [None]:
shape1 = merged_df[(merged_df["shape_id"]==shapes[3]) & (merged_df["direction"]=="outbound")]
shape1

In [None]:
coords=route44["lat_long"].unique().tolist()
print(len(coords))