Stops django model

```
class Stops(models.Model):
    stopid = models.CharField(max_length=10, primary_key=True)
    address = models.TextField()
    lat = models.DecimalField(max_digits=10, decimal_places=8)
    lng = models.DecimalField(max_digits=10, decimal_places=8)
    routes = ArrayField(models.CharField(max_length=10))
    operator = models.CharField(max_length=10)

    def __str__(self):
        return "STOP: "+self.stopid+" OP: "+self.operator

    class Meta:
        verbose_name_plural = "Stops" 
        indexes = [
            models.Index(fields=['stopid'],)
        ]10
```

Problem - the stops table on the database appears to have non-bus stations outside dublin.

In [1]:
import pandas as pd

# Prevents tables from being truncated.
pd.set_option('display.max_columns', None)

In [2]:
# Create engine to interact with postgres database
# make sure tunnel has been created on port 5433 before running

from sqlalchemy import create_engine

URI="localhost"
PORT="5433"
DB = "jetaDb"
USER = "postgres"
PASSWORD = "00001234"
  
engine = create_engine("postgresql://{}:{}@{}:{}/{}"
                       .format(USER, PASSWORD, URI, PORT, DB), echo=True)

  """)


In [3]:
# Import curren stops table into dataframe

sql = "SELECT * FROM main_stops;"

stops = pd.read_sql(sql, engine)

2018-07-05 13:18:51,746 INFO sqlalchemy.engine.base.Engine select version()
2018-07-05 13:18:51,749 INFO sqlalchemy.engine.base.Engine {}
2018-07-05 13:18:51,761 INFO sqlalchemy.engine.base.Engine select current_schema()
2018-07-05 13:18:51,766 INFO sqlalchemy.engine.base.Engine {}
2018-07-05 13:18:51,773 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2018-07-05 13:18:51,774 INFO sqlalchemy.engine.base.Engine {}
2018-07-05 13:18:51,779 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2018-07-05 13:18:51,781 INFO sqlalchemy.engine.base.Engine {}
2018-07-05 13:18:51,787 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2018-07-05 13:18:51,788 INFO sqlalchemy.engine.base.Engine {}
2018-07-05 13:18:51,797 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
20

In [24]:
# Store stops in csv for safekeeping

stops.to_csv('stops_all_ireland.csv', sep=';')

In [4]:
stops.head(5)

Unnamed: 0,stopid,address,lat,lng,routes,operator
0,2,Parnell Square,53.352241,-6.263695,"[38B, 38D, 38, 38A, 46A, 46E]",bac
1,3,Parnell Square,53.352307,-6.263783,"[122, 120]",bac
2,4,Parnell Square,53.352567,-6.264166,"[7, 7B, 9, 7D, 7A]",bac
3,6,Parnell Square,53.352744,-6.264443,[4],bac
4,7,Parnell Square,53.352836,-6.264562,"[40, 140, 40D, 40B, 13]",bac


In [8]:
# Import csv file from Oz

oz_stops = pd.read_csv('../../Data/all_bus_stops.csv', sep=',')

In [9]:
oz_stops.head(5)

Unnamed: 0,stop_id,stop_name,stop_lon,stop_lat
0,7612,Davenport Hotel Merrion Street,-6.250529,53.341347
1,2,"Rotunda, Parnell Square West",-6.263723,53.352244
2,3,"Rotunda, Granby Place",-6.263811,53.352309
3,4,"Rotunda, Rotunda Hospital",-6.264175,53.352575
4,6,"Rotunda, Saint Martin's Chapel",-6.264454,53.352749


In [10]:
# Find stops which are in database stops file but not in Oz's stop file

# Get stops lists
stops_ids = stops['stopid'].tolist()
oz_stops_ids = oz_stops['stop_id'].tolist()

# Stringify oz's stops
oz_stops_ids = list(map(str, oz_stops_ids))

shared_ids = [stopid for stopid in stops_ids if stopid in oz_stops_ids]

In [12]:
# Create a mask True/False of which stopids are shared by both dataframes
# trips.loc[trips['tripid'] == 4093258]

mask = stops['stopid'].isin(shared_ids)

In [13]:
# See first 10 stations that are not shared

stops.loc[~mask].head(10)

Unnamed: 0,stopid,address,lat,lng,routes,operator
1129,1364,South Circular Road,53.332322,-6.28801,"[68, 68A, 122]",bac
4404,210361,Black Ash Park,51.87861,-8.466182,[213],BE
4435,7220,Hillcrest Pk,53.38891,-6.275267,[11],bac
4635,7567,Burton Hall Rd,53.273159,-6.204346,[47],bac
4680,7665,Poolbeg Street,53.346823,-6.256774,"[68X, 68, 68A, 69, 69X]",bac
4682,7667,Barnhill Rd,53.276894,-6.119152,[7D],bac
4683,7668,Charleston Road,53.324137,-6.257887,[18],bac
4684,100011,Porthall (Doherys Shop),54.872772,-7.465057,[489],BE
4685,100031,Porthall (Opp Doherys Shop),54.872706,-7.464902,[489],BE
4686,100041,Dublin (Clare St Cafe Sol),53.341442,-6.251556,"[X2, 2]",BE


In [14]:
# Unique operator values in stops

print(pd.unique(stops['operator'].ravel().tolist()))

['bac' 'BE' 'ir' 'LUAS']


In [15]:
# Find stops which are in in stops but not in oz_stops

not_in_oz = stops.loc[~mask & (stops['operator'] == 'bac')].head(5)
not_in_oz

Unnamed: 0,stopid,address,lat,lng,routes,operator
1129,1364,South Circular Road,53.332322,-6.28801,"[68, 68A, 122]",bac
4435,7220,Hillcrest Pk,53.38891,-6.275267,[11],bac
4635,7567,Burton Hall Rd,53.273159,-6.204346,[47],bac
4680,7665,Poolbeg Street,53.346823,-6.256774,"[68X, 68, 68A, 69, 69X]",bac
4682,7667,Barnhill Rd,53.276894,-6.119152,[7D],bac


In [16]:
# Stops that are in oz_stops but not in stops
# Note: these stops do not exist when looked up online!

oz_mask = oz_stops['stop_id'].isin(shared_ids)
not_in_stops = oz_stops.loc[~oz_mask]
not_in_stops

Unnamed: 0,stop_id,stop_name,stop_lon,stop_lat
228,313,"Dublin, Lower Liffey Street",-6.261972,53.346907
1639,5190,"Dublin City South, Shelter outside Screen Cinema",-6.256946,53.346034
1674,6207,"Dublin, Abbey Theatre",-6.257552,53.348496
1756,7513,"Arbour Hill, Phoenix Park Gate",-6.296795,53.350896
1779,7615,"Rotunda, Rotunda Hospital",-6.263938,53.352508
1785,7491,Busaras Virtual Stops,-6.259879,53.34853
3496,7592,"Skerries, Northcliffe Heights",-6.124463,53.583271
3623,2038,"Dun Laoghaire, Crofton Road",-6.134711,53.294821


In [17]:
# change name of oz_stop id to stopid to allow merge

oz_stops = oz_stops.rename(columns={'stop_id': 'stopid'})
oz_stops.head(1)

Unnamed: 0,stopid,stop_name,stop_lon,stop_lat
0,7612,Davenport Hotel Merrion Street,-6.250529,53.341347


In [18]:
# Convert all oz_stop ids to string format

oz_stops['stopid'] = oz_stops['stopid'].apply(lambda x: str(x))

In [19]:
# Add stop name from oz_stops as column in stops.

merged_stops = pd.merge(stops, oz_stops, on='stopid', how='inner')

In [20]:
merged_stops.head(5)

Unnamed: 0,stopid,address,lat,lng,routes,operator,stop_name,stop_lon,stop_lat
0,2,Parnell Square,53.352241,-6.263695,"[38B, 38D, 38, 38A, 46A, 46E]",bac,"Rotunda, Parnell Square West",-6.263723,53.352244
1,3,Parnell Square,53.352307,-6.263783,"[122, 120]",bac,"Rotunda, Granby Place",-6.263811,53.352309
2,4,Parnell Square,53.352567,-6.264166,"[7, 7B, 9, 7D, 7A]",bac,"Rotunda, Rotunda Hospital",-6.264175,53.352575
3,6,Parnell Square,53.352744,-6.264443,[4],bac,"Rotunda, Saint Martin's Chapel",-6.264454,53.352749
4,7,Parnell Square,53.352836,-6.264562,"[40, 140, 40D, 40B, 13]",bac,"Rotunda, Rotunda Hospital",-6.26457,53.352841


In [21]:
# Select desired columns

new_stops = merged_stops[['stopid', 'stop_name', 'lat', 'lng', 'routes']]
new_stops = new_stops.rename(columns={'stop_name': 'address'})
new_stops.head(5)

Unnamed: 0,stopid,address,lat,lng,routes
0,2,"Rotunda, Parnell Square West",53.352241,-6.263695,"[38B, 38D, 38, 38A, 46A, 46E]"
1,3,"Rotunda, Granby Place",53.352307,-6.263783,"[122, 120]"
2,4,"Rotunda, Rotunda Hospital",53.352567,-6.264166,"[7, 7B, 9, 7D, 7A]"
3,6,"Rotunda, Saint Martin's Chapel",53.352744,-6.264443,[4]
4,7,"Rotunda, Rotunda Hospital",53.352836,-6.264562,"[40, 140, 40D, 40B, 13]"


In [22]:
# Add stops which were not in oz_stops but were in stops

not_in_oz = not_in_oz[['stopid', 'address', 'lat', 'lng', 'routes']]
new_stops = new_stops.append(not_in_oz)
new_stops.head(5)

Unnamed: 0,stopid,address,lat,lng,routes
0,2,"Rotunda, Parnell Square West",53.352241,-6.263695,"[38B, 38D, 38, 38A, 46A, 46E]"
1,3,"Rotunda, Granby Place",53.352307,-6.263783,"[122, 120]"
2,4,"Rotunda, Rotunda Hospital",53.352567,-6.264166,"[7, 7B, 9, 7D, 7A]"
3,6,"Rotunda, Saint Martin's Chapel",53.352744,-6.264443,[4]
4,7,"Rotunda, Rotunda Hospital",53.352836,-6.264562,"[40, 140, 40D, 40B, 13]"


In [23]:
# Check that new_stops table contains stops which were in stops but not in oz_stops

new_stops.loc[new_stops['stopid'] == '7567']

Unnamed: 0,stopid,address,lat,lng,routes
4635,7567,Burton Hall Rd,53.273159,-6.204346,[47]


In [29]:
# Make sure there are no duplicate keys

ids = new_stops['stopid']
new_stops[ids.isin(ids[ids.duplicated()])]

Unnamed: 0,stopid,address,lat,lng,routes
4473,7270,Hillcrest Park (Glasnevin Park),53.107182,-6.491846,[65]
4474,7270,"County Wicklow, Valleymount Road",53.107182,-6.491846,[65]


In [34]:
# It appears that this stop comes up twice in oz's file. However, stop 7270 is not a real stop when looked up online.

oz_stops.loc[oz_stops['stopid'] == '7270']

Unnamed: 0,stopid,stop_name,stop_lon,stop_lat
1715,7270,Hillcrest Park (Glasnevin Park),-6.275358,53.388878
4666,7270,"County Wicklow, Valleymount Road",-6.491875,53.107188


In [36]:
# Remove these stops.

new_stops = new_stops.drop(new_stops.index[[4473,4474]])

In [37]:
# Push table to database

new_stops.to_sql('main_stops', engine, if_exists='append', index=False)

2018-07-05 13:34:00,657 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2018-07-05 13:34:00,658 INFO sqlalchemy.engine.base.Engine {'name': 'main_stops'}
2018-07-05 13:34:00,669 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2018-07-05 13:34:00,730 INFO sqlalchemy.engine.base.Engine INSERT INTO main_stops (stopid, address, lat, lng, routes) VALUES (%(stopid)s, %(address)s, %(lat)s, %(lng)s, %(routes)s)
2018-07-05 13:34:00,733 INFO sqlalchemy.engine.base.Engine ({'stopid': '2', 'address': 'Rotunda, Parnell Square West', 'lat': 53.35224111, 'lng': -6.263695, 'routes': ['38B', '38D', '38', '38A', '46A', '46E']}, {'stopid': '3', 'address': 'Rotunda, Granby Place', 'lat': 53.35230694, 'lng': -6.26378306, 'routes': ['122', '120']}, {'stopid': '4', 'address': 'Rotunda, Rotunda Hospital', 'lat': 53.35256694, 'lng': -6.26416611, 'routes': ['7', '7B', '9', '7D', '7A