Lines table must follow this schema:

```
class Lines(models.Model):
    lineid = models.CharField(max_length=10, primary_key=True)
    routes = ArrayField(models.CharField(max_length=10))

    def __str__(self):
        return "Line ID: "+self.lineid
    
    class Meta:
        verbose_name_plural = "Lines"
        indexes = [
            models.Index(fields=['lineid'],)
        ]
```

In [1]:
import pandas as pd

# Prevents tables from being truncated.
pd.set_option('display.max_columns', None)

In [2]:
# Read trips text files into pandas dataframe
df_16 = pd.read_csv("/home/student/files/rt_trips_2016_I_DB.txt", sep=';')
df_17 = pd.read_csv("/home/student/files/rt_trips_2017_I_DB.txt", sep=';')

In [3]:
# Concat dataframes into one

trips = pd.concat([df_16, df_17], join="inner")

In [4]:
# Look at example of routes associated with lineid

trips.loc[trips['lineid'] == '39A'].head(5)

Unnamed: 0,datasource,dayofservice,tripid,lineid,routeid,direction,plannedtime_arr,plannedtime_dep,actualtime_arr,actualtime_dep,basin,tenderlot,suppressed,justificationid,lastupdate,note
11,DB,09-FEB-16 00:00:00,2818069,39A,39A_41,1,85769,84600,85576.0,84452.0,BasDef,,,,12-APR-16 09:35:20,",2428241,"
53,DB,09-FEB-16 00:00:00,2814946,39A,39A_40,1,52081,46800,52531.0,46832.0,BasDef,,,,12-APR-16 09:35:20,",2426175,"
77,DB,09-FEB-16 00:00:00,2826868,39A,39A_43,2,49850,45000,49988.0,45038.0,BasDef,,,,12-APR-16 09:35:20,",2425929,"
82,DB,09-FEB-16 00:00:00,2815902,39A,39A_43,2,55850,51000,56066.0,51003.0,BasDef,,,,12-APR-16 09:35:20,",2416634,"
167,DB,09-FEB-16 00:00:00,2823891,39A,39A_43,2,73499,69600,73415.0,69598.0,BasDef,,,,12-APR-16 09:35:20,",2419757,"


In [5]:
trips = trips[['lineid', 'routeid']]
trips.head(3)

Unnamed: 0,lineid,routeid
0,145,145_105
1,9,9_7
2,54A,54A_12


In [6]:
# Group routeids into sets associated with single lineids (sets to avoid duplicate routeid values)

trips = trips.groupby('lineid')['routeid'].apply(set)

In [7]:
trips.head(10)

lineid
1                   {1_40, 1_38, 1_37, 1_41, 1_39}
102                         {102_8, 102_10, 102_9}
104                               {104_15, 104_16}
11                    {11_42, 11_40, 11_43, 11_41}
111    {111_9, 111_10, 111_4, 111_3, 111_7, 111_8}
114                                 {114_5, 114_6}
116                                 {116_1, 116_3}
118                                        {118_3}
120                  {120_10, 120_7, 120_8, 120_9}
122               {122_17, 122_14, 122_16, 122_15}
Name: routeid, dtype: object

In [10]:
# 125 unique lines

trips.shape

(125,)

In [11]:
# Reset index to convert groupby to normally indexed dataframe

trips = pd.DataFrame(trips).reset_index()
trips.head(5)

Unnamed: 0,lineid,routeid
0,1,"{1_40, 1_38, 1_37, 1_41, 1_39}"
1,102,"{102_8, 102_10, 102_9}"
2,104,"{104_15, 104_16}"
3,11,"{11_42, 11_40, 11_43, 11_41}"
4,111,"{111_9, 111_10, 111_4, 111_3, 111_7, 111_8}"


In [12]:
# Convert sets to lists

trips['routeid'] = trips['routeid'].apply(list)

In [13]:
trips.head(5)

Unnamed: 0,lineid,routeid
0,1,"[1_40, 1_38, 1_37, 1_41, 1_39]"
1,102,"[102_8, 102_10, 102_9]"
2,104,"[104_15, 104_16]"
3,11,"[11_42, 11_40, 11_43, 11_41]"
4,111,"[111_9, 111_10, 111_4, 111_3, 111_7, 111_8]"


In [14]:
# print one of the routeid arrays to ensure strings

print(trips.loc[trips['lineid'] == '39A']['routeid'].values[0])

['39A_41', '39A_42', '39A_40', '39A_44', '39A_43', '39A_45']


In [15]:
# Rename columns

trips.columns = ['lineid', 'routes']

In [18]:
# Save as CSV file

# trips.to_csv('Lines.csv', sep=';', index=False)

In [17]:
# Create engine to send data to database
# make sure tunnel has been created on port 5433 before running

from sqlalchemy import create_engine

URI="localhost"
PORT="5432"
DB = "jetaDb"
USER = "postgres"
PASSWORD = "00001234"
  
engine = create_engine("postgresql://{}:{}@{}:{}/{}"
                       .format(USER, PASSWORD, URI, PORT, DB), echo=True)

trips.to_sql('main_lines', engine, if_exists='append', index=False)

2018-06-29 11:01:15,370 INFO sqlalchemy.engine.base.Engine select version()
2018-06-29 11:01:15,373 INFO sqlalchemy.engine.base.Engine {}
2018-06-29 11:01:15,383 INFO sqlalchemy.engine.base.Engine select current_schema()
2018-06-29 11:01:15,386 INFO sqlalchemy.engine.base.Engine {}
2018-06-29 11:01:15,393 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2018-06-29 11:01:15,396 INFO sqlalchemy.engine.base.Engine {}
2018-06-29 11:01:15,402 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2018-06-29 11:01:15,403 INFO sqlalchemy.engine.base.Engine {}
2018-06-29 11:01:15,410 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2018-06-29 11:01:15,411 INFO sqlalchemy.engine.base.Engine {}
2018-06-29 11:01:15,416 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
20