In [108]:
from IPython.display import display, HTML
import pandas as pd
import sqlite3
from sqlite3 import Error

def create_connection(db_file, delete_db=False):
    import os
    if delete_db and os.path.exists(db_file):
        os.remove(db_file)

    conn = None
    try:
        conn = sqlite3.connect(db_file)
        conn.execute("PRAGMA foreign_keys = 1")
    except Error as e:
        print(e)

    return conn


def create_table(conn, create_table_sql):
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Error as e:
        print(e)
        
def execute_sql_statement(sql_statement, conn):
    cur = conn.cursor()
    cur.execute(sql_statement)

    rows = cur.fetchall()

    return rows


In [111]:
# Delete old database file

normalized_database_filename = 'normalized2.db'
conn = create_connection(normalized_database_filename, delete_db=True)


In [112]:
def create_origin_table(data_filename, normalized_database_filename):
    conn = create_connection(normalized_database_filename)
    
    create_table_origin = '''CREATE TABLE IF NOT EXISTS Origin ( 
                         [OriginID] Integer not null primary key,
                         [Origin] Text not null);
                         ''' 
    
    row_count = 0
    origins = []
    with open(data_filename) as file:
        
        for line in file:
            if row_count ==0:
                columns = (line.strip().split(","))
                row_count+=1
            else:
                origins.append(line.strip().split(",")[1].strip())
                row_count+=1
        
        origins_unique = sorted(set(origins))
        origins_final = list(set(zip(range(1,len(origins_unique)+1),origins_unique)))
        origins_final=sorted(origins_final)
        
    with conn:
        create_table(conn, create_table_origin)
        insert_Origin(conn,origins_final)
        
def insert_Origin(conn,values):
        
        sql_insert_origin = """INSERT INTO Origin(OriginID,Origin)
                                VALUES(?,?)"""
        curr = conn.cursor()
        curr.executemany(sql_insert_origin,values)
        return curr.lastrowid

In [113]:
data_filename = "Sample_data.csv"
normalized_database_filename = 'normalized2.db'
create_origin_table(data_filename, normalized_database_filename)

In [114]:
def origin_to_originid_dictionary(normalized_database_filename):
    
    
    # YOUR CODE HERE
    origins = execute_sql_statement("SELECT OriginID,Origin FROM Origin",conn)
    origin_dict = dict()
    for item in origins:
        origin_dict[item[1]] = item[0]
        
    return origin_dict

In [97]:
#execute_sql_statement('DROP TABLE TRIPS',conn)

[]

In [115]:
def create_destination_table(data_filename, normalized_database_filename):
    conn = create_connection(normalized_database_filename)
    
    create_table_destinations = '''CREATE TABLE IF NOT EXISTS Destination ( 
                                 [DestinationID] Integer not null primary key,
                                 [Destination] Text not null);
                         ''' 
    
    row_count = 0
    destinations = []

    with open(data_filename) as file:
        
        for line in file:
            if row_count ==0:
                columns = (line.strip().split(","))
                row_count+=1
            else:
                destinations.append(line.strip().split(",")[2].strip())
                row_count+=1
        
        destinations_unique = sorted(set(destinations))
        destinations_final = list(set(zip(range(1,len(destinations_unique)+1),destinations_unique)))
        destinations_final=sorted(destinations_final)
        print(destinations_final)
    with conn:
        create_table(conn, create_table_destinations)
        insert_destinations(conn,destinations_final)
        
def insert_destinations(conn,values):
        
        sql_insert_destinations = """INSERT INTO Destination(DestinationID,Destination)
                                VALUES(?,?)"""
        curr = conn.cursor()
        curr.executemany(sql_insert_destinations,values)
        return curr.lastrowid

In [116]:
data_filename = "Sample_data.csv"
normalized_database_filename = 'normalized2.db'
create_destination_table(data_filename, normalized_database_filename)

[(1, '1'), (2, '10'), (3, '11'), (4, '12'), (5, '13'), (6, '14'), (7, '15'), (8, '16'), (9, '18'), (10, '2A'), (11, '2B'), (12, '2C'), (13, '3A'), (14, '3B'), (15, '3C'), (16, '4A'), (17, '4B'), (18, '4C'), (19, '5A'), (20, '5B'), (21, '5C'), (22, '6A'), (23, '6B'), (24, '7A'), (25, '7B'), (26, '7C'), (27, '8'), (28, '9'), (29, 'NULL')]


In [117]:
def destination_to_destinationid_dictionary(normalized_database_filename):
    
    
    # YOUR CODE HERE
    destination = execute_sql_statement("SELECT DestinationID,Destination FROM Destination",conn)
    destination_dict = dict()
    for item in destination:
        destination_dict[item[1]] = item[0]
        
    return destination_dict

In [132]:
def create_trips_table(data_filename, normalized_database_filename):
    conn = create_connection(normalized_database_filename)
    create_table_Trips_sql = """ CREATE TABLE IF NOT EXISTS TRIPS (
                                [TripID] INTEGER NOT NULL PRIMARY KEY,
                                [pickup_datetime] TEXT NOT NULL,
                                [trip_distance] FLOAT NOT NULL,
                                [trip_duration] TEXT NOT NULL,
                                FOREIGN KEY(OriginID) REFERENCES Origin(OriginID),
                                FOREIGN KEY(DestinationID) REFERENCES Destination(DestinationID));"""  
    
    trips = []
    row_count = 0
    destination_dict = destination_to_destinationid_dictionary(normalized_database_filename)
    origin_dict = origin_to_originid_dictionary(normalized_database_filename)
    
    with open(data_filename) as file:
        for line in file:
            if row_count ==0:
                columns = (line.strip().split(","))
                row_count+=1
            else:
                a = line.strip().split(',')
                values = (a[3],a[4],a[5],origin_dict[a[1]],destination_dict[a[2]])
                trips.append(values)
                row_count+=1

    with conn:
        create_table(conn, create_table_Trips_sql)
        insert_Trip(conn,trips)

    
def insert_Trip(conn, values):
    sql = '''INSERT INTO TRIPS(pickup_datetime, trip_distance,trip_duration,OriginID,DestinationID)
          VALUES(?,?,?,?,?) '''
    cur = conn.cursor()
    cur.executemany(sql, values)
    return cur.lastrowid

In [133]:
data_filename = "Sample_data.csv"
normalized_database_filename = 'normalized2.db'
create_trips_table(data_filename, normalized_database_filename)

unknown column "OriginID" in foreign key definition


OperationalError: no such table: TRIPS

In [123]:
data_filename = "Sample_data.csv"
normalized_database_filename = 'normalized2.db'
conn = create_connection(normalized_database_filename)
create_table_Trips_sql = """ CREATE TABLE IF NOT EXISTS TRIPS (
                                [TripID] INTEGER NOT NULL PRIMARY KEY,
                                [pickup_datetime] TEXT NOT NULL,
                                [trip_distance] FLOAT NOT NULL,
                                [trip_duration] TEXT NOT NULL,
                                FOREIGN KEY(OriginID) REFERENCES Origin(OriginID),
                                FOREIGN KEY(DestinationID) REFERENCES Destination(DestinationID));"""  

    
with conn:
        create_table(conn, create_table_Trips_sql)

unknown column "OriginID" in foreign key definition
