In [10]:
import pandas as pd
import numpy as np
import sqlite3 as db

In [11]:
%pwd

'/Users/manulabricole/Documents/CDN/BDD'

# Utils

### Import des data

In [12]:
routes_df=pd.read_csv('./SEM-GTFS/routes.txt')
stops_df=pd.read_csv('./SEM-GTFS/stops.txt')
stop_times_df=pd.read_csv('./SEM-GTFS/stop_times.txt')
agency_df=pd.read_csv('./SEM-GTFS/agency.txt')
trips_df=pd.read_csv('./SEM-GTFS/trips.txt')

### Get infos

In [17]:
def get_infos(db_file):
    conn = db.connect(db_file)
    cur = conn.cursor()
    
    # Retrieve the names of all tables in the database
    cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = [table[0] for table in cur.fetchall()]
    
    # Retrieve the columns in each table
    columns = {}
    for table in tables:
        cur.execute("PRAGMA table_info({})".format(table))
        columns[table] = [column[1] for column in cur.fetchall()]
    
    cur.close()
    conn.close()
    print("°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°")
    print("------------------------------ TABLES --------------------------------")
    print("")
    print("Tables:", tables)
    print("")
    print("------------------------------ COLUMNS -------------------------------")
    for table in tables:
        print("")
        print(f"----> {table} <----")
        print("Columns --> ", columns[table])
    print("°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°")

### Query the DB

In [26]:
# The name has to be in the same folder as the notebook
def query_db(name, query):
    
    conn = db.connect(name)
    cur = conn.cursor()
    cur.execute(query)
    column_names = [description[0] for description in cur.description]
    results = cur.fetchall()
    cur.close()
    conn.close()
    
    #df_results = pd.DataFrame(results, columns=column_names)

    return df_results

# I - Implémenter une fonction qui génère une commande insertion SQL

In [27]:
db_name = "gtfs_tag.db"

In [46]:
get_infos(db_name)

°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°
------------------------------ TABLES --------------------------------

Tables: ['agency', 'route', 'trip', 'stop', 'stop_time', 'calendar']

------------------------------ COLUMNS -------------------------------

----> agency <----
Columns -->  ['agency_id', 'agency_name', 'agency_url', 'agency_timezone', 'agency_lang', 'agency_phone', 'agency_fare_url', 'agency_email']

----> route <----
Columns -->  ['route_id', 'agency_id', 'route_short_name', 'route_long_name', 'route_desc', 'route_type', 'route_url', 'route_color', 'route_text_color']

----> trip <----
Columns -->  ['trip_id', 'route_id', 'service_id', 'trip_headsign', 'trip_short_name', 'direction_id', 'block_id', 'shape_id']

----> stop <----
Columns -->  ['stop_id', 'stop_name', 'stop_lat', 'stop_lon', 'stop_desc', 'zone_id', 'stop_url', 'location_type', 'parent_station', 'wheelchair_boarding']

----> stop_t

### Generate dict of data to insert

In [47]:
#Transformation des fichiers en dictionnaires
routes_dic=routes_df.to_dict(orient='dict')
stops_dic=stops_df.to_dict(orient='dict')
stop_times_dic=stop_times_df.to_dict(orient='dict')
agency_dic=agency_df.to_dict(orient='dict')
trips_dic=trips_df.to_dict(orient='dict')

In [68]:
# Fonction transformation données mais pas création de fichier sql
def gen_insert_query(table_name:str, a_dict:dict) -> str:
    #table_name= stops_insert.sql par exple
    #df --> attention à gérer les NaN
    # https://sql.sh/cours/insert-into
    # INSERT INTO table-name (column-names)  VALUES (values) 

    insert_query_tpl = "INSERT INTO {} {} VALUES {} "
    keys, values = zip(*a_dict.items())
    compiled_query = insert_query_tpl.format(table_name, keys, values)
    
    return compiled_query

In [69]:
query = gen_insert_query("route", routes_dic)

In [70]:
query

'INSERT INTO route (\'agency_id\', \'route_id\', \'route_short_name\', \'route_long_name\', \'route_type\', \'route_color\', \'route_text_color\') VALUES ({0: \'SEM\', 1: \'SEM\', 2: \'SEM\', 3: \'SEM\', 4: \'SEM\', 5: \'SEM\', 6: \'SEM\', 7: \'SEM\', 8: \'SEM\', 9: \'SEM\', 10: \'SEM\', 11: \'SEM\', 12: \'SEM\', 13: \'SEM\', 14: \'SEM\', 15: \'SEM\', 16: \'SEM\', 17: \'SEM\', 18: \'SEM\', 19: \'SEM\', 20: \'SEM\', 21: \'SEM\', 22: \'SEM\', 23: \'SEM\', 24: \'SEM\', 25: \'SEM\', 26: \'SEM\', 27: \'SEM\', 28: \'SEM\', 29: \'SEM\', 30: \'SEM\', 31: \'SEM\', 32: \'SEM\', 33: \'SEM\', 34: \'SEM\', 35: \'SEM\', 36: \'SEM\', 37: \'SEM\', 38: \'SEM\', 39: \'SEM\', 40: \'SEM\', 41: \'SEM\', 42: \'SEM\', 43: \'SEM\', 44: \'SEM\', 45: \'SEM\', 46: \'SEM\', 47: \'SEM\', 48: \'SEM\', 49: \'SEM\', 50: \'SEM\', 51: \'SEM\', 52: \'SEM\', 53: \'SEM\', 54: \'SEM\'}, {0: \'1\', 1: \'2\', 2: \'3\', 3: \'4\', 4: \'5\', 5: \'6\', 6: \'7\', 7: \'12\', 8: \'13\', 9: \'14\', 10: \'15\', 11: \'16\', 12: \'19\'

# II - Implémenter une fonction qui génère des commandes d’insertion SQL

### Function to create the SQL query to insert

"INSERT INTO agency ('agency_id', 'agency_name', 'agency_url', 'agency_timezone', 'agency_lang', 'agency_phone') VALUES ({0: 'SEM'}, {0: 'Mobilités M - Tag'}, {0: 'https://www.mobilites-m.fr/'}, {0: 'Europe/Paris'}, {0: 'FR'}, {0: 438703870}) "

### Function generating a list of query based on a dataframe

In [43]:
def get_insert_queries(tablename:str, df: pd.DataFrame) -> list:
    return 

# III - Implémenter une procédure qui crée un fichier SQL

In [34]:
#Fonction pour mettre en forme .txt puis création fichier sql
def gen_insert_file(filename,tablename,df):
    with open(filename,'w') as file:
        file.write("BEGIN;") #BEGIN et COMMIT sont à ajouter pour optimiser le process/plus rapide
        for i in range(len(df)):
            line=df.iloc[i].dropna()
            query= "INSERT INTO {} {} VALUES {};\n ".format(tablename, tuple(line.index), tuple(line.to_list()))
            file.write(query)
        file.write("COMMIT;")
    return
#Pour import dans DB --> terminal : sqlite3 gtfs_tag.db < routes.sql