In [1]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)  # show every column when a DataFrame is displayed
# pd.set_option('display.max_rows', None)     # (disabled) would show every row when a DataFrame is displayed


import warnings
warnings.simplefilter('ignore')   # suppress all warnings from here on

import mysql.connector as conn
# NOTE(review): in the visible cells create_engine is only referenced by commented-out code.
from sqlalchemy import create_engine

In [2]:
# Load the variables defined in the .env file into the process environment
# (the database password is read from there in a later cell via os.getenv).

import os
from dotenv import load_dotenv
load_dotenv() 

True

## Importo datos

In [3]:
# Load all the CSV files into DataFrames (paths are relative to the notebook's working directory)

topologia = pd.read_csv('../data/pasajeros/topologia.csv')
passengers = pd.read_csv('../data/pasajeros/passengers.csv')
passenger_location = pd.read_csv('../data/pasajeros/passenger_location.csv')

## Crear tablas en la base de datos (transport_network)

In [4]:
# MySQL password, read from the environment variable named "mysql".
# os.getenv returns None when the variable is not set.
mysql = os.getenv("mysql")

In [5]:

# Connect to the MySQL server on localhost as root, selecting the 'transport_network' schema.
# `db` and `cursor` are notebook-level objects used by the cells below to run SQL and commit.
db=conn.connect(host='localhost', user='root', passwd=mysql, database='transport_network')

cursor=db.cursor()

# NOTE(review): the SQLAlchemy engine below is disabled, and its URL points at 'nba_stats'
# rather than 'transport_network' — it looks like a leftover; confirm before re-enabling.
#  str_conn='mysql+pymysql://root:'+mysql+'@localhost:3306/nba_stats'
# motor=create_engine(str_conn)

#### TOPOLOGIA

Red de transporte: resultado de unir (join) todos los datos estáticos (rutas, líneas y paradas)

In [6]:
# Inspect column names, non-null counts and dtypes before defining the TOPOLOGY table
topologia.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2693 entries, 0 to 2692
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   route_code     2693 non-null   int64 
 1   route_name     2693 non-null   object
 2   line_code      2693 non-null   int64 
 3   line_name      2693 non-null   object
 4   stop_order     2693 non-null   int64 
 5   stop_code      2693 non-null   int64 
 6   stop_name      2693 non-null   object
 7   stop_code_int  2693 non-null   int64 
 8   latitude       2693 non-null   object
 9   longitude      2693 non-null   object
dtypes: int64(5), object(5)
memory usage: 210.5+ KB


In [7]:
# Create the 'TOPOLOGY' table

# Drop any existing TOPOLOGY table first so the cell can be re-run;
# this discards whatever rows were previously loaded into it.
cursor.execute('drop table if exists TOPOLOGY;')

        
# One SQL column per DataFrame column of `topologia`: the int64 columns become int and the
# object (text) columns become varchar(55). latitude/longitude are kept as text because
# they are object dtype in the DataFrame.
tabla='''

        create table TOPOLOGY (
            
            route_code int, 
            route_name varchar(55),
            line_code int,
            line_name varchar(55),
            stop_order int, 
            stop_code int, 
            stop_name varchar(55),
            stop_code_int int, 
            latitude varchar(55),
            longitude varchar(55)

        );


'''

cursor.execute(tabla)

#### PASSENGERS

Pasajeros en una semana

In [8]:
# Inspect column names, non-null counts and dtypes before defining the PASSENGERS table
passengers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1480811 entries, 0 to 1480810
Data columns (total 12 columns):
 #   Column          Non-Null Count    Dtype  
---  ------          --------------    -----  
 0   date            1480811 non-null  object 
 1   bus             1480811 non-null  int64  
 2   machine         1480811 non-null  int64  
 3   driver          1480811 non-null  int64  
 4   settlement      1480811 non-null  object 
 5   ticket_number   1480811 non-null  object 
 6   operation       1480811 non-null  object 
 7   operation_type  1480811 non-null  object 
 8   fare            1480811 non-null  float64
 9   route_code      1480811 non-null  int64  
 10  line_code       1480811 non-null  int64  
 11  stop_code       1480811 non-null  int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 135.6+ MB


In [9]:
# Create the 'PASSENGERS' table

# Drop any existing PASSENGERS table first so the cell can be re-run;
# this discards whatever rows were previously loaded into it.
cursor.execute('drop table if exists PASSENGERS;')

        
# One SQL column per DataFrame column of `passengers`.
# NOTE(review): `date` is declared datetime although the DataFrame column is object dtype —
#   presumably the CSV strings are in a format MySQL accepts; verify.
# NOTE(review): the text columns are varchar(20) here but varchar(55) for the same-named
#   columns in PASSENGER_LOCATION — confirm the shorter limit is intended.
# NOTE(review): `fare` is stored as FLOAT (approximate); confirm that is acceptable for fares.
tabla='''

        create table PASSENGERS (
            
            date datetime,
            bus int, 
            machine int,  
            driver int,  
            settlement varchar(20), 
            ticket_number varchar(20), 
            operation varchar(20), 
            operation_type varchar(20), 
            fare float,
            route_code int,  
            line_code int,  
            stop_code int 


        );


'''

cursor.execute(tabla)

#### PASSENGER_LOCATION

Localizaciones de las entradas en cada parada

In [10]:
# Inspect column names, non-null counts and dtypes before defining the PASSENGER_LOCATION table
passenger_location.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1414810 entries, 0 to 1414809
Data columns (total 18 columns):
 #   Column          Non-Null Count    Dtype  
---  ------          --------------    -----  
 0   date            1414810 non-null  object 
 1   bus             1414810 non-null  int64  
 2   machine         1414810 non-null  int64  
 3   driver          1414810 non-null  int64  
 4   settlement      1414810 non-null  object 
 5   ticket_number   1414810 non-null  object 
 6   operation       1414810 non-null  object 
 7   operation_type  1414810 non-null  object 
 8   fare            1414810 non-null  float64
 9   route_code      1414810 non-null  int64  
 10  route_name      1414810 non-null  object 
 11  line_code       1414810 non-null  int64  
 12  line_name       1414810 non-null  object 
 13  stop_code       1414810 non-null  int64  
 14  stop_name       1414810 non-null  object 
 15  stop_order      1414810 non-null  int64  
 16  latitude        1414810 non-null  object 

In [11]:
# Create the 'PASSENGER_LOCATION' table


# Drop any existing PASSENGER_LOCATION table first so the cell can be re-run;
# this discards whatever rows were previously loaded into it.
cursor.execute('drop table if exists PASSENGER_LOCATION;')


# Eighteen columns: the ticket fields (date ... fare) plus route, line and stop fields,
# including stop name, order and coordinates. Text columns are varchar(55);
# latitude/longitude are stored as text.
# NOTE(review): `date` is declared datetime although the DataFrame column is object dtype —
#   presumably the CSV strings are in a format MySQL accepts; verify.
tabla='''

        create table PASSENGER_LOCATION (
            
            date datetime, 
            bus int,  
            machine int,  
            driver int,  
            settlement varchar(55), 
            ticket_number varchar(55), 
            operation varchar(55), 
            operation_type varchar(55), 
            fare float,
            route_code int,  
            route_name varchar(55), 
            line_code int,  
            line_name varchar(55), 
            stop_code int,  
            stop_name varchar(55), 
            stop_order int,  
            latitude varchar(55), 
            longitude varchar(55)
                        
        );

'''

cursor.execute(tabla)

## Cargar datos a la BD (transport_network)

In [12]:
# Statement shape:  insert into <table> (<columns>) values (<one %s placeholder per column>)

def insert(df, table, chunk_size=10_000):
    """Load every row of ``df`` into the MySQL table ``table`` and commit.

    Uses the notebook-level ``cursor`` and ``db`` objects created in the
    connection cell. Row values are sent as bound parameters (``%s``
    placeholders) instead of being pasted into the SQL text, so strings
    containing quotes, missing values and single-column frames are all
    handled. Rows are sent in batches with ``cursor.executemany`` rather than
    one ``execute`` call per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to load. Its column names must match the table's column names.
    table : str
        Target table name. It is interpolated into the SQL text (identifiers
        cannot be bound as parameters), so it must come from trusted code,
        as must the column names of ``df``.
    chunk_size : int, default 10_000
        Maximum number of rows sent per ``executemany`` call.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')

    column_list = ','.join(df.columns)
    placeholders = ','.join(['%s'] * len(df.columns))
    insert_query = f"insert into {table} ({column_list}) values ({placeholders})"

    # Cast to object so the driver receives plain Python ints/floats/strs
    # instead of NumPy scalars, and turn missing values (NaN/NaT/None) into
    # None, which the driver sends as SQL NULL.
    records = df.astype(object).where(df.notna(), None)

    for start in range(0, len(records), chunk_size):
        chunk = records.iloc[start:start + chunk_size]
        rows = list(chunk.itertuples(index=False, name=None))
        cursor.executemany(insert_query, rows)

    # Single commit at the end, as before: either the whole load is committed
    # or (on an exception above) nothing is.
    db.commit()
    print('Comiteado!')

In [13]:
# Load the topology DataFrame into the 'TOPOLOGY' table

insert(topologia, 'TOPOLOGY')

Comiteado!


In [14]:
# Load the passengers DataFrame into the 'PASSENGERS' table

insert(passengers, 'PASSENGERS')

Comiteado!


In [15]:
# Load the passenger_location DataFrame into the 'PASSENGER_LOCATION' table

insert(passenger_location, 'PASSENGER_LOCATION')

Comiteado!
