## Library

In [118]:
import os, math
import mysql.connector as MariaDB
import pandas as pd
from jinjasql import JinjaSql
from six import string_types
from copy import deepcopy
from random import random
from datetime import datetime
from sqlalchemy import create_engine

## DB Connection

In [124]:
user = os.environ.get("USER")
pw = os.environ.get('PASSWORD')

In [151]:
con = MariaDB.connect(
    host="127.0.0.1",
    user=user,
    passwd=pw,
    db="DB_TEST",
    port=3336
    )

cursor = con.cursor()

In [152]:
conn_string = f'{"mysql+pymysql://{user}:{pw}@127.0.0.1/DB_TEST"}'

engine = create_engine(conn_string, echo=False)
engine.connect()

OperationalError: (pymysql.err.OperationalError) (1045, "Access denied for user 'mbrugnar'@'localhost' (using password: YES)")
(Background on this error at: http://sqlalche.me/e/13/e3q8)

## Funcions and SQL Templates

#### Jinja Templates

In [3]:
_SIMPLE_SELECT_TEMPLATE = \
    """
    {% if show_columns %}
        {% set columns = '\n    ,'.join(show_columns) %}
    {% endif %}
    SELECT
        {% if columns %}
            {{ columns | sqlsafe }}
        {% else %}
            * 
        {% endif %}
    FROM 
        {{ table | sqlsafe }}
    """

#### Functions

* Functions to work with Jinja Templates

In [4]:
def quote_sql_string(value):
    if isinstance(value, string_types):
        new_value = str(value)
        new_value = new_value.replace("'", "''")
        return "'{}'".format(new_value)
    return value

def get_sql_from_template(query, bind_params):
    if not bind_params:
        return query
    params = deepcopy(bind_params)
    for key, val in params.items():
        params[key] = quote_sql_string(val)
    return query % params

def apply_sql_template(template, parameters):
    j = JinjaSql(param_style='pyformat')
    query, bind_params = j.prepare_query(template, parameters)
    return get_sql_from_template(query, bind_params)

def get_select_table_sql(table, show_columns=None):    
    data = {
        "table": table,
        "show_columns": show_columns
    }
    return apply_sql_template(_SIMPLE_SELECT_TEMPLATE, data)



* Functions to generate data

In [22]:
def generate_rand_number(min, max):
    return math.floor((random()*(max-min))+min)

def get_rand_value(df, column):
    total_values = df[column].count()
    position = generate_rand_number(0, total_values-1)
    return df.iloc[position][column]
    
def get_rand_value(dict, column):
    total_values = list(dict.keys())[-1]
    key = generate_rand_number(0, total_values)
    return dict[key][column]

def get_rand_key(dict):
    total_values = list(dict.keys())[-1]
    return generate_rand_number(0, total_values)

## Get data from Database Tables

In [6]:
tables = []
cursor.execute('show tables')

for row in cursor:
    tables.append(row[0])
    
tables

['itens_notas_fiscais',
 'notas_fiscais',
 'tabela_de_clientes',
 'tabela_de_produtos',
 'tabela_de_vendedores']

In [7]:
query_items_notes = get_select_table_sql("itens_notas_fiscais")
query_notes       = get_select_table_sql("notas_fiscais")
query_customers   = get_select_table_sql("tabela_de_clientes")
query_products    = get_select_table_sql("tabela_de_produtos")
query_sellers     = get_select_table_sql("tabela_de_vendedores")

In [8]:
df_items_notes = pd.read_sql(query_items_notes, con)
df_notes       = pd.read_sql(query_notes, con)
df_customers   = pd.read_sql(query_customers, con)
df_products    = pd.read_sql(query_products, con)
df_sellers     = pd.read_sql(query_sellers, con)

In [9]:
dict_items_notes = pd.read_sql(query_items_notes, con).to_dict('index')
dict_notes       = pd.read_sql(query_notes, con).to_dict('index')
dict_customers   = pd.read_sql(query_customers, con).to_dict('index')
dict_products    = pd.read_sql(query_products, con).to_dict('index')
dict_sellers     = pd.read_sql(query_sellers, con).to_dict('index')

## Generate Data

In [77]:
def generate_new_data(maxNotes, maxItens, maxQuantity):
    date_sale  = datetime.today().strftime('%Y-%m-%d')
    last_note  = df_notes['NUMERO'].max()
    tax_median = df_notes['IMPOSTO'].median()
    new_notes = {}
    new_items_notes = {}

    for i in range(maxNotes):
        number_note = last_note + i
        customer_cpf = get_rand_value(dict_customers, 'CPF')
        seller_id = get_rand_value(dict_sellers, 'MATRICULA')
        new_notes[number_note] = [customer_cpf, seller_id, date_sale, number_note, tax_median]

        for i in range(maxItens):
            key = get_rand_key(dict_customers)
            product_code = dict_products[key]['CODIGO_DO_PRODUTO']
            item_key = f'{number_note}{product_code}'

            if item_key not in new_items_notes:
                product_price = dict_products[key]['PRECO_DE_LISTA']
                quantity = generate_rand_number(0, maxQuantity)
                new_items_notes[item_key] = [number_note, product_code, quantity, product_price]
                
    return new_notes, new_items_notes

In [108]:
 new_notes, new_items_notes = generate_new_data(100000, 1, 10)

100000

## Load new data into DB

In [109]:
df_new_itens_notes = pd.DataFrame.from_dict(new_items_notes, orient='index', columns=df_items_notes.columns).reset_index(drop=True)
df_new_notes = pd.DataFrame.from_dict(new_notes, orient='index', columns=df_notes.columns).reset_index(drop=True)

In [117]:
df_new_notes.head()

Unnamed: 0,CPF,MATRICULA,DATA_VENDA,NUMERO,IMPOSTO
0,19290992743,237,2022-05-26,125865,0.109984
1,50534475787,237,2022-05-26,125866,0.109984
2,5648641702,236,2022-05-26,125867,0.109984
3,8502682733,236,2022-05-26,125868,0.109984
4,7771579779,235,2022-05-26,125869,0.109984


In [105]:
df_new_itens_notes.head()

Unnamed: 0,NUMERO,CODIGO_DO_PRODUTO,QUANTIDADE,PRECO
0,125865,1041119,6,4.904
1,125866,1096818,8,7.7105
2,125867,1101035,0,9.0105
3,125868,1000889,3,6.309
4,125869,1037797,6,16.008


In [116]:
df_new_notes.to_sql('notas_fiscais', con=con, index=False)

DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': Not all parameters were used in the SQL statement