## Library

In [223]:
import os, math
import mysql.connector as MariaDB
import pandas as pd
from jinjasql import JinjaSql
from six import string_types
from copy import deepcopy
from random import random
from datetime import datetime

## DB Connection

In [94]:
user = os.environ.get("USER")
pw = os.environ.get('PASSWORD')

con = MariaDB.connect(
    host="127.0.0.1",
    user=user,
    passwd=pw,
    db="DB_TEST",
    port=3336
    )

cursor = con.cursor()

## Funcions and SQL Templates

#### Jinja Templates

In [92]:
_SIMPLE_SELECT_TEMPLATE = \
    """
    {% if show_columns %}
        {% set columns = '\n    ,'.join(show_columns) %}
    {% endif %}
    SELECT
        {% if columns %}
            {{ columns | sqlsafe }}
        {% else %}
            * 
        {% endif %}
    FROM 
        {{ table | sqlsafe }}
    """

#### Functions

* Functions to work with Jinja Templates

In [60]:

def quote_sql_string(value):
    if isinstance(value, string_types):
        new_value = str(value)
        new_value = new_value.replace("'", "''")
        return "'{}'".format(new_value)
    return value

def get_sql_from_template(query, bind_params):
    if not bind_params:
        return query
    params = deepcopy(bind_params)
    for key, val in params.items():
        params[key] = quote_sql_string(val)
    return query % params

def apply_sql_template(template, parameters):
    j = JinjaSql(param_style='pyformat')
    query, bind_params = j.prepare_query(template, parameters)
    return get_sql_from_template(query, bind_params)

def get_select_table_sql(table, show_columns=None):    
    data = {
        "table": table,
        "show_columns": show_columns
    }
    return apply_sql_template(_SIMPLE_SELECT_TEMPLATE, data)



* Functions to generate data

In [237]:
def generate_rand_number(min, max):
    return math.floor((random()*(max-min))+min)

def get_rand_value(df, column):
    total_values = df[column].count()
    position = generate_rand_number(0, total_values-1)
    return df.iloc[position][column]
    

## Database Tables to DF

In [96]:
tables = []
cursor.execute('show tables')

for row in cursor:
    tables.append(row[0])
    
tables

In [98]:
query_items_notes = get_select_table_sql("itens_notas_fiscais")
query_notes       = get_select_table_sql("notas_fiscais")
query_customers   = get_select_table_sql("tabela_de_clientes")
query_products    = get_select_table_sql("tabela_de_produtos")
query_sellers     = get_select_table_sql("tabela_de_vendedores")

df_items_notes = pd.read_sql(query_items_notes, con)
df_notes       = pd.read_sql(query_notes, con)
df_customers   = pd.read_sql(query_customers, con)
df_products    = pd.read_sql(query_products, con)
df_sellers     = pd.read_sql(query_sellers, con)


## Generate Data

In [240]:
new_notes  = pd.DataFrame(columns=df_notes.columns)
new_items  = pd.DataFrame(columns=df_items_notes.columns)
date_sale  = datetime.today().strftime('%Y-%m-%d')
last_note  = df_notes['NUMERO'].max()
tax_median = df_notes['IMPOSTO'].median()

for i in range(100000):
    note_number = last_note + i
    customer_cpf = get_rand_value(df_customers, 'CPF')
    seller_id = get_rand_value(df_sellers, 'MATRICULA')
    new_notes.loc[len(new_notes)] = [customer_cpf, seller_id, date_sale, note_number, tax_median]
    
#product_code = get_rand_value(df_products, 'CODIGO_DO_PRODUTO')

In [239]:
new_notes.head()

Unnamed: 0,CPF,MATRICULA,DATA_VENDA,NUMERO,IMPOSTO
0,2600586709,235,2022-05-25,125865,0.109984
1,5840119709,236,2022-05-25,125866,0.109984
2,8719655770,237,2022-05-25,125867,0.109984
3,5648641702,237,2022-05-25,125868,0.109984
4,492472718,235,2022-05-25,125869,0.109984
