Basic Imports

In [3]:
from datetime import date
from dotenv import load_dotenv
from sqlalchemy import create_engine
import bs4 as bs
import ftplib
import gzip
import os
import pandas as pd
import psycopg2
import re
import sys
import time
import requests
import urllib.request
import pip
import zipfile

import warnings

warnings.filterwarnings("ignore")

Auxiliary functions

In [4]:
def check_diff(url, file_name):
    '''
    Verifica se o arquivo no servidor existe no disco e se ele tem o mesmo
    tamanho no servidor.
    '''
    if not os.path.isfile(file_name):
        return True # ainda nao foi baixado

    response = requests.head(url)
    new_size = int(response.headers.get('content-length', 0))
    old_size = os.path.getsize(file_name)
    if new_size != old_size:
        os.remove(file_name)
        return True # tamanho diferentes

    return False # arquivos sao iguais

def makedirs(path):
    '''
    cria path caso seja necessario
    '''
    if not os.path.exists(path):
        os.makedirs(path)

def getEnv(env):
    return os.getenv(env)

Getting the directories for the data

In [5]:
local_env = r'C:\Users\Jeremias Junior\Documents\GitHub\gov_data'

dados_rf = 'http://200.152.38.155/CNPJ/'

raw_files = r'C:\Users\Jeremias Junior\Documents\GitHub\gov_data\data\raw_files'
extracted_files = r'C:\Users\Jeremias Junior\Documents\GitHub\gov_data\data\extracted_files'

raw_html = urllib.request.urlopen(dados_rf)
raw_html = raw_html.read()

# Formatar página e converter em string
page_items = bs.BeautifulSoup(raw_html, 'lxml')
html_str = str(page_items)

Files = []
text = '.zip'
for m in re.finditer(text, html_str):
    i_start = m.start()-40
    i_end = m.end()
    i_loc = html_str[i_start:i_end].find('href=')+6
    Files.append(html_str[i_start+i_loc:i_end])

Extracting files

In [None]:
i_l = 0
print('unzipping files')
for l in Files:

    try:
        i_l += 1
        
        print(str(i_l) + ' - ' + l)
        full_path = os.path.join(raw_files, l)
        with zipfile.ZipFile(full_path, 'r') as zip_ref:
            zip_ref.extractall(extracted_files)
    except:
        pass

Setting up files and database

In [6]:
insert_start = time.time()

items = [name for name in os.listdir(extracted_files) if name.endswith('')]

files = dict()

files['empresa'] = list()
files['estabelecimento'] = list()
files['socios'] = list()
files['simples'] = list()
files['cnae'] = list()
files['moti'] = list()
files['munic'] = list()
files['natju'] = list()
files['pais'] = list()
files['quals'] = list()

for i in range(len(items)):

    if items[i].find('EMPRE') > -1:
        files['empresa'].append(items[i])
    if items[i].find('ESTABELE') > -1:
        files['estabelecimento'].append(items[i])
    if items[i].find('SOCIO') > -1:
        files['socios'].append(items[i])
    if items[i].find('SIMPLES') > -1:
        files['simples'].append(items[i])
    if items[i].find('CNAE') > -1:
        files['cnae'].append(items[i])
    if items[i].find('MOTI') > -1:
        files['moti'].append(items[i])
    if items[i].find('MUNIC') > -1:
        files['munic'].append(items[i])
    if items[i].find('NATJU') > -1:
        files['natju'].append(items[i])
    if items[i].find('PAIS') > -1:
        files['pais'].append(items[i])
    if items[i].find('QUALS') > -1:   
        files['quals'].append(items[i])

    else:
        pass


import pyodbc
import sqlalchemy

               
conn = pyodbc.connect(driver='{SQL Server}', 
                      server='(local)', 
                      database='gov_db',               
                      trusted_connection='yes')

cursor = conn.cursor()



Empresa

In [14]:
table_name = 'empresa'
table_columns = ['cnpj_basico', 
                       'razao_social', 
                       'natureza_juridica', 
                       'qualificacao_responsavel', 
                       'capital_social', 
                       'porte_empresa', 
                       'ente_federativo_responsavel']

cursor.execute('DROP TABLE IF EXISTS "empresa";')
conn.commit()


create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()


insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"


for i in range(0, len(files['empresa'])):
    
    print('loading : ',files['empresa'][i])
    empresa = pd.DataFrame(columns=[0, 1, 2, 3, 4, 5, 6])
    empresa_dtypes = {0: 'object', 1: 'object', 2: 'object', 3: 'object', 4: 'object', 5: 'object', 6: 'object'}
    extracted_file_path = os.path.join(extracted_files, files['empresa'][i])

    empresa = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype=empresa_dtypes,
                          encoding='latin-1',
                        )
    
    empresa = empresa.reset_index()

    del empresa['index']

    empresa.columns = table_columns
    
    empresa['capital_social'] = empresa['capital_social'].apply(lambda x: x.replace(',','.'))
    empresa['capital_social'] = empresa['capital_social'] .astype(float)
    empresa['ente_federativo_responsavel'] = empresa['ente_federativo_responsavel'] .astype(str)
    
    for values in empresa.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
empresa

    
    

  




loading :  K3241.K03200Y1.D20910.EMPRECSV
loading :  K3241.K03200Y2.D20910.EMPRECSV
loading :  K3241.K03200Y3.D20910.EMPRECSV
loading :  K3241.K03200Y4.D20910.EMPRECSV
loading :  K3241.K03200Y5.D20910.EMPRECSV
loading :  K3241.K03200Y6.D20910.EMPRECSV
loading :  K3241.K03200Y7.D20910.EMPRECSV
loading :  K3241.K03200Y8.D20910.EMPRECSV
loading :  K3241.K03200Y9.D20910.EMPRECSV


Unnamed: 0,cnpj_basico,razao_social,natureza_juridica,qualificacao_responsavel,capital_social,porte_empresa,ente_federativo_responsavel
0,36627979,LORENA MARIA DE BRITO CAMARGO 09776918700,2135,50,4000.0,01,
1,36627980,KARINE BRUNO DE OLIVEIRA BRAGA 88277704100,2135,50,20000.0,01,
2,36627981,RAIMUNDO WELLINGTON DE SENA 85213594204,2135,50,5000.0,01,
3,36627983,EVELLYN BEATRIZ CHARLO DE MELO RODRIGUES 46715...,2135,50,500.0,01,
4,36627984,RAFAELA FRANCISCO FIGUEIREDO 46701565867,2135,50,300.0,01,
...,...,...,...,...,...,...,...
995,36629084,CLEIDSON SILVA DOS SANTOS 00873643526,2135,50,15000.0,01,
996,36629085,MICHEL OLIVEIRA RAMOS DA SILVA 09000850703,2135,50,1000.0,01,
997,36629086,CENTRO DE ESTUDOS PENCCE LTDA,2062,49,60000.0,03,
998,36629088,THAYNA GOMES LIMA 07475073505,2135,50,16000.0,01,


Estabelecimento

In [24]:
table_name = 'estabelecimento'

table_columns = ['cnpj_basico',
                 'cnpj_ordem',
                 'cnpj_dv',
                 'identificador_matriz_filial',
                 'nome_fantasia',
                 'situacao_cadastral',
                               'data_situacao_cadastral',
                               'motivo_situacao_cadastral',
                               'nome_cidade_exterior',
                               'pais',
                               'data_inicio_atividade',
                               'cnae_fiscal_principal',
                               'cnae_fiscal_secundaria',
                               'tipo_logradouro',
                               'logradouro',
                               'numero',
                               'complemento',
                               'bairro',
                               'cep',
                               'uf',
                               'municipio',
                               'ddd_1',
                               'telefone_1',
                               'ddd_2',
                               'telefone_2',
                               'ddd_fax',
                               'fax',
                               'correio_eletronico',
                               'situacao_especial',
                               'data_situacao_especial']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "estabelecimento";')

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(1024)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['estabelecimento'])):
    
    print('loading : ',files['estabelecimento'][i])
    estabelecimento = pd.DataFrame(columns=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28])
    extracted_file_path = os.path.join(extracted_files, files['estabelecimento'][i])

    estabelecimento = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')

    estabelecimento = estabelecimento.reset_index()
    del estabelecimento['index']
    estabelecimento.columns = table_columns
    for values in estabelecimento.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
estabelecimento


loading :  K3241.K03200Y0.D30610.ESTABELE
loading :  K3241.K03200Y1.D30610.ESTABELE
loading :  K3241.K03200Y2.D30610.ESTABELE
loading :  K3241.K03200Y3.D30610.ESTABELE
loading :  K3241.K03200Y4.D30610.ESTABELE
loading :  K3241.K03200Y5.D30610.ESTABELE
loading :  K3241.K03200Y6.D30610.ESTABELE
loading :  K3241.K03200Y7.D30610.ESTABELE
loading :  K3241.K03200Y8.D30610.ESTABELE
loading :  K3241.K03200Y9.D30610.ESTABELE


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,28609609,0001,81,1,AC GASTRONOMIA,08,20190402,01,,,...,0427,91,99163670,,,,,acgastronomia@outlook.com,,
1,32681070,0001,49,1,SALUTE COQUETELARIA,02,20190206,00,,,...,6969,16,92265295,,,,,SALUTECOQUETELARIA@GMAIL.COM,,
2,32681081,0001,29,1,ABM ADMINISTRACAO,08,20190611,01,,,...,6001,21,31597112,,,,,deia.blenda@gmail.com,,
3,32681093,0001,53,1,ASSOCIACAO CUIABANA DE ATLETISMO - ACA,02,20220615,00,,,...,9067,65,92956451,,,,,,,
4,32681104,0001,03,1,INSTITUTO GOURMET,02,20190206,00,,,...,5833,21,35917662,,,,,WRLROCHA3018@GMAIL.COM,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,32692771,0001,83,1,SIRLEIDE AGUIAR DE JESUS 13118487623,04,20220127,63,,,...,2691,44,30461006,,,,,leydeaguiar373@gmail.com,,
996,32692783,0001,08,1,VITORIA 7 ALIMENTOS,02,20190207,00,,,...,1475,88,99516364,,,,,,,
997,32692795,0001,32,1,AUREA SPORTS,02,20190207,00,,,...,7535,41,31212533,,,,,,,
998,32692810,0001,42,1,BOLOS DA LENE,08,20210102,01,,,...,6313,11,43742714,,,,,cirlenebolzan@gmail.com,,


Socios

In [15]:
table_name = 'socios'
table_columns = ['cnpj_basico',
                      'identificador_socio',
                      'nome_socio_razao_social',
                      'cpf_cnpj_socio',
                      'qualificacao_socio',
                      'data_entrada_sociedade',
                      'pais',
                      'representante_legal',
                      'nome_do_representante',
                      'qualificacao_representante_legal',
                      'faixa_etaria']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "socios";')
conn.commit()

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['socios'])):

    print('loading : ',files['estabelecimento'][i])
    extracted_file_path = os.path.join(extracted_files, files['socios'][i])
    socios = pd.DataFrame(columns=[1,2,3,4,5,6,7,8,9,10,11])
    socios = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')
    
    socios.columns = table_columns
    socios = socios.reset_index()
    del socios['index']

    for values in socios.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
socios
    

loading :  K3241.K03200Y0.D30610.ESTABELE
loading :  K3241.K03200Y1.D30610.ESTABELE
loading :  K3241.K03200Y2.D30610.ESTABELE
loading :  K3241.K03200Y3.D30610.ESTABELE
loading :  K3241.K03200Y4.D30610.ESTABELE
loading :  K3241.K03200Y5.D30610.ESTABELE
loading :  K3241.K03200Y6.D30610.ESTABELE
loading :  K3241.K03200Y7.D30610.ESTABELE
loading :  K3241.K03200Y8.D30610.ESTABELE
loading :  K3241.K03200Y9.D30610.ESTABELE


Unnamed: 0,cnpj_basico,identificador_socio,nome_socio_razao_social,cpf_cnpj_socio,qualificacao_socio,data_entrada_sociedade,pais,representante_legal,nome_do_representante,qualificacao_representante_legal,faixa_etaria
0,02163447,2,ADALGISA HELENA PIMENTA SANTOS RAUZE,***516055**,49,19971009,,***000000**,,00,8
1,02163447,2,RENATO PIMENTA RAUZE,***075525**,22,19971009,,***000000**,,00,5
2,02163454,2,PATRICIA DE CASTRO LEMOS MOREIRA,***454588**,49,19971010,,***000000**,,00,6
3,03852010,2,RAIMUNDA MARIA FONTENELE,***321783**,16,20050912,,***000000**,,00,7
4,05512043,2,AILTON ARAUJO DE SOUZA,***704495**,49,20050902,,***000000**,,00,6
...,...,...,...,...,...,...,...,...,...,...,...
995,00546814,2,MARIA APARECIDA INOCENCIO SILVA SANTOS,***743308**,49,19951023,,***000000**,,00,8
996,00546814,2,JOAO DA SILVA SANTOS,***157648**,49,19950410,,***000000**,,00,8
997,03856387,2,ROBERTO BRANDAO FILHO,***265307**,49,20000530,,***000000**,,00,5
998,03856387,2,JANE DE LIMA,***629846**,22,20000530,,***000000**,,00,6


Simples Nacional


In [16]:
table_name = 'simples'
table_columns = ['cnpj_basico',
                           'opcao_pelo_simples',
                           'data_opcao_simples',
                           'data_exclusao_simples',
                           'opcao_mei',
                           'data_opcao_mei',
                           'data_exclusao_mei']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "simples";')
conn.commit()

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['simples'])):

    print('loading : ',files['simples'][i])
    extracted_file_path = os.path.join(extracted_files, files['simples'][i])
    simples = pd.DataFrame(columns=[1,2,3,4,5,6])
    simples = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')
    
    simples.columns = table_columns
    simples = simples.reset_index()
    del simples['index']

    for values in simples.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
simples

loading :  F.K03200$W.SIMPLES.CSV.D30610


Unnamed: 0,cnpj_basico,opcao_pelo_simples,data_opcao_simples,data_exclusao_simples,opcao_mei,data_opcao_mei,data_exclusao_mei
0,00000000,N,20070701,20070701,N,20090701,20090701
1,00000006,N,20180101,20191231,N,00000000,00000000
2,00000008,N,20140101,20211231,N,00000000,00000000
3,00000011,S,20070701,00000000,N,00000000,00000000
4,00000013,S,20090101,00000000,N,00000000,00000000
...,...,...,...,...,...,...,...
995,00006621,S,20090101,00000000,N,00000000,00000000
996,00006629,N,20070701,20161231,N,00000000,00000000
997,00006638,N,20100101,20101231,N,00000000,00000000
998,00006640,S,20070701,00000000,N,00000000,00000000


cnae

In [17]:
table_name = 'cnae'
table_columns = ['codigo', 'descricao']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "cnae";')
conn.commit()

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['cnae'])):

    print('loading : ',files['cnae'][i])
    extracted_file_path = os.path.join(extracted_files, files['cnae'][i])
    cnae = pd.DataFrame(columns=[1,2])
    cnae = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')
    
    cnae.columns = table_columns
    cnae = cnae.reset_index()
    del cnae['index']

    for values in cnae.values.tolist():
        cursor.execute(insertdata_query, values)
    
conn.commit()
cnae

loading :  F.K03200$Z.D30610.CNAECSV


Unnamed: 0,codigo,descricao
0,0111301,Cultivo de arroz
1,0111302,Cultivo de milho
2,0111303,Cultivo de trigo
3,0111399,Cultivo de outros cereais não especificados an...
4,0112101,Cultivo de algodão herbáceo
...,...,...
995,5823900,Edição integrada à impressão de revistas
996,5829800,"Edição integrada à impressão de cadastros, lis..."
997,5911101,Estúdios cinematográficos
998,5911102,Produção de filmes para publicidade


situacao atual


In [18]:
table_name = 'moti'
table_columns = ['codigo', 'descricao']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "moti";')
conn.commit()

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['moti'])):

    print('loading : ',files['moti'][i])
    extracted_file_path = os.path.join(extracted_files, files['moti'][i])
    moti = pd.DataFrame(columns=[1,2])
    moti = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')
    
    moti.columns = table_columns
    moti = moti.reset_index()
    del moti['index']

    for values in moti.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
moti
    

loading :  F.K03200$Z.D30610.MOTICSV


Unnamed: 0,codigo,descricao
0,00,SEM MOTIVO
1,01,EXTINCAO POR ENCERRAMENTO LIQUIDACAO VOLUNTARIA
2,02,INCORPORACAO
3,03,FUSAO
4,04,CISAO TOTAL
...,...,...
56,73,OMISSAO CONTUMAZ
57,74,INCONSISTENCIA CADASTRAL
58,75,OBITO DO MEI - TITULAR FALECIDO
59,80,"BAIXA REGISTRADA NA JUNTA, INDEFERIDA NA RFB"


municipios


In [19]:
table_name = 'munic'
table_columns = ['codigo', 'descricao']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "munic";')
conn.commit()

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['munic'])):

    print('loading : ',files['munic'][i])
    extracted_file_path = os.path.join(extracted_files, files['munic'][i])
    munic = pd.DataFrame(columns=[1,2])
    munic = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')
    
    munic.columns = table_columns
    munic = munic.reset_index()
    del munic['index']

    for values in munic.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
munic    

loading :  F.K03200$Z.D30610.MUNICCSV


Unnamed: 0,codigo,descricao
0,0001,GUAJARA-MIRIM
1,0002,ALTO ALEGRE DOS PARECIS
2,0003,PORTO VELHO
3,0004,BURITIS
4,0005,JI-PARANA
...,...,...
995,0997,JARDIM DO MULATO
996,0998,MARQUES DE SOUZA
997,0999,LAGOA ALEGRE
998,1000,MONTE ALEGRE DOS CAMPOS


natureza juridica

In [20]:
table_name = 'natju'
table_columns = ['codigo', 'descricao']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "natju";')
conn.commit()

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['natju'])):

    print('loading : ',files['natju'][i])
    extracted_file_path = os.path.join(extracted_files, files['natju'][i])
    natju = pd.DataFrame(columns=[1,2])
    natju = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')
    
    natju.columns = table_columns
    natju = natju.reset_index()
    del natju['index']

    for values in natju.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
natju
    

loading :  F.K03200$Z.D30610.NATJUCSV


Unnamed: 0,codigo,descricao
0,0000,Natureza Jurídica não informada
1,3271,Órgão de Direção Local de Partido Político
2,3280,Comitê Financeiro de Partido Político
3,3298,Frente Plebiscitária ou Referendária
4,3301,Organização Social (OS)
...,...,...
85,3255,Órgão de Direção Nacional de Partido Político
86,3263,Órgão de Direção Regional de Partido Político
87,5037,Outras Instituições Extraterritoriais
88,3328,Plano de Benefícios de Previdência Complementa...


Pais


In [21]:
table_name = 'pais'
table_columns = ['codigo', 'descricao']

insertdata_query = f"INSERT INTO {table_name} ({', '.join(table_columns)}) VALUES ({', '.join(['?'] * len(table_columns))})"
cursor.execute('DROP TABLE IF EXISTS "pais";')
conn.commit()

create_table= f"CREATE TABLE {table_name} ({', '.join([f'{col} VARCHAR(255)' for col in table_columns])})"
cursor.execute(create_table)
cursor.commit()

for i in range(0, len(files['pais'])):

    print('loading : ',files['pais'][i])
    extracted_file_path = os.path.join(extracted_files, files['pais'][i])
    pais = pd.DataFrame(columns=[1,2])
    pais = pd.read_csv(filepath_or_buffer=extracted_file_path,
                          sep=';',
                          nrows=1000,
                          skiprows=0,
                          header=None,
                          dtype='object',
                          encoding='latin-1',
                        ).fillna('nan')
    
    pais.columns = table_columns
    pais = pais.reset_index()
    del pais['index']

    for values in pais.values.tolist():
        cursor.execute(insertdata_query, values)

conn.commit()
pais

loading :  F.K03200$Z.D30610.PAISCSV


Unnamed: 0,codigo,descricao
0,000,COLIS POSTAUX
1,013,AFEGANISTAO
2,017,ALBANIA
3,020,"ALBORAN-PEREJIL,ILHAS"
4,023,ALEMANHA
...,...,...
250,895,ZONA DO CANAL DO PANAMA
251,990,PROV. DE NAVIOS E AERONAVES
252,997,NAO DECLARADOS
253,998,NAO DECLARADO PRELIMINAR
