In [1]:
import tkinter as tk
from tkinter import ttk
import pandas as pd
import unicodedata
import zipfile
import os
import requests
import dask.dataframe as dd
import io
import codecs
import dbfread
import tempfile
from pandastable import Table, TableModel

def remove_special_characters(text):
    return ''.join(c for c in unicodedata.normalize('NFKD', text) if not unicodedata.combining(c))

def show_results_in_table(df):
    table_window = tk.Toplevel(root)
    table_window.title("Resultado do JOIN")
    frame = ttk.Frame(table_window)
    frame.pack(fill=tk.BOTH, expand=True)
    pt = Table(frame, dataframe=df.compute())
    pt.show()

def perform_join_and_display_ages():
    url = csv_url_entry.get()
    if not url:
        result_label.config(text="URL da Bahia não fornecida.")
        return

    zip_file_url = zip_url_entry.get()
    if not zip_file_url:
        result_label.config(text="URL do arquivo ZIP do Rio de Janeiro não fornecida.")
        return

    try:
        # Ler o arquivo CSV
        tipos_mistos = identificar_tipos_dados_mistos(url)
        df_url = dd.read_csv(url, delimiter=determinar_delimitador(url), dtype=tipos_mistos, assume_missing=True, low_memory=False)
    except Exception as e:
        error_message = f"Erro ao ler o arquivo CSV. Detalhes do erro: {str(e)}"
        result_label.config(text=error_message)
        print(error_message)
        return

    try:
        r = requests.get(zip_file_url, stream=True)
        temp_zip_file = tempfile.NamedTemporaryFile(delete=False)

        with open(temp_zip_file.name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)

        with zipfile.ZipFile(temp_zip_file.name, 'r') as z:
            dbf_files = [file for file in z.namelist() if file.endswith('.dbf')]
            if not dbf_files:
                raise ValueError("Não foi encontrado nenhum arquivo DBF no arquivo ZIP.")

            z.extract(dbf_files[0])
            dbf_file_path = os.path.join(os.getcwd(), dbf_files[0])

        with open(dbf_file_path, 'rb') as dbf_file:
            df_dbf = pd.DataFrame(iter(dbfread.DBF(dbf_file_path, encoding='latin1')))
            df_dbf['CD_MUN'] = df_dbf['CD_MUN'].astype('int64')

    except Exception as e:
        error_message = f"Erro ao baixar, ler o arquivo ZIP ou realizar JOIN. Detalhes do erro: {str(e)}"
        result_label.config(text=error_message)
        print(error_message)
        return
    
    join_type = join_type_var.get()
    merged_df = dd.merge(df_url, df_dbf, left_on='municipioIBGE', right_on='CD_MUN', how=join_type)
    
    show_results_in_table(merged_df)

    if 'municipioIBGE' not in df_url.columns or 'CD_MUN' not in df_dbf.columns:
        error_message = "As colunas 'municipioIBGE' e 'CD_MUN' não estão presentes nos DataFrames originais."
        result_label.config(text=error_message)
        print(error_message)
        return
    
    join_type = join_type_var.get()
    merged_df = dd.merge(df_url, df_dbf, left_on='municipioIBGE', right_on='CD_MUN', how=join_type)
    
    if 'AREA_KM2' in merged_df.columns:
        result_label.config(text="Coluna 'AREA_KM2' após o JOIN:\n" + str(merged_df['AREA_KM2']))
    else:
        result_label.config(text="A coluna 'AREA_KM2' não existe no DataFrame resultante.")

def identificar_tipos_dados_mistos(arquivo_csv):
    df = pd.read_csv(arquivo_csv, nrows=100)
    tipos_de_dados = {}

    for coluna in df.columns:
        tipos = df[coluna].apply(type).unique()
        tipos_de_dados[coluna] = tipos

    colunas_tipos_mistos = {coluna: tipos for coluna, tipos in tipos_de_dados.items() if len(tipos) > 1}

    return colunas_tipos_mistos

def determinar_delimitador(url):
    r = requests.get(url)
    line = r.content.decode().splitlines()[0]
    delimiters = [',', ';', '\t']
    for delimiter in delimiters:
        if delimiter in line:
            return delimiter
    return ','

root = tk.Tk()
root.title("App Join Dataframes")

csv_url_label = ttk.Label(root, text="URL do arquivo CSV:")
csv_url_entry = ttk.Entry(root, width=40)

zip_url_label = ttk.Label(root, text="URL do arquivo ZIP:")
zip_url_entry = ttk.Entry(root, width=40)

join_type_var = tk.StringVar()
join_type_label = ttk.Label(root, text="Tipo de JOIN:")
join_type_combobox = ttk.Combobox(root, textvariable=join_type_var, values=["inner", "outer", "left", "right"])

join_button = ttk.Button(root, text="Carregar", command=perform_join_and_display_ages)

result_label = ttk.Label(root, text="")

csv_url_label.grid(row=0, column=0, sticky="w")
csv_url_entry.grid(row=0, column=1, columnspan=2, sticky="w")
zip_url_label.grid(row=1, column=0, sticky="w")
zip_url_entry.grid(row=1, column=1, columnspan=2, sticky="w")
join_type_label.grid(row=2, column=0, sticky="w")
join_type_combobox.grid(row=2, column=1, columnspan=2, sticky="w")
join_button.grid(row=3, column=0, columnspan=3, pady=10)
result_label.grid(row=4, column=0, columnspan=3, pady=10)

root.mainloop()

Erro ao ler o arquivo CSV. Detalhes do erro: Error tokenizing data. C error: Expected 4 fields in line 71, saw 6

