In [42]:
!pip3 install -r ../requirements.txt

The folder you are executing pip from can no longer be found.


In [40]:
%run logging.ipynb

Exception: File `'../logging.ipynb'` not found.

In [None]:
%run ../schema.py

In [None]:
%run snowflake_connection.ipynb

In [None]:
%run create_table.ipynb

In [None]:
%run load_raw_table.ipynb

In [None]:
# Importando as bibliotecas
import requests
import zipfile
import io
import os
import pandas as pd
from typing import Optional, Dict
from dotenv import load_dotenv

In [None]:
# Load variables from a .env file into the process environment (python-dotenv).
# The driver cell later reads os.getenv("URL"); presumably that value is
# expected to be provided by this .env file — confirm.
load_dotenv()

In [None]:
class UrbanMobilityData:
    """
    Download a zipped dataset over HTTP and load selected CSV members
    into pandas DataFrames.

    Only archive members listed in ``file_paths`` are loaded; every other
    member is ignored. Failures are logged instead of being raised.
    """

    def __init__(self, url: str, timeout: float = 60.0):
        """
        Store the download location and set up logging.

        :param url: URL of the zipped file (Kaggle dataset archive).
        :param timeout: Seconds to wait for the server to connect / send
            data before the download is abandoned.
        """
        self.url = url
        self.timeout = timeout
        # Logical table name -> name of the CSV member inside the archive.
        self.file_paths = {
            "Household": "Household.csv",
            "Person": "Person.csv",
            "Stage": "Stage.csv",
            "Trip": "Trip.csv"
        }
        # Logger is not defined in this cell; it must already exist in the
        # notebook namespace (presumably loaded by an earlier %run cell).
        self.logger_class = Logger()
        self.logger = self.logger_class.set_logger()

    def download_zip(self) -> Optional[bytes]:
        """
        Download the zipped file from ``self.url``.

        :return: Content of the zipped file as bytes, or None when the URL
            is missing, the request fails, or the status code is not 200.
        """
        # os.getenv() returns None for an unset variable; report that
        # explicitly instead of letting requests fail on an invalid URL.
        if not self.url:
            self.logger.error("Erro ao baixar o arquivo: URL não informada")
            return None

        try:
            # Without a timeout a stalled server would block forever.
            response = requests.get(self.url, timeout=self.timeout)
            if response.status_code == 200:
                return response.content
            self.logger.error(f"Falha ao fazer o download dos arquivos. Status code: {response.status_code}")
            return None
        except Exception as e:
            # Best effort: any failure is logged and signalled with None.
            self.logger.error(f"Erro ao baixar o arquivo: {e}")
            return None

    def extract_zip_to_dataframes(self, zip_content: bytes) -> Dict[str, Optional[pd.DataFrame]]:
        """
        Open the zip archive in memory and load the known CSV members.

        A member that cannot be parsed is logged and skipped, so one bad
        file does not prevent the remaining files from being loaded.

        :param zip_content: Content of the zipped file as bytes.
        :return: Dictionary keyed by the logical names from ``file_paths``
            with the loaded DataFrames as values. Members that are absent
            or failed to load have no entry; an unreadable archive yields
            an empty dictionary.
        """
        dataframes: Dict[str, Optional[pd.DataFrame]] = {}

        # Invert file_paths once (member name -> key). setdefault keeps the
        # first key if two keys ever point at the same member name.
        key_by_member: Dict[str, str] = {}
        for key, member in self.file_paths.items():
            key_by_member.setdefault(member, key)

        try:
            with zipfile.ZipFile(io.BytesIO(zip_content), 'r') as zip_ref:
                for file_name in zip_ref.namelist():
                    if file_name not in key_by_member:
                        continue
                    key = key_by_member[file_name]
                    try:
                        with zip_ref.open(file_name) as file:
                            # Files are ';'-separated; malformed rows are dropped.
                            df = pd.read_csv(file, sep=';', on_bad_lines='skip')
                    except Exception as e:
                        # Isolate the failure to this member only.
                        self.logger.error(f"Erro ao carregar o arquivo {file_name}: {e}")
                        continue
                    dataframes[key] = df
                    self.logger.info(f"DataFrame {key} carregado com sucesso")
        except Exception as e:
            # Archive-level problems (e.g. not a valid zip file).
            self.logger.error(f"Erro ao processar o arquivo zipado: {e}")
        return dataframes

In [None]:
if __name__ == "__main__":
    # Driver: download the archive, load each CSV into a DataFrame, then
    # create the target table and save the rows. Logger,
    # SnowflakeTableCreator, SnowflakeHandlerRaw and the table_* schema
    # objects are not defined in this cell; presumably they come from the
    # earlier %run cells — confirm.
    url = os.getenv("URL")
    logger_class = Logger()
    logger = logger_class.set_logger()
    data_handler = UrbanMobilityData(url)
    create_table = SnowflakeTableCreator()

    # Download the zip file; download_zip() returns None on failure.
    zip_content = data_handler.download_zip()
    if zip_content:
        # Unpack the archive and load the known CSV members.
        dataframes = data_handler.extract_zip_to_dataframes(zip_content)

        for name, df in dataframes.items():
            if df is None:
                continue

            logger.info(name)

            handler = SnowflakeHandlerRaw()

            # The target table is named after the upper-cased dataset key.
            target_table_location = name.upper()

            # Pick the schema object that matches the table being loaded;
            # an unrecognised name creates no table.
            if target_table_location == 'PERSON':
                create_table.create_table(target_table_location, table_person)
            elif target_table_location == 'HOUSEHOLD':
                create_table.create_table(target_table_location, table_household)
            elif target_table_location == 'STAGE':
                create_table.create_table(target_table_location, table_stage)
            elif target_table_location == 'TRIP':
                create_table.create_table(target_table_location, table_trip)

            # Column names are passed upper-cased alongside the DataFrame.
            expected_columns = [column.upper() for column in df.columns]

            handler.save_dataframe(df, target_table_location, expected_columns)