In [0]:
# Importando as bibliotecas
import requests
import zipfile
import io
import os
import pandas as pd

In [0]:
# Kaggle URL of the zipped dataset
url = "https://www.kaggle.com/api/v1/datasets/download/danielefm/urban-mobility-survey-federal-district-brazil?datasetVersionNumber=1"

# Download the archive with a GET request. requests applies no timeout by
# default, so one is set explicitly to keep the cell from hanging forever
# when the server stops responding.
response = requests.get(url, timeout=60)

# Only proceed when the request succeeded
if response.status_code == 200:
    # Wrap the downloaded bytes in a file-like object so zipfile can read them
    zip_file = io.BytesIO(response.content)

    try:
        # Open the in-memory archive for reading
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            # Extract every member into the local data directory
            zip_ref.extractall("urban_mobility_data")
            print("Files extracted successfully")

            # List the archive members to confirm which CSV files were extracted
            extracted_files = zip_ref.namelist()
            for file in extracted_files:
                print(f"Extracted: {file}")
    except zipfile.BadZipFile as e:
        # A 200 response is not guaranteed to carry a zip payload (e.g. an HTML
        # page returned instead of the dataset); report it like the HTTP
        # failure below instead of ending the cell with a raw traceback.
        print(f"Falha ao extrair os arquivos: o conteúdo baixado não é um arquivo zip válido ({e})")
else:
    print(f"Falha ao fazer o download dos arquivos. Status code: {response.status_code}")


Files extracted successfully
Extracted: Household.csv
Extracted: Person.csv
Extracted: Stage.csv
Extracted: Trip.csv


In [0]:
# Directory that holds the extracted CSV files
directory = "urban_mobility_data"

# Build the path of each CSV file inside that directory in a single pass
household_file, person_file, stage_file, trip_file = (
    os.path.join(directory, csv_name)
    for csv_name in ("Household.csv", "Person.csv", "Stage.csv", "Trip.csv")
)

# Helper that loads a CSV file and reports, rather than raises, any failure
def load_csv_with_error_handling(file_path, delimiter=';'):
    """Read a delimited text file into a DataFrame.

    Parameters
    ----------
    file_path : path of the CSV file to read.
    delimiter : field separator passed to ``pd.read_csv`` (default ``';'``).

    Returns
    -------
    The loaded DataFrame, or ``None`` when reading raised an exception; in
    that case an error message is printed instead of propagating the error.

    Lines that pandas considers malformed are skipped
    (``on_bad_lines='skip'``) rather than aborting the load.
    """
    try:
        return pd.read_csv(file_path, sep=delimiter, on_bad_lines='skip')
    except Exception as err:
        print(f"Error loading {file_path}: {err}")
    # Reached only when the read failed
    return None

# Load each CSV file into its own DataFrame (None when the load failed)
household_df, person_df, stage_df, trip_df = [
    load_csv_with_error_handling(csv_path)
    for csv_path in (household_file, person_file, stage_file, trip_file)
]

# Preview the first rows of every DataFrame that loaded successfully
previews = (
    ("Household DataFrame:", household_df),
    ("\nPerson DataFrame:", person_df),
    ("\nStage DataFrame:", stage_df),
    ("\nTrip DataFrame:", trip_df),
)
for heading, frame in previews:
    if frame is None:
        continue
    print(heading)
    print(frame.head())


Household DataFrame:
   household_id  people_in_household  ...  macrozone  administrative_region
0             3                    2  ...        315             Taguatinga
1             4                    4  ...        315             Taguatinga
2             7                    3  ...        315             Taguatinga
3            22                    3  ...        315             Taguatinga
4            35                    3  ...        222                  Guará

[5 rows x 20 columns]

Person DataFrame:
   person_id  household_id  ... sector_if_civil_servant expf_person
0          1             3  ...          Not applicable     35.6286
1          2             3  ...          Not applicable     35.6286
2          3             4  ...          Not applicable     32.6147
3          4             4  ...          Not applicable     32.6147
4          5             4  ...          Not applicable     32.6147

[5 rows x 11 columns]

Stage DataFrame:
   stage_id  household_id  ...  