In [8]:
import os
import requests
import pandas as pd
import psycopg2
from datetime import datetime, timedelta
from prefect import task, Flow, unmapped
from time import sleep
from sqlalchemy import create_engine

# Função para carregar o CSV na tabela do PostgreSQL
@task
def load_to_postgres(csv_file):
    engine = create_engine('postgresql://username:postgres@localhost:postgres/desafio')
    data = pd.read_csv(csv_file)
    data.to_sql('gps_data', engine, if_exists='append', index=False)

# Função para fazer a requisição à API e obter os dados de GPS
@task
def get_gps_data():
    url = "https://dados.mobilidade.rio/gps/brt"
    response = requests.get(url)
    data = response.json()
    return data

# Função para estruturar e transformar os dados da API
@task
def process_gps_data(raw_data):
    df = pd.DataFrame(raw_data['veiculos'])
    df['timestamp'] = pd.to_datetime(df['dataHora'], unit='ms')
    return df[["codigo","placa","linha","latitude","longitude", "velocidade","sentido","trajeto","timestamp"]]

# Função para salvar os dados em um arquivo CSV
@task
def save_to_csv(data, output_path):
    data.to_csv(output_path, index=False)

# Definição da pipeline
with Flow("GPS_BRT_Pipeline") as flow:
    # Obtenção dos dados de GPS
    gps_data = get_gps_data()
    
    # Processamento e transformação dos dados
    processed_data = process_gps_data(gps_data)
    
    # Salvando os dados em um arquivo CSV
    output_path = f"gps_data_{datetime.now().strftime('%Y%m%d%H%M%S')}.csv"
    save_to_csv(processed_data, output_path)
    
    # Carregando o CSV na tabela do PostgreSQL
    load_to_postgres(output_path)

# Agendando a execução da pipeline a cada minuto por 10 minutos
for i in range(15):
    flow.run()
    sleep(60)  # Espera 1 minuto entre cada execução


[2023-07-17 23:48:45-0300] INFO - prefect.FlowRunner | Beginning Flow run for 'GPS_BRT_Pipeline'
[2023-07-17 23:48:45-0300] INFO - prefect.TaskRunner | Task 'get_gps_data': Starting task run...
[2023-07-17 23:48:46-0300] INFO - prefect.TaskRunner | Task 'get_gps_data': Finished task run for task with final state: 'Success'
[2023-07-17 23:48:46-0300] INFO - prefect.TaskRunner | Task 'process_gps_data': Starting task run...
[2023-07-17 23:48:46-0300] INFO - prefect.TaskRunner | Task 'process_gps_data': Finished task run for task with final state: 'Success'
[2023-07-17 23:48:46-0300] INFO - prefect.TaskRunner | Task 'save_to_csv': Starting task run...
[2023-07-17 23:48:46-0300] INFO - prefect.TaskRunner | Task 'save_to_csv': Finished task run for task with final state: 'Success'
[2023-07-17 23:48:46-0300] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2023-07-17 23:49:46-0300] INFO - prefect.FlowRunner | Beginning Flow run for 'GPS_BRT_Pipeline'
[2023-07-17 2

[2023-07-17 23:57:54-0300] INFO - prefect.TaskRunner | Task 'get_gps_data': Finished task run for task with final state: 'Success'
[2023-07-17 23:57:54-0300] INFO - prefect.TaskRunner | Task 'process_gps_data': Starting task run...
[2023-07-17 23:57:54-0300] INFO - prefect.TaskRunner | Task 'process_gps_data': Finished task run for task with final state: 'Success'
[2023-07-17 23:57:54-0300] INFO - prefect.TaskRunner | Task 'save_to_csv': Starting task run...
[2023-07-17 23:57:54-0300] INFO - prefect.TaskRunner | Task 'save_to_csv': Finished task run for task with final state: 'Success'
[2023-07-17 23:57:54-0300] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2023-07-17 23:58:54-0300] INFO - prefect.FlowRunner | Beginning Flow run for 'GPS_BRT_Pipeline'
[2023-07-17 23:58:54-0300] INFO - prefect.TaskRunner | Task 'get_gps_data': Starting task run...
[2023-07-17 23:58:55-0300] INFO - prefect.TaskRunner | Task 'get_gps_data': Finished task run for task with fi