In [17]:
import pandas as pd

# Load the original dataset and the translated, previously exported one.
# NOTE(review): "viagens_atrasadas.csv" is also written by a later cell of this
# notebook, so this read presumably needs the file from an earlier run —
# confirm that a fresh checkout can execute this cell.
railway = pd.read_csv(filepath_or_buffer="railway.csv")
viagens_atrasadas = pd.read_csv(filepath_or_buffer="viagens_atrasadas.csv")

# Original column name -> column name used in the translated dataset.
# The remaining columns of the original have no counterpart in the translated
# one and are therefore left untouched by the rename.
mapa_colunas = dict(
    [
        ("Transaction ID", "ID"),
        ("Departure Station", "ESTACAO_IDA"),
        ("Arrival Destination", "ESTACAO_CHEGADA"),
        ("Date of Journey", "DATA_VIAGEM"),
        ("Departure Time", "HORARIO_SAIDA"),
        ("Arrival Time", "TEMPO_ESPERADO_DA_VIAGEM"),
        ("Actual Arrival Time", "HORARIO_CHEGADA"),
        ("Journey Status", "Status da Jornada"),
        ("Reason for Delay", "MOTIVO_ATRASO"),
        ("Price", "PRECO_MEDIO"),
    ]
)

# rename() returns a new frame; rebinding keeps the name used by later cells.
railway = railway.rename(mapa_colunas, axis="columns")

In [18]:
# Show both column lists so the renamed original can be compared with the
# translated file.
colunas_original = list(railway.columns)
colunas_traduzido = list(viagens_atrasadas.columns)
print(f"original: {colunas_original}")
print(f"traduzido com alterações: {colunas_traduzido}")

original: ['ID', 'Date of Purchase', 'Time of Purchase', 'Purchase Type', 'Payment Method', 'Railcard', 'Ticket Class', 'Ticket Type', 'PRECO_MEDIO', 'ESTACAO_IDA', 'ESTACAO_CHEGADA', 'DATA_VIAGEM', 'HORARIO_SAIDA', 'TEMPO_ESPERADO_DA_VIAGEM', 'HORARIO_CHEGADA', 'Status da Jornada', 'MOTIVO_ATRASO', 'Refund Request']
traduzido com alterações: ['ID', 'Date of Purchase', 'Time of Purchase', 'Purchase Type', 'Payment Method', 'Railcard', 'Ticket Class', 'Ticket Type', 'PRECO_MEDIO', 'ESTACAO_IDA', 'ESTACAO_CHEGADA', 'DATA_VIAGEM', 'HORARIO_SAIDA', 'TEMPO_ESPERADO_DA_VIAGEM', 'HORARIO_CHEGADA', 'Status da Jornada', 'MOTIVO_ATRASO', 'Refund Request', 'ATRASO']


In [19]:
import pandas as pd

# Delay of each journey in minutes: actual arrival minus scheduled arrival.
# Only temporary Series are parsed to datetime, so the source columns of
# `railway` keep their original dtype.
# errors='coerce' turns missing or malformed times into NaT, which in turn
# produces a NaN delay for that row.
horario_chegada = pd.to_datetime(railway['HORARIO_CHEGADA'], format='%H:%M:%S', errors='coerce')
tempo_esperado = pd.to_datetime(railway['TEMPO_ESPERADO_DA_VIAGEM'], format='%H:%M:%S', errors='coerce')

# Raw difference between the two times of day, in minutes.
atraso_minutos = (horario_chegada - tempo_esperado).dt.total_seconds() / 60

# Both columns carry a time of day without a date, so a train scheduled for
# 23:50 that arrives at 00:05 would naively yield -1425 minutes. Wrapping the
# difference into [-720, 720) makes arrivals past midnight come out as +15
# while leaving ordinary same-day differences (and NaN) unchanged.
railway['ATRASO'] = (atraso_minutos + 720) % 1440 - 720

In [20]:
# Preview the first five rows, including the ATRASO column.
railway.head(n=5)

Unnamed: 0,ID,Date of Purchase,Time of Purchase,Purchase Type,Payment Method,Railcard,Ticket Class,Ticket Type,PRECO_MEDIO,ESTACAO_IDA,ESTACAO_CHEGADA,DATA_VIAGEM,HORARIO_SAIDA,TEMPO_ESPERADO_DA_VIAGEM,HORARIO_CHEGADA,Status da Jornada,MOTIVO_ATRASO,Refund Request,ATRASO
0,da8a6ba8-b3dc-4677-b176,2023-12-08,12:41:11,Online,Contactless,Adult,Standard,Advance,43,London Paddington,Liverpool Lime Street,2024-01-01,11:00:00,13:30:00,13:30:00,On Time,,No,0.0
1,b0cdd1b0-f214-4197-be53,2023-12-16,11:23:01,Station,Credit Card,Adult,Standard,Advance,23,London Kings Cross,York,2024-01-01,09:45:00,11:35:00,11:40:00,Delayed,Signal Failure,No,5.0
2,f3ba7a96-f713-40d9-9629,2023-12-19,19:51:27,Online,Credit Card,,Standard,Advance,3,Liverpool Lime Street,Manchester Piccadilly,2024-01-02,18:15:00,18:45:00,18:45:00,On Time,,No,0.0
3,b2471f11-4fe7-4c87-8ab4,2023-12-20,23:00:36,Station,Credit Card,,Standard,Advance,13,London Paddington,Reading,2024-01-01,21:30:00,22:30:00,22:30:00,On Time,,No,0.0
4,2be00b45-0762-485e-a7a3,2023-12-27,18:22:56,Online,Contactless,,Standard,Advance,76,Liverpool Lime Street,London Euston,2024-01-01,16:45:00,19:00:00,19:00:00,On Time,,No,0.0


In [21]:
# Keep only the journeys for which a delay could be computed (ATRASO not NaN).
tem_atraso = railway['ATRASO'].notna()
railway = railway[tem_atraso]

In [22]:
# Persist the frame without the index column so that reading the file back
# does not add an extra unnamed column.
railway.to_csv(path_or_buf="viagens_atrasadas.csv", index=False)

In [23]:
import pandas as pd

# Load the exported journeys; `atrasos` is the frame queried by start_to_end().
atrasos = pd.read_csv(filepath_or_buffer="viagens_atrasadas.csv")

In [25]:
def start_to_end(start, end, viagens=None):
    """Return the journeys that go from station ``start`` to station ``end``.

    Parameters
    ----------
    start : str
        Departure station, compared for exact equality with ``ESTACAO_IDA``.
    end : str
        Arrival station, compared for exact equality with ``ESTACAO_CHEGADA``.
    viagens : pandas.DataFrame, optional
        Frame to filter. When omitted, the notebook-level ``atrasos`` frame is
        used, so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index labels; empty when no
        journey matches.
    """
    if viagens is None:
        # Resolved at call time, so re-loading `atrasos` in another cell is honoured.
        viagens = atrasos
    mesma_rota = (viagens["ESTACAO_IDA"] == start) & (viagens["ESTACAO_CHEGADA"] == end)
    return viagens[mesma_rota]

# Show at most the first 20 journeys from York to Birmingham New Street.
start_to_end("York", "Birmingham New Street").head(20)

Unnamed: 0,ID,Date of Purchase,Time of Purchase,Purchase Type,Payment Method,Railcard,Ticket Class,Ticket Type,PRECO_MEDIO,ESTACAO_IDA,ESTACAO_CHEGADA,DATA_VIAGEM,HORARIO_SAIDA,TEMPO_ESPERADO_DA_VIAGEM,HORARIO_CHEGADA,Status da Jornada,MOTIVO_ATRASO,Refund Request,ATRASO
352,cd046d0a-8cfd-4268-96c6,2024-01-03,02:20:52,Online,Contactless,,Standard,Off-Peak,59,York,Birmingham New Street,2024-01-03,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
6090,5c2ba502-bf26-4f49-a955,2024-01-25,02:21:04,Online,Contactless,,Standard,Off-Peak,59,York,Birmingham New Street,2024-01-25,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
8323,7a16990e-fa9f-47cb-8271,2024-02-02,05:26:18,Online,Contactless,,Standard,Advance,39,York,Birmingham New Street,2024-02-26,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
10009,ddf76ef4-ad71-406a-9a10,2024-02-06,05:23:40,Online,Contactless,,Standard,Advance,39,York,Birmingham New Street,2024-02-21,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
11852,05720738-ec44-4ee7-b5f3,2024-02-11,05:21:56,Online,Contactless,,Standard,Advance,39,York,Birmingham New Street,2024-02-12,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
14969,36deb342-5cae-4601-a167,2024-03-02,02:28:47,Online,Contactless,,Standard,Off-Peak,59,York,Birmingham New Street,2024-03-02,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
17283,02875065-adfe-4237-8bb2,2024-03-11,02:24:56,Online,Contactless,,Standard,Off-Peak,59,York,Birmingham New Street,2024-03-11,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
18571,10db7283-155c-47f8-b0ce,2024-03-16,05:18:31,Online,Contactless,,Standard,Advance,39,York,Birmingham New Street,2024-03-17,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
20331,919a0ecf-48e3-44ab-8fab,2024-03-23,02:22:54,Online,Contactless,,Standard,Off-Peak,59,York,Birmingham New Street,2024-03-23,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
21131,056cf741-782c-4100-9dc3,2024-03-26,02:21:05,Online,Contactless,,First Class,Off-Peak,74,York,Birmingham New Street,2024-03-26,03:45:00,06:00:00,06:00:00,On Time,,No,0.0
