# Generate one year of services based on one week

In [50]:
import os
import pandas as pd

from datetime import date, datetime, timedelta

In [51]:
# Load the one-week sample (file names cover 2023-06-05 .. 2023-06-12).
# The identifier columns named in `dtype` are read as strings rather than
# being inferred as numbers.
df_stop_times = pd.read_csv(
    '../data/renfe_one_week/stop_times/stopTimes_MADRI_BARCE_2023-06-05_2023-06-12.csv',
    dtype={'stop_id': str},
)
df_prices = pd.read_csv(
    '../data/renfe_one_week/prices/prices_MADRI_BARCE_2023-06-05_2023-06-12.csv',
    dtype={'trip_id': str},
)

## Stop times

In [52]:
# Preview the first five raw stop-time rows.
df_stop_times.head(n=5)

Unnamed: 0,service_id,stop_id,arrival,departure
0,06301_05-06-2023-06.15,60000,0,0
1,06301_05-06-2023-06.15,71801,150,150
2,03063_05-06-2023-06.30,60000,0,0
3,03063_05-06-2023-06.30,70600,55,56
4,03063_05-06-2023-06.30,4040,81,82


In [53]:
# `service_id` looks like '<train_number>_<dd-mm-YYYY-HH.MM>': split it once on
# '_' and keep both halves as helper columns (they are dropped again before
# the result is written out).
stop_times_id_parts = df_stop_times['service_id'].str.split('_', expand=True)
df_stop_times['train_number'] = stop_times_id_parts[0]
df_stop_times['datetime'] = pd.to_datetime(stop_times_id_parts[1], format='%d-%m-%Y-%H.%M')
df_stop_times.head()

Unnamed: 0,service_id,stop_id,arrival,departure,train_number,datetime
0,06301_05-06-2023-06.15,60000,0,0,6301,2023-06-05 06:15:00
1,06301_05-06-2023-06.15,71801,150,150,6301,2023-06-05 06:15:00
2,03063_05-06-2023-06.30,60000,0,0,3063,2023-06-05 06:30:00
3,03063_05-06-2023-06.30,70600,55,56,3063,2023-06-05 06:30:00
4,03063_05-06-2023-06.30,4040,81,82,3063,2023-06-05 06:30:00


In [54]:
# One DataFrame per day of the sampled week, 5 June 2023 .. 11 June 2023.
# 5 June 2023 is a Monday, so the list position matches `date.weekday()`
# (Monday == 0), which is how the list is indexed when the year is generated.
stop_times = [
    df_stop_times[df_stop_times['datetime'].dt.date == date(2023, 6, day_of_month)]
    for day_of_month in range(5, 12)
]

In [55]:
# Replicate the template week over every day of 2023: each calendar day reuses
# the services of the template day with the same weekday, with the date part
# of `service_id` rewritten to the new day (train number and HH.MM are kept).
start_date = date(2023, 1, 1)
end_date = date(2023, 12, 31)
total_days = (end_date - start_date).days + 1  # both ends inclusive

new_stop_times = []
for day_offset in range(total_days):
    current_day = start_date + timedelta(days=day_offset)
    day_services = stop_times[current_day.weekday()].copy()
    day_label = current_day.strftime('%d-%m-%Y')
    time_label = day_services['datetime'].dt.strftime('%H.%M')
    day_services['service_id'] = day_services['train_number'] + '_' + day_label + '-' + time_label
    new_stop_times.append(day_services)

In [56]:
# Stack the per-day frames and discard the two helper columns that were only
# needed to rebuild `service_id`.
df_new_stop_times = pd.concat(new_stop_times).drop(columns=['train_number', 'datetime'])

In [57]:
# Write the generated year of stop times, creating the target folder if needed.
stop_times_out_dir = '../data/renfe_tft/stop_times'
stop_times_out_file = '../data/renfe_tft/stop_times/stopTimes_MADRI_BARCE_2023-01-01_2024-01-01.csv'
os.makedirs(stop_times_out_dir, exist_ok=True)
df_new_stop_times.to_csv(stop_times_out_file, index=False)

## Prices

In [58]:
# Preview the first five raw price rows.
df_prices.head(n=5)

Unnamed: 0,trip_id,origin,destination,train_type,departure,arrival,duration,service_id,Basico,Elige,Premium
0,3073,60000,70200,AVE,2023-06-05 07:30:00,2023-06-05 07:53:00,0 days 00:23:00,03073_05-06-2023-07.30,34.0,37.4,64.2
1,19725,60000,70200,AVE,2023-06-05 13:25:00,2023-06-05 13:48:00,0 days 00:23:00,19725_05-06-2023-13.25,37.4,40.8,
2,3173,60000,70200,AVE,2023-06-05 17:30:00,2023-06-05 17:53:00,0 days 00:23:00,03173_05-06-2023-17.30,34.0,37.4,64.2
3,3393,60000,70200,AVE,2023-06-05 19:05:00,2023-06-05 19:27:00,0 days 00:22:00,03393_05-06-2023-19.05,18.4,20.25,43.45
4,6309,60000,70200,AVLO,2023-06-05 19:30:00,2023-06-05 19:53:00,0 days 00:23:00,06309_05-06-2023-19.30,7.0,,


In [59]:
# `service_id` looks like '<train_number>_<dd-mm-YYYY-HH.MM>': split it once on
# '_' and keep both halves as helper columns (they are dropped again before
# the result is written out).
prices_id_parts = df_prices['service_id'].str.split('_', expand=True)
df_prices['train_number'] = prices_id_parts[0]
df_prices['datetime'] = pd.to_datetime(prices_id_parts[1], format='%d-%m-%Y-%H.%M')
df_prices.head()

Unnamed: 0,trip_id,origin,destination,train_type,departure,arrival,duration,service_id,Basico,Elige,Premium,train_number,datetime
0,3073,60000,70200,AVE,2023-06-05 07:30:00,2023-06-05 07:53:00,0 days 00:23:00,03073_05-06-2023-07.30,34.0,37.4,64.2,3073,2023-06-05 07:30:00
1,19725,60000,70200,AVE,2023-06-05 13:25:00,2023-06-05 13:48:00,0 days 00:23:00,19725_05-06-2023-13.25,37.4,40.8,,19725,2023-06-05 13:25:00
2,3173,60000,70200,AVE,2023-06-05 17:30:00,2023-06-05 17:53:00,0 days 00:23:00,03173_05-06-2023-17.30,34.0,37.4,64.2,3173,2023-06-05 17:30:00
3,3393,60000,70200,AVE,2023-06-05 19:05:00,2023-06-05 19:27:00,0 days 00:22:00,03393_05-06-2023-19.05,18.4,20.25,43.45,3393,2023-06-05 19:05:00
4,6309,60000,70200,AVLO,2023-06-05 19:30:00,2023-06-05 19:53:00,0 days 00:23:00,06309_05-06-2023-19.30,7.0,,,6309,2023-06-05 19:30:00


In [60]:
# One DataFrame per day of the sampled week, 5 June 2023 .. 11 June 2023.
# 5 June 2023 is a Monday, so the list position matches `date.weekday()`
# (Monday == 0), which is how the list is indexed when the year is generated.
prices = [
    df_prices[df_prices['datetime'].dt.date == date(2023, 6, day_of_month)]
    for day_of_month in range(5, 12)
]

In [61]:
# Replicate the template week of prices over every day of 2023. Each calendar
# day reuses the rows of the template day with the same weekday.
#
# For every copied row:
#   * `service_id` keeps the train number and the service's HH.MM, with the
#     date part replaced by the target day;
#   * `departure` is the row's OWN departure moved by a whole number of days
#     (target day minus the template service date);
#   * `arrival` is the shifted departure plus the row's `duration`.
#
# Shifting the existing `departure` (instead of rebuilding it from the time in
# `service_id`) matters for rows whose departure differs from the service start
# time, e.g. an origin/destination pair that starts at an intermediate stop or
# departs after midnight. Rebuilding from `service_id` would stamp all of those
# rows with the service's start time. For rows where `departure` already equals
# the service start the result is unchanged.
# NOTE(review): assumes `departure` in the input CSV is a parseable timestamp
# string such as '2023-06-05 07:30:00' - confirm for the full file.
new_prices = []
start_date = date(2023, 1, 1)
end_date = date(2023, 12, 31)
while start_date <= end_date:
    price = prices[start_date.weekday()].copy()

    # Whole-day offset between the template service date and the target day.
    day_shift = pd.Timestamp(start_date) - price['datetime'].dt.normalize()

    service_start = start_date.strftime('%d-%m-%Y') + '-' + price['datetime'].dt.strftime('%H.%M')
    price['service_id'] = price['train_number'] + '_' + service_start

    price['departure'] = pd.to_datetime(price['departure']) + day_shift
    price['arrival'] = price['departure'] + pd.to_timedelta(price['duration'])

    new_prices.append(price)
    start_date += timedelta(days=1)

In [62]:
# Stack the per-day frames and discard the two helper columns that were only
# needed to rebuild `service_id`.
df_new_prices = pd.concat(new_prices).drop(columns=['train_number', 'datetime'])

In [63]:
# Write the generated year of prices, creating the target folder if needed.
# Floats are written with two decimals.
prices_out_dir = '../data/renfe_tft/prices'
prices_out_file = '../data/renfe_tft/prices/prices_MADRI_BARCE_2023-01-01_2024-01-01.csv'
os.makedirs(prices_out_dir, exist_ok=True)
df_new_prices.to_csv(prices_out_file, index=False, float_format='%.2f')