# Generate one year of services based on one week

In [1]:
import holidays
import os
import pandas as pd
import random

from datetime import date, datetime, timedelta

In [2]:
# Load the reference week (2023-05-15 .. 2023-05-22 files) that serves as the
# template for the generated year. Identifier columns are read as strings.
df_stop_times = pd.read_csv(
    '../data/renfe/stop_times/stopTimes_MADRI_BARCE_2023-05-15_2023-05-22.csv',
    dtype={'stop_id': str},
)
df_prices = pd.read_csv(
    '../data/renfe/prices/prices_MADRI_BARCE_2023-05-15_2023-05-22.csv',
    dtype={'trip_id': str},
)

## Stop times

In [3]:
# Preview the first five rows of the raw stop times.
df_stop_times.head(n=5)

Unnamed: 0,service_id,stop_id,arrival,departure
0,06301_15-05-2023-06.15,60000,0,0
1,06301_15-05-2023-06.15,71801,150,150
2,03063_15-05-2023-06.30,60000,0,0
3,03063_15-05-2023-06.30,70600,55,56
4,03063_15-05-2023-06.30,4040,81,82


In [4]:
# A service_id looks like '<train_number>_<dd-mm-YYYY-HH.MM>': split it once on
# '_' and derive the train number and the departure timestamp from the parts.
stop_times_id_parts = df_stop_times['service_id'].str.split('_', expand=True)
df_stop_times['train_number'] = stop_times_id_parts[0]
df_stop_times['datetime'] = pd.to_datetime(stop_times_id_parts[1], format='%d-%m-%Y-%H.%M')
df_stop_times.head()

Unnamed: 0,service_id,stop_id,arrival,departure,train_number,datetime
0,06301_15-05-2023-06.15,60000,0,0,6301,2023-05-15 06:15:00
1,06301_15-05-2023-06.15,71801,150,150,6301,2023-05-15 06:15:00
2,03063_15-05-2023-06.30,60000,0,0,3063,2023-05-15 06:30:00
3,03063_15-05-2023-06.30,70600,55,56,3063,2023-05-15 06:30:00
4,03063_15-05-2023-06.30,4040,81,82,3063,2023-05-15 06:30:00


Extract the stop times of that week

In [5]:
# One frame per day of the reference week, 15..21 May 2023. The list position
# is later looked up with `date.weekday()`.
stop_times = [
    df_stop_times[df_stop_times['datetime'].dt.date == date(2023, 5, day)]
    for day in range(15, 22)
]

Generate the list of new stop times

In [6]:
# Replicate the reference week over every day of 2023: each calendar day reuses
# the stop times recorded for the same weekday, with the date part of the
# service_id rewritten to the new day (format: <train>_<dd-mm-YYYY>-<HH.MM>).
new_stop_times = []
start_date = date(2023, 1, 1)
end_date = date(2023, 12, 31)
while start_date <= end_date:
    weekday_template = stop_times[start_date.weekday()]
    day_infix = '_' + start_date.strftime('%d-%m-%Y') + '-'
    # `assign` returns a fresh frame, so the weekly template is never mutated.
    new_stop_times.append(
        weekday_template.assign(
            service_id=weekday_template['train_number']
            + day_infix
            + weekday_template['datetime'].dt.strftime('%H.%M')
        )
    )
    start_date = start_date + timedelta(days=1)

In [7]:
# Stack the per-day frames and discard the helper columns that were only
# needed to rebuild the service_id.
df_new_stop_times = pd.concat(new_stop_times).drop(columns=['train_number', 'datetime'])

In [8]:
# Make sure the output folder exists, then write the generated year of stop
# times without the DataFrame index.
os.makedirs(name='../data/renfe_tft/stop_times', exist_ok=True)
df_new_stop_times.to_csv(
    path_or_buf='../data/renfe_tft/stop_times/stopTimes_MADRI_BARCE_2023-01-01_2024-01-01.csv',
    index=False,
)

## Prices

In [9]:
# Preview the first five rows of the raw prices.
df_prices.head(n=5)

Unnamed: 0,trip_id,origin,destination,train_type,departure,arrival,duration,service_id,Basico,Elige,Premium
0,3073,60000,70200,AVE,2023-05-15 07:30:00,2023-05-15 07:53:00,0 days 00:23:00,03073_15-05-2023-07.30,34.0,37.4,64.2
1,19725,60000,70200,AVE,2023-05-15 13:25:00,2023-05-15 13:48:00,0 days 00:23:00,19725_15-05-2023-13.25,34.0,37.4,64.2
2,3173,60000,70200,AVE,2023-05-15 17:30:00,2023-05-15 17:53:00,0 days 00:23:00,03173_15-05-2023-17.30,37.4,40.8,64.2
3,3393,60000,70200,AVE,2023-05-15 19:05:00,2023-05-15 19:27:00,0 days 00:22:00,03393_15-05-2023-19.05,18.4,20.25,43.45
4,6309,60000,70200,AVLO,2023-05-15 19:30:00,2023-05-15 19:53:00,0 days 00:23:00,06309_15-05-2023-19.30,7.0,,


In [10]:
# A service_id looks like '<train_number>_<dd-mm-YYYY-HH.MM>': split it once on
# '_' and derive the train number and the departure timestamp from the parts.
prices_id_parts = df_prices['service_id'].str.split('_', expand=True)
df_prices['train_number'] = prices_id_parts[0]
df_prices['datetime'] = pd.to_datetime(prices_id_parts[1], format='%d-%m-%Y-%H.%M')
df_prices.head()

Unnamed: 0,trip_id,origin,destination,train_type,departure,arrival,duration,service_id,Basico,Elige,Premium,train_number,datetime
0,3073,60000,70200,AVE,2023-05-15 07:30:00,2023-05-15 07:53:00,0 days 00:23:00,03073_15-05-2023-07.30,34.0,37.4,64.2,3073,2023-05-15 07:30:00
1,19725,60000,70200,AVE,2023-05-15 13:25:00,2023-05-15 13:48:00,0 days 00:23:00,19725_15-05-2023-13.25,34.0,37.4,64.2,19725,2023-05-15 13:25:00
2,3173,60000,70200,AVE,2023-05-15 17:30:00,2023-05-15 17:53:00,0 days 00:23:00,03173_15-05-2023-17.30,37.4,40.8,64.2,3173,2023-05-15 17:30:00
3,3393,60000,70200,AVE,2023-05-15 19:05:00,2023-05-15 19:27:00,0 days 00:22:00,03393_15-05-2023-19.05,18.4,20.25,43.45,3393,2023-05-15 19:05:00
4,6309,60000,70200,AVLO,2023-05-15 19:30:00,2023-05-15 19:53:00,0 days 00:23:00,06309_15-05-2023-19.30,7.0,,,6309,2023-05-15 19:30:00


Extract the prices of that week

In [11]:
# One frame per day of the reference week, 15..21 May 2023. The list position
# is later looked up with `date.weekday()`.
prices = [
    df_prices[df_prices['datetime'].dt.date == date(2023, 5, day)]
    for day in range(15, 22)
]

Generate the list of new prices

In [12]:
# Holiday calendar built with the `holidays` package's ES (Spain) class;
# check_holidays tests dates for membership in it (`day in es_holidays`).
es_holidays = holidays.ES()
def check_holidays(date):
    """Tell whether `date` is a holiday or falls shortly after one.

    Args:
        date: A `datetime.date` to test. The parameter name shadows the
            imported `date` class, but only inside this function.

    Returns:
        bool: True when `date` itself, the day before it, or two days before
        it is contained in the module-level `es_holidays` calendar; False
        otherwise.
    """
    # The original evaluated this expression but never returned it, so the
    # function always yielded None and callers always took the non-holiday path.
    return any(date - timedelta(days=offset) in es_holidays for offset in (2, 1, 0))

In [13]:
# Replicate the reference week of prices over every day of 2023 and perturb the
# fares so that the generated year is not a plain copy of the template week.
#
# A dedicated, seeded generator keeps the synthetic prices reproducible across
# "Restart & Run All" without touching the global `random` state.
price_rng = random.Random(42)

new_prices = []
start_date = date(2023, 1, 1)
end_date = date(2023, 12, 31)
while start_date <= end_date:
    weekday = start_date.weekday()  # Monday == 0 ... Sunday == 6
    price = prices[weekday].copy()

    # Rebuild the identifiers and timestamps for the new calendar day.
    departure = start_date.strftime('%d-%m-%Y') + '-' + price['datetime'].dt.strftime('%H.%M')
    departure_ts = pd.to_datetime(departure, format='%d-%m-%Y-%H.%M')
    price['service_id'] = price['train_number'] + '_' + departure
    price['departure'] = departure_ts
    price['arrival'] = departure_ts + pd.to_timedelta(price['duration'])

    # Day-wide factor: 1.1-1.3 when check_holidays flags the day, 0.8-1.2 otherwise.
    if check_holidays(start_date):
        increase_rate = price_rng.uniform(1.1, 1.3)
    else:
        increase_rate = price_rng.uniform(0.8, 1.2)

    # Fare-specific weekday factors: Basico is discounted on Sundays; Elige is
    # discounted on Wednesdays and marked up on Fridays.
    increase_rate_basico = 0.6 if weekday == 6 else 1
    # The original assigned the Wednesday factor and then unconditionally
    # overwrote it with the Friday expression, so 0.9 was never applied.
    if weekday == 2:
        increase_rate_elige = 0.9
    elif weekday == 4:
        increase_rate_elige = 1.3
    else:
        increase_rate_elige = 1

    price['Basico'] = price['Basico'] * increase_rate * increase_rate_basico
    price['Elige'] = price['Elige'] * increase_rate * increase_rate_elige
    price['Premium'] = price['Premium'] * increase_rate
    new_prices.append(price)
    start_date += timedelta(days=1)

In [14]:
# Stack the per-day frames and discard the helper columns that were only
# needed to rebuild the service_id and timestamps.
df_new_prices = pd.concat(new_prices).drop(columns=['train_number', 'datetime'])

In [15]:
# Make sure the output folder exists, then write the generated year of prices
# without the DataFrame index, with floats rounded to two decimals.
os.makedirs(name='../data/renfe_tft/prices', exist_ok=True)
df_new_prices.to_csv(
    path_or_buf='../data/renfe_tft/prices/prices_MADRI_BARCE_2023-01-01_2024-01-01.csv',
    index=False,
    float_format='%.2f',
)