In [50]:
import pandas as pd
import numpy as np
from datetime import datetime
from bakery_sales.preprocessor import preprocess

from darts import TimeSeries

# Dates flagged as holidays when the caller does not supply its own list.
# NOTE(review): these look like French public holidays and stop at 2022-08-15;
# pass `holidays=` to `prediction` for data outside that range.
_DEFAULT_HOLIDAYS = (
    '2021-01-01',
    '2021-04-05',
    '2021-05-01',
    '2021-05-08',
    '2021-05-13',
    '2021-05-24',
    '2021-07-14',
    '2021-08-15',
    '2021-11-01',
    '2021-11-11',
    '2021-12-25',
    '2022-01-01',
    '2022-04-18',
    '2022-05-01',
    '2022-05-08',
    '2022-05-26',
    '2022-06-06',
    '2022-07-14',
    '2022-08-15',
)

# Columns handed to the model as both past and future covariates.
_COVARIATE_COLUMNS = [
    'temperature_2m (°C)',
    'relative_humidity_2m (%)',
    'rain (mm)',
    'wind_speed_100m (km/h)',
    'day_of_week_sin',
    'day_of_week_cos',
    'month_sin',
    'month_cos',
    'isHoliday',
]

# Number of steps requested from the model.
# NOTE(review): presumably one week of hourly steps - confirm against the
# frequency of the weather data.
_FORECAST_STEPS = 7 * 24


def _build_weather_features(weather_file, holidays):
    """Return a new, timestamp-indexed weather frame with calendar features.

    The input frame is left untouched. The result has the 'time' column
    removed, a DatetimeIndex named 'timestamp', and the added columns
    'day_of_week_sin', 'day_of_week_cos', 'month_sin', 'month_cos' and
    'isHoliday' (1 when the row's calendar day is in `holidays`, else 0).
    """
    # `drop` returns a new frame, so the caller's DataFrame is never modified.
    features = weather_file.drop(columns=['time'])
    timestamps = pd.to_datetime(weather_file['time'], format='%Y-%m-%dT%H:%M')
    features.index = pd.DatetimeIndex(timestamps, name='timestamp')

    # Cyclical encoding: map weekday (0-6) and month (1-12) onto the unit
    # circle so that the last and first values of each cycle end up adjacent.
    weekday_angle = 2 * np.pi * features.index.weekday.to_numpy() / 7
    features['day_of_week_sin'] = np.sin(weekday_angle)
    features['day_of_week_cos'] = np.cos(weekday_angle)

    month_angle = 2 * np.pi * features.index.month.to_numpy() / 12
    features['month_sin'] = np.sin(month_angle)
    features['month_cos'] = np.cos(month_angle)

    # Normalise every holiday to 'YYYY-MM-DD' so strings, dates and
    # Timestamps are all accepted, then compare on the calendar day only.
    holiday_days = sorted({pd.Timestamp(day).strftime('%Y-%m-%d') for day in holidays})
    row_days = features.index.strftime('%Y-%m-%d')
    features['isHoliday'] = row_days.isin(holiday_days).astype(int)

    return features


def prediction(sales_file, weather_file, model, holidays=None):
    """Forecast 'traditional_baguette' sales from sales history and weather data.

    Parameters
    ----------
    sales_file : pandas.DataFrame
        Raw sales data; passed straight to `preprocess`. The preprocessed
        frame is expected to be indexed by timestamp and to contain a
        'traditional_baguette' column.
    weather_file : pandas.DataFrame
        Weather data with a 'time' column formatted as '%Y-%m-%dT%H:%M' and
        the columns 'temperature_2m (°C)', 'relative_humidity_2m (%)',
        'rain (mm)' and 'wind_speed_100m (km/h)'. It is not modified.
    model : object
        Fitted forecasting model exposing
        `predict(n, series, past_covariates, future_covariates)` whose result
        has a `pd_dataframe()` method.
    holidays : iterable, optional
        Dates (anything `pandas.Timestamp` accepts) to flag in the
        'isHoliday' covariate. Defaults to the built-in list covering
        2021-01-01 through 2022-08-15.

    Returns
    -------
    dict
        {'values': [...], 'dates': [...]}: the first column of the forecast
        as a list of floats and the matching timestamps formatted as
        '%Y-%m-%d %H:%M:%S' strings.
    """
    data_target = preprocess(sales_file)
    df_weather = _build_weather_features(
        weather_file,
        _DEFAULT_HOLIDAYS if holidays is None else holidays,
    )

    # Align sales onto the weather timeline, keep only the span for which
    # sales exist, and treat timestamps without sales as zero.
    final_data = df_weather.join(data_target, how='left')
    final_data = final_data.loc[:data_target.index.max()]
    final_data = final_data.fillna(value=0)

    series = TimeSeries.from_dataframe(final_data[['traditional_baguette']])
    past_covariates = TimeSeries.from_dataframe(final_data[_COVARIATE_COLUMNS])
    # Future covariates use the full weather frame, including rows after the
    # last sales timestamp.
    future_covariates = TimeSeries.from_dataframe(df_weather[_COVARIATE_COLUMNS])

    output = model.predict(
        n=_FORECAST_STEPS,
        series=series,
        past_covariates=past_covariates,
        future_covariates=future_covariates,
    ).pd_dataframe()

    # `.tolist()` yields native Python floats/strings rather than numpy scalars.
    values = output.iloc[:, 0].tolist()
    dates = output.index.strftime('%Y-%m-%d %H:%M:%S').tolist()

    print("code works ✅")

    return {'values': values, 'dates': dates}

In [51]:
from darts.models.forecasting.tft_model import TFTModel

# Inputs for this smoke run: test weather/sales extracts and the tuned TFT weights.
METEO_CSV = "../raw_data/test/open-meteo-48.82N2.29E43m_test - Copy.csv"
SALES_CSV = "../raw_data/test/bakerysales - new.csv"
TFT_WEIGHTS = "../weights/tft_tuning_2.pt"

df_meteo = pd.read_csv(METEO_CSV)
df_sales = pd.read_csv(SALES_CSV)
tft_model = TFTModel.load(TFT_WEIGHTS)

# Last expression of the cell: the forecast dict is displayed as the cell output.
prediction(df_sales, df_meteo, model=tft_model)

  df['date_time'] = pd.to_datetime(df['date_time'])
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/reecepalmer/.pyenv/versions/3.10.6/envs/bakery/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/reecepalmer/.pyenv/versions/3.10.6/envs/bakery/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 25.40it/s]
code works ✅


{'values': [-0.025410055295692104,
  -0.08050527248952534,
  0.01795240759266693,
  0.0313716010108787,
  -0.03364969772690096,
  0.011831749571571262,
  0.029062823257919562,
  0.0019021050218686743,
  0.0024287331358459566,
  0.09352610321461939,
  0.006693719082365333,
  -0.0033060891960099703,
  -0.017001685767147605,
  1.0530727705348382,
  12.94497446400932,
  14.275791580990731,
  17.338885457216435,
  11.021115142689812,
  26.69479111231285,
  0.013489189786289234,
  0.10274587952681036,
  0.030015558386327842,
  10.395555759481311,
  9.017617317317326,
  7.533651319335273,
  0.015124624273833532,
  0.02002785969740783,
  -0.001637537715079389,
  0.26733575740699866,
  -0.017336672145088394,
  0.0009381877237658136,
  0.008696001538300515,
  -0.014069685418457146,
  -0.02492528546474252,
  0.03666654975441449,
  0.06925357909858336,
  0.017553143060023375,
  -0.0633001905640852,
  -0.10590980824815663,
  12.905249728466954,
  21.93996858921502,
  21.758047237913416,
  21.269647

In [33]:
# Display the raw sales DataFrame to inspect its columns and date range.
# NOTE(review): this cell's execution count (33) is lower than that of the cell
# that reads df_sales from CSV (51), so the output shown may come from an
# earlier kernel state - re-run top to bottom on a fresh kernel to confirm.
df_sales

Unnamed: 0.1,Unnamed: 0,date,time,ticket_number,article,Quantity,unit_price,date_time
0,260299,13/11/2023,07:55,220379,BANETTE,2,"1,05 €",2023-11-13 07:55:00
1,260300,13/11/2023,07:55,220379,CROISSANT,2,"1,10 €",2023-11-13 07:55:00
2,260303,13/11/2023,07:56,220380,TRADITIONAL BAGUETTE,2,"1,20 €",2023-11-13 07:56:00
3,260304,13/11/2023,07:56,220380,BAGUETTE,1,"0,90 €",2023-11-13 07:56:00
4,260307,13/11/2023,07:59,220381,CEREAL BAGUETTE,1,"1,25 €",2023-11-13 07:59:00
...,...,...,...,...,...,...,...,...
4309,269931,03/12/2023,18:16,223038,VIK BREAD,1,"2,50 €",2023-12-03 18:16:00
4310,269932,03/12/2023,18:16,223038,TRADITIONAL BAGUETTE,1,"1,20 €",2023-12-03 18:16:00
4311,269933,03/12/2023,18:16,223038,COUPE,1,"0,15 €",2023-12-03 18:16:00
4312,269936,03/12/2023,18:20,223039,COUPE,2,"0,15 €",2023-12-03 18:20:00
