In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import os

# Function creation

In [2]:
def create_initial_state(col1, col2, col3, input_filename, output_folder, output_filename):
    """
    Build the initial-state tensor from a CSV file and write it to disk.

    Each row of the CSV yields two features: the element-wise sum of
    col1 and col2, followed by col3. The result is stored as a float32
    PyTorch tensor with shape [rows, 1, 2]. Nothing is returned; a short
    summary is printed instead.

    Parameters:
    col1: Name of the first CSV column (summed with col2)
    col2: Name of the second CSV column (summed with col1)
    col3: Name of the CSV column used as the second feature
    input_filename: Path to the input CSV file
    output_folder: Folder the tensor is written to (created when missing)
    output_filename: Output file name; '.pt' is appended when not already present
    """
    source = pd.read_csv(input_filename)

    # Feature 0 is col1 + col2, feature 1 is col3
    summed = source[col1].values + source[col2].values
    second = source[col3].values
    state_data = np.column_stack([summed, second])

    # [rows, 2] -> [rows, 1, 2]
    tensor = torch.unsqueeze(torch.tensor(state_data, dtype=torch.float32), 1)

    os.makedirs(output_folder, exist_ok=True)

    # Only append the extension when the caller did not supply it
    target_name = output_filename if output_filename.endswith('.pt') else output_filename + '.pt'
    filepath = os.path.join(output_folder, target_name)
    torch.save(tensor, filepath)

    summary = (
        f"Tensor saved to {filepath}",
        f"Tensor shape: {tensor.shape}",
        f"Column 1: {col1} + {col2}",
        f"Column 2: {col3}",
    )
    for line in summary:
        print(line)

In [3]:
def df_to_tensor(df):
    """
    Turn the date columns of a dataframe into a float32 PyTorch tensor.

    Every column other than 'Store' and 'Product' is treated as a date
    column. The column labels are sorted in ascending order before the
    values are extracted, which is chronological for ISO-formatted
    (YYYY-MM-DD) labels.

    Parameters:
    df: DataFrame with Store, Product columns and date columns

    Returns:
    tensor: PyTorch tensor with shape [rows, 1, time]
    """
    ordered_labels = sorted(df.columns.difference(['Store', 'Product']))
    values = df[ordered_labels].values

    # [rows, time] -> [rows, 1, time]
    return torch.unsqueeze(torch.tensor(values, dtype=torch.float32), 1)


def save_df_as_tensor(df, folder, filename):
    """
    Convert a dataframe with df_to_tensor and write the resulting tensor to disk.

    The saved tensor has shape [rows, 1, time]. The destination path and
    the tensor shape are printed once the file has been written.

    Parameters:
    df: DataFrame with Store, Product columns and date columns
    folder: Destination folder (created when missing)
    filename: Output file name; '.pt' is appended when not already present
    """
    tensor = df_to_tensor(df)

    os.makedirs(folder, exist_ok=True)

    # An empty suffix leaves a name that already ends in '.pt' untouched
    suffix = '' if filename.endswith('.pt') else '.pt'
    filepath = os.path.join(folder, filename + suffix)
    torch.save(tensor, filepath)

    print(f"Tensor saved to {filepath}")
    print(f"Tensor shape: {tensor.shape}")

def save_tensor(tensor, folder, filename):
    """
    Write a PyTorch tensor to `folder/filename` and report where it went.

    Parameters:
    tensor: PyTorch tensor to save
    folder: Destination folder (created when missing)
    filename: Output file name; '.pt' is appended when not already present
    """
    os.makedirs(folder, exist_ok=True)

    target_name = filename if filename.endswith('.pt') else filename + '.pt'
    filepath = os.path.join(folder, target_name)
    torch.save(tensor, filepath)

    for line in (f"Tensor saved to {filepath}", f"Tensor shape: {tensor.shape}"):
        print(line)

def save_df_to_csv(df, folder, filename):
    """
    Write a dataframe to a CSV file (without the index) and report where it went.

    Parameters:
    df: DataFrame to save
    folder: Destination folder (created when missing)
    filename: Output file name; '.csv' is appended when not already present
    """
    os.makedirs(folder, exist_ok=True)

    has_extension = filename.endswith('.csv')
    filepath = os.path.join(folder, filename if has_extension else filename + '.csv')

    # The row index is intentionally left out of the file
    df.to_csv(filepath, index=False)

    print(f"DataFrame saved to {filepath}")
    print(f"Shape: {df.shape}")

In [4]:
def create_date_features(df, additional_weeks=0):
    """
    Derive one row of calendar features per date column of a sales dataframe.

    Every column other than 'Store' and 'Product' is interpreted as a date
    label. The labels are sorted, parsed as datetimes and, optionally,
    extended past the last date in 7-day steps.

    Parameters:
    df: DataFrame with date columns (excluding Store and Product)
    additional_weeks: int, number of extra dates to append after the last
                      one, spaced 7 days apart (default: 0)

    Returns:
    DataFrame with columns: date, day_of_week, month_1, month_2, ..., month_12,
                           year, day_of_month, days_from_christmas
    """
    date_labels = sorted(df.columns.difference(['Store', 'Product']))
    dates = pd.to_datetime(date_labels)

    # Extend the calendar beyond the data, one date every 7 days
    if additional_weeks > 0:
        final_date = dates[-1]
        future_dates = pd.DatetimeIndex(
            [final_date + pd.Timedelta(days=7 * step) for step in range(1, additional_weeks + 1)]
        )
        dates = dates.append(future_dates)

    def signed_days_to_nearest_christmas(day):
        # Signed offsets (date minus Dec 25) for the same, previous and next
        # year, in that order; the one with the smallest magnitude wins
        offsets = [
            (day - pd.Timestamp(year=day.year + shift, month=12, day=25)).days
            for shift in (0, -1, 1)
        ]
        return min(offsets, key=abs)

    # Assemble the columns directly in their final order
    month_numbers = dates.month
    columns = {
        'date': dates,
        'day_of_week': dates.dayofweek,  # Monday=0, Sunday=6
    }
    for month in range(1, 13):
        # One-hot indicator for each calendar month
        columns[f'month_{month}'] = (month_numbers == month).astype(int)
    columns['year'] = dates.year
    columns['day_of_month'] = dates.day

    date_features = pd.DataFrame(columns)
    date_features['days_from_christmas'] = date_features['date'].apply(signed_days_to_nearest_christmas)

    return date_features

# Main script

### First, we read the data and create the respective dfs

In [5]:
# Load the Week 0 CSV inputs; each name on the left is bound to the file
# listed at the same position on the right
init_state, sales, stock, product_info = (
    pd.read_csv(csv_path)
    for csv_path in (
        'vn2_data/Week 0 - 2024-04-08 - Initial State.csv',
        'vn2_data/Week 0 - 2024-04-08 - Sales.csv',
        'vn2_data/Week 0 - In Stock.csv',
        'vn2_data/Week 0 - Master.csv',
    )
)

# Preview the first rows of the sales frame
print(sales.head())

   Store  Product  2021-04-12  2021-04-19  2021-04-26  2021-05-03  2021-05-10  \
0      0      126         0.0         0.0         3.0         3.0         0.0   
1      0      182         0.0         0.0         0.0         0.0         0.0   
2      1      124        13.0         4.0        10.0         5.0         1.0   
3      2      124         5.0         5.0        12.0        16.0        10.0   
4      2      126         0.0         0.0         6.0         5.0         7.0   

   2021-05-17  2021-05-24  2021-05-31  ...  2024-02-05  2024-02-12  \
0         1.0         1.0         0.0  ...         0.0         2.0   
1         0.0         0.0         0.0  ...         1.0         1.0   
2         2.0         3.0         4.0  ...         8.0        17.0   
3         8.0        10.0         9.0  ...         6.0         8.0   
4         4.0         1.0         2.0  ...         2.0         0.0   

   2024-02-19  2024-02-26  2024-03-04  2024-03-11  2024-03-18  2024-03-25  \
0         2.0  

In [7]:
# Output directory used by the cells below for every processed dataframe and tensor they write
save_directory = 'vn2_processed_data/new_data'

In [7]:
# Toggles for the optional exports in this cell
save_the_dfs = True        # write the stock and sales frames as tensors
save_product_info = True   # write the product information frame as a CSV

if save_the_dfs:
    # One tensor per dataframe, each of shape [products, 1, time]
    save_df_as_tensor(stock, save_directory, 'stock')
    save_df_as_tensor(sales, save_directory, 'sales')  # the '.pt' extension is added automatically

if save_product_info:
    # Agents read this CSV to build a tensor sized to the product features
    save_df_to_csv(product_info, save_directory, 'product_features')


Tensor saved to vn2_processed_data/new_data/stock.pt
Tensor shape: torch.Size([599, 1, 165])
Tensor saved to vn2_processed_data/new_data/sales.pt
Tensor shape: torch.Size([599, 1, 157])
DataFrame saved to vn2_processed_data/new_data/product_features.csv
Shape: (599, 8)


#### Here, we create a dataframe in which each row holds date-related information. This is valuable input for the neural policies we create, since it allows them to learn non-stationary patterns throughout the year, such as the proximity to Christmas and the month of the year.

In [10]:
# Toggle: also write date_df to a CSV file
save_date_df = True

# The sales frame supplies the weeks for which date features are created.
# additional_weeks appends one extra date per week; remember that predictions
# use the last row of this frame. For the first round we need a date for the
# week starting April 15th, hence additional_weeks=1.
date_df = create_date_features(sales, additional_weeks=1)
print(date_df.head())

if save_date_df:
    save_df_to_csv(date_df, save_directory, 'date_features')


        date  day_of_week  month_1  month_2  month_3  month_4  month_5  \
0 2021-04-12            0        0        0        0        1        0   
1 2021-04-19            0        0        0        0        1        0   
2 2021-04-26            0        0        0        0        1        0   
3 2021-05-03            0        0        0        0        0        1   
4 2021-05-10            0        0        0        0        0        1   

   month_6  month_7  month_8  month_9  month_10  month_11  month_12  year  \
0        0        0        0        0         0         0         0  2021   
1        0        0        0        0         0         0         0  2021   
2        0        0        0        0         0         0         0  2021   
3        0        0        0        0         0         0         0  2021   
4        0        0        0        0         0         0         0  2021   

   day_of_month  days_from_christmas  
0            12                  108  
1            1

### The neural policies also accept time-related data for each product. This could represent, for example, that product i had a promotion at time t. The resulting tensor must have shape [features, products, 1, periods]. When we train our agents, we will create a separate [batch, 1, past periods] tensor for each feature, which will be fed as input to the neural network. As an example, we stack the stock tensor twice to create a tensor of shape [2, products, 1, periods].

In [8]:
# Toggle: write the time-product feature tensor to disk
save_time_product_tensor = True

# Two copies of the stock tensor stacked along a new leading feature axis,
# giving shape [features, products, 1, periods]
stock_tensor = df_to_tensor(stock)
stock_tensor_2copies = torch.stack((stock_tensor, stock_tensor), dim=0)
print(stock_tensor_2copies.shape)

if save_time_product_tensor:
    save_tensor(stock_tensor_2copies, save_directory, 'time_product_features')


torch.Size([2, 599, 1, 165])
Tensor saved to vn2_processed_data/new_data/time_product_features.pt
Tensor shape: torch.Size([2, 599, 1, 165])


### Create a tensor for the initial state, with shape [products, 1, 2], since the lead time is 2 periods (the 1 comes from the number of stores, which here is always 1). We do this by reading a CSV, creating a tensor with columns [col1 + col2, col3], and then saving it. This tensor will be used as the initial inventory state when we create our outputs for submission!

In [12]:
# The first tensor column corresponds to 'End Inventory' + 'In Transit W+1' and the second to 'In Transit W+2'
create_initial_state(
    col1='End Inventory',
    col2='In Transit W+1',
    col3='In Transit W+2',
    input_filename='vn2_data/Week 0 - 2024-04-08 - Initial State.csv',
    output_folder=save_directory,
    output_filename='inventory_state'
)

Tensor saved to vn2_processed_data/new_data/inventory_state.pt
Tensor shape: torch.Size([599, 1, 2])
Column 1: End Inventory + In Transit W+1
Column 2: In Transit W+2
