In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error,accuracy_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# Define the function to map months to seasons in New York
def map_season(month):
    """Return the season label for a calendar month number.

    Months 12, 1, 2 map to 'winter'; 3-5 to 'spring'; 6-8 to 'summer'.
    Every other value (including 9-11) falls through to 'Autumn'.
    """
    season_months = (
        ('winter', (12, 1, 2)),
        ('spring', (3, 4, 5)),
        ('summer', (6, 7, 8)),
    )
    for season, months in season_months:
        if month in months:
            return season
    # Anything not matched above is treated as autumn.
    return 'Autumn'

In [None]:
# Function to expand monthly totals into daily values
def expand_monthly_to_daily(df, date_col, value_cols, name_col, season_col):
    """Spread each monthly row evenly over the calendar days of its month.

    For every row of ``df`` one output row is produced per day of the month
    that ``row[date_col]`` falls in. Each column in ``value_cols`` is divided
    by the number of days in that month; ``name_col`` and ``season_col`` are
    copied unchanged onto every daily row.

    Parameters
    ----------
    df : DataFrame with one row per (item, month).
    date_col : column holding the month's date (assumed to be a pandas
        Timestamp, since ``.replace(day=1)`` and a ``MonthEnd`` offset are
        applied to it).
    value_cols : iterable of column names holding monthly totals.
    name_col, season_col : columns carried over as-is.

    Returns
    -------
    DataFrame with columns ``date_col``, ``name_col``, ``season_col`` and
    then ``value_cols``, one row per day.
    """
    records = []
    for _, monthly in df.iterrows():
        stamp = monthly[date_col]
        first_day = stamp.replace(day=1)
        # MonthEnd(0) rolls forward to the last day of the current month
        # (and leaves a date that is already a month end untouched).
        last_day = stamp + pd.offsets.MonthEnd(0)
        days = pd.date_range(start=first_day, end=last_day, freq='D')

        n_days = len(days)
        per_day = {}
        for col in value_cols:
            per_day[col] = monthly[col] / n_days

        records.extend(
            {
                date_col: day,
                name_col: monthly[name_col],
                season_col: monthly[season_col],
                **per_day,
            }
            for day in days
        )
    return pd.DataFrame(records)

In [None]:
# Load the dataset
# The codec name must be spelled exactly 'unicode_escape'; any stray character
# in it makes Python's codec lookup fail with "LookupError: unknown encoding".
df = pd.read_csv("2021_2022_NewYork_Sales_Cancel.csv", encoding='unicode_escape')
# Combine the 'years' and 'months' columns into a month-start timestamp
# (e.g. years=2021, months=1 -> 2021-01-01).
df['ds'] = pd.to_datetime(df['years'].astype(str) + '-' + df['months'].astype(str))
# Season label for each month, then replaced by its integer category code.
df['season'] = df['months'].apply(map_season)
df['season'] = df['season'].astype('category').cat.codes

In [None]:
# Sanity check: show the loaded monthly rows of a single product.
df.loc[df['ShortName'].eq('Tm date chny10oz')]

Unnamed: 0,item_id,item_code,ShortName,years,months,saleamt,saleqty,cancelamt,cancelqty,ds,season
8,9,34,Tm date chny10oz,2021,1,10255.7761,5088.0,1106.05,660.0,2021-01-01,3
4338,9,34,Tm date chny10oz,2021,2,7019.486,3672.0,60.33,36.0,2021-02-01,3
8668,9,34,Tm date chny10oz,2021,3,16288.0084,7248.0,1075.75,780.0,2021-03-01,1
12998,9,34,Tm date chny10oz,2021,4,20391.0225,9072.0,0.0,0.0,2021-04-01,1
17328,9,34,Tm date chny10oz,2021,5,12770.0938,5904.0,0.0,0.0,2021-05-01,1
21658,9,34,Tm date chny10oz,2021,6,12762.5723,5640.0,3063.8,1704.0,2021-06-01,2
25988,9,34,Tm date chny10oz,2021,7,12809.0034,5916.0,38.84,24.0,2021-07-01,2
30318,9,34,Tm date chny10oz,2021,8,6613.7558,3120.0,6265.98,3504.0,2021-08-01,2
34648,9,34,Tm date chny10oz,2021,9,11715.7833,5292.0,4677.67,2592.0,2021-09-01,0
38978,9,34,Tm date chny10oz,2021,10,3174.6155,1524.0,14900.44,8316.0,2021-10-01,0


In [None]:
# ---------------------------------------------------------------------------
# Forecast, for every product, the net quantity (predicted sales minus
# predicted cancellations) over the forecast window, and append one summary
# row per product to PREDICTIONS_CSV together with the actual quantities of
# the same calendar month in the historical years.
#
# The full dataset `df` is never modified here: each product works on its own
# filtered frame, so the source CSV does not have to be re-read per product.
# ---------------------------------------------------------------------------

# Everything a reader might want to tune lives in this block.
FORECAST_START = '2023-12-01'  ## change here
FORECAST_END = '2023-12-31'  ## change here
FORECAST_MONTH = 12  ## change here: calendar month of the forecast window
HISTORY_YEARS = (2021, 2022)  # years whose FORECAST_MONTH actuals are reported
MAX_PRODUCTS = 210  # only the first MAX_PRODUCTS distinct ShortName values are processed
CV_SPLITS = 100  # folds of the diagnostic time-series cross-validation (lower it to speed things up)
FEATURE_COLUMNS = ['season', 'year', 'month', 'day', 'ShortName_encoded']
PREDICTIONS_CSV = 'predictions_December_2023.csv'
PREDICTION_COLUMNS = (
    ['ShortName', 'predicted_saleqty']
    + [f'{year}_saleqty' for year in HISTORY_YEARS]
    + [f'{year}_cancelqty' for year in HISTORY_YEARS]
)
# NOTE(review): with learning_rate=0.0001 and only 100 trees the boosted
# corrections are scaled down very heavily, so predictions presumably stay
# close to the model's base score -- confirm these values are intended.
XGB_PARAMS = dict(n_estimators=100, learning_rate=0.0001, max_depth=3, enable_categorical=True)


def add_time_features(df, date_col):
    """Add 'year', 'month' and 'day' columns derived from ``df[date_col]``.

    The columns are written onto ``df`` itself and the same frame is returned.
    """
    df['year'] = df[date_col].dt.year
    df['month'] = df[date_col].dt.month
    df['day'] = df[date_col].dt.day
    return df


def _fit_quantity_model(X, y, n_splits=CV_SPLITS):
    """Cross-validate an XGBRegressor, then fit the final model on all rows.

    Returns ``(model, mean_rmse)``. ``mean_rmse`` is NaN when the history is
    too short for at least two time-series folds. The returned model is
    trained on the complete history, so the most recent observations are not
    left out of the model that produces the forecast.
    """
    # TimeSeriesSplit needs n_splits + 1 <= n_samples and n_splits >= 2.
    n_splits = min(n_splits, len(X) - 1)
    fold_rmse = []
    if n_splits >= 2:
        for train_index, test_index in TimeSeriesSplit(n_splits=n_splits).split(X):
            fold_model = XGBRegressor(**XGB_PARAMS)
            fold_model.fit(X.iloc[train_index], y.iloc[train_index])
            fold_pred = fold_model.predict(X.iloc[test_index])
            fold_rmse.append(np.sqrt(mean_squared_error(y.iloc[test_index], fold_pred)))

    model = XGBRegressor(**XGB_PARAMS)
    model.fit(X, y)
    mean_rmse = float(np.mean(fold_rmse)) if fold_rmse else float('nan')
    return model, mean_rmse


def _build_forecast_frame(encoded_names, season_code):
    """Build the feature frame for every (day, product code) of the forecast window."""
    forecast_days = pd.date_range(start=FORECAST_START, end=FORECAST_END, freq='D')
    frame = pd.DataFrame({
        'ds': np.repeat(forecast_days, len(encoded_names)),
        'ShortName_encoded': np.tile(encoded_names, len(forecast_days)),
    })
    frame['season'] = season_code
    frame = add_time_features(frame, 'ds')
    # Explicit copy in the training column order: avoids SettingWithCopyWarning
    # and guarantees the model sees the features in the order it was fitted on.
    return frame[FEATURE_COLUMNS].copy()


def _history_value(product_rows, year, column):
    """Return ``column`` for FORECAST_MONTH of ``year``, or NaN when that month is missing."""
    match = product_rows.loc[
        (product_rows['years'] == year) & (product_rows['months'] == FORECAST_MONTH),
        column,
    ]
    return match.iloc[0] if not match.empty else np.nan


def _append_predictions(rows, csv_path):
    """Append ``rows`` to ``csv_path`` in one write, creating the file if needed.

    Running the notebook again appends the same products again; existing rows
    are never replaced.
    """
    new_dataframe = pd.DataFrame(rows, columns=PREDICTION_COLUMNS)
    try:
        existing_dataframe = pd.read_csv(csv_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        existing_dataframe = None
    frames = [
        frame for frame in (existing_dataframe, new_dataframe)
        if frame is not None and not frame.empty
    ]
    updated_dataframe = pd.concat(frames, ignore_index=True) if frames else new_dataframe
    updated_dataframe.to_csv(csv_path, index=False)
    return updated_dataframe


# The season codes were assigned on the full dataset, so the code of the
# forecast month is the same for every product and can be looked up once.
forecast_season = df.loc[df['months'] == FORECAST_MONTH, 'season']
if forecast_season.empty:
    raise ValueError(f"No rows with months == {FORECAST_MONTH}; cannot determine the season code")
season_code = forecast_season.iloc[0]

prediction_rows = []
cv_rmse_by_product = {}  # ShortName -> {'saleqty': rmse, 'cancelqty': rmse}, kept for inspection

try:
    for short_name in df['ShortName'].unique()[:MAX_PRODUCTS]:
        # Chronological order matters for the time-series cross-validation.
        product_df = df[df['ShortName'] == short_name].sort_values('ds')

        # Expand the monthly totals into daily values and add calendar features
        daily_df = expand_monthly_to_daily(product_df, 'ds', ['saleqty', 'cancelqty'], 'ShortName', 'season')
        if daily_df.empty:
            # e.g. a missing ShortName that matches no rows
            continue
        daily_df = add_time_features(daily_df, 'ds')

        # Encode 'ShortName' using label encoding (one product per model, so
        # this feature is constant; kept so the feature set stays the same).
        label_encoder = LabelEncoder()
        daily_df['ShortName_encoded'] = label_encoder.fit_transform(daily_df['ShortName'])

        X = daily_df[FEATURE_COLUMNS]

        # Train separate models for 'saleqty' and 'cancelqty'
        saleqty_model, sale_rmse = _fit_quantity_model(X, daily_df['saleqty'])
        cancelqty_model, cancel_rmse = _fit_quantity_model(X, daily_df['cancelqty'])
        cv_rmse_by_product[short_name] = {'saleqty': sale_rmse, 'cancelqty': cancel_rmse}
        print("Training done")

        # Daily predictions over the forecast window, summed to a monthly total
        future_df = _build_forecast_frame(daily_df['ShortName_encoded'].unique(), season_code)
        predicted_sale = float(np.sum(saleqty_model.predict(future_df), dtype=np.float64))
        predicted_cancel = float(np.sum(cancelqty_model.predict(future_df), dtype=np.float64))

        # 'predicted_saleqty' holds the NET forecast: sales minus cancellations.
        record = {
            'ShortName': short_name,
            'predicted_saleqty': predicted_sale - predicted_cancel,
        }
        for year in HISTORY_YEARS:
            record[f'{year}_saleqty'] = _history_value(product_df, year, 'saleqty')
        for year in HISTORY_YEARS:
            record[f'{year}_cancelqty'] = _history_value(product_df, year, 'cancelqty')
        prediction_rows.append(record)
finally:
    # Write whatever was finished, even if a later product failed, so a long
    # run does not lose its completed forecasts.
    if prediction_rows:
        updated_dataframe = _append_predictions(prediction_rows, PREDICTIONS_CSV)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_df['predicted_saleqty'] = saleqty_model.predict(future_df_for_saleqty)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_df['predicted_cancelqty'] = cancelqty_model.predict(future_df_for_cancelqty)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_df['ShortName'] = label_encoder.inve

Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_df['predicted_saleqty'] = saleqty_model.predict(future_df_for_saleqty)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_df['predicted_cancelqty'] = cancelqty_model.predict(future_df_for_cancelqty)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_df['ShortName'] = label_encoder.inve

Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


Training done


  new_df = new_df.append(temp_df)
  updated_dataframe = existing_dataframe.append(new_dataframe, ignore_index=True)


# Save new_df as a CSV file; index=False leaves the row index out of the file.
new_df.to_csv(
    path_or_buf='aggregated_predictions_January_2024.csv',
    index=False,
)

In [None]:
# Preview the daily timestamps for January 2024 (31 consecutive days
# starting on the 1st, i.e. 2024-01-01 through 2024-01-31).
pd.date_range(start='2024-01-01', periods=31, freq='D')

DatetimeIndex(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04',
               '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08',
               '2024-01-09', '2024-01-10', '2024-01-11', '2024-01-12',
               '2024-01-13', '2024-01-14', '2024-01-15', '2024-01-16',
               '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20',
               '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-24',
               '2024-01-25', '2024-01-26', '2024-01-27', '2024-01-28',
               '2024-01-29', '2024-01-30', '2024-01-31'],
              dtype='datetime64[ns]', freq='D')

In [None]:
# For every row of df, print saleqty and then cancelqty, each divided by 10.
for _, quantities in df[['saleqty', 'cancelqty']].iterrows():
    for quantity in quantities:
        print(quantity / 10)

2735.4
472.8
3649.8
0.0
6703.2
0.0
6059.4
0.0
6456.0
0.0
6112.8
1032.6
815.4
3379.2
0.0
6631.8
2153.4
4238.4
4731.6
924.6
4637.4
18.0
2326.2
0.0
2580.0
0.0
2745.6
0.0
3753.0
2.4
5102.4
0.0
4050.6
0.0
5272.8
0.0
4714.2
1.2
234.0
1743.6
5260.8
1.8
6552.0
0.0
803.4
0.0
2698.2
0.0


In [None]:
# Pair each target column with its model and print that column of daily_df.
# The model is carried along by the loop but is not used inside the body.
models_by_target = {
    'saleqty': saleqty_model,
    'cancelqty': cancelqty_model,
}
for target, model in models_by_target.items():
    y = daily_df[target]
    print(y)

0      882.387097
1      882.387097
2      882.387097
3      882.387097
4      882.387097
          ...    
725    870.387097
726    870.387097
727    870.387097
728    870.387097
729    870.387097
Name: saleqty, Length: 730, dtype: float64
0      152.516129
1      152.516129
2      152.516129
3      152.516129
4      152.516129
          ...    
725      0.000000
726      0.000000
727      0.000000
728      0.000000
729      0.000000
Name: cancelqty, Length: 730, dtype: float64


In [None]:
# Read the aggregated January 2024 predictions CSV.
# Note: this rebinds `df`, which earlier in the notebook held the sales data.
df = pd.read_csv(
    filepath_or_buffer='aggregated_predictions_January_2024.csv',
)

In [None]:
# Index labels of every row whose ShortName is exactly 'Cashews-Red    8oz'
# (the run of spaces inside the name is part of the value being matched).
i = df.loc[df['ShortName'].eq('Cashews-Red    8oz')].index

In [None]:
# Remove the rows whose index labels are in `i`, modifying df in place.
df.drop(index=i, inplace=True)

In [None]:
# Show the current contents of df as the cell's output.
df

Unnamed: 0,ShortName,predicted_saleqty,2022_saleqty,2023_saleqty,2022_cancelqty,2023_cancelqty
0,Boondi 14.1oz.,881.388306,876.0,876.0,0.0,0.0
1,Hot Mix 12oz.,2766.709473,7080.0,2680.0,180.0,2540.0
2,Kesar MangoPulp 30,28580.164062,27354.0,25800.0,4728.0,0.0
3,Bhel Mix 12 oz.,991.97467,996.0,936.0,0.0,0.0
4,Tm date chny10oz,-12312.930664,5676.0,0.0,12180.0,11508.0
5,TamDat Sce18oz,-3265.65332,2580.0,11460.0,19848.0,96.0
6,Hot Chips 14.1oz.,436.704285,552.0,384.0,0.0,0.0
7,Peanuts Spicy 8oz,460.516846,2720.0,2560.0,0.0,0.0


In [None]:
# Print the Python type of each distinct ShortName value, one per line.
for short_name_type in map(type, df['ShortName'].unique()):
    print(short_name_type)

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class