In [124]:
import pandas as pd
import numpy as np

# Read the raw DataCo supply-chain export (the file is latin-1 encoded)
df = pd.read_csv('DataCoSupplyChainDataset.csv', encoding='latin-1')

# Parse the order timestamp once, store it back, and keep a calendar-date column next to it
order_timestamps = pd.to_datetime(df['order date (DateOrders)'])
df['order date (DateOrders)'] = order_timestamps
df['Date'] = order_timestamps.dt.date

# Total units ordered per day for every (product, department) pair,
# with the summed column renamed to 'Quantity' and rows arranged chronologically
order_df = (
    df.groupby(['Date', 'Product Card Id', 'Department Id'])['Order Item Quantity']
    .sum()
    .reset_index()
    .rename(columns={'Order Item Quantity': 'Quantity'})
    .sort_values(by='Date')
)

# Seed NumPy's legacy global generator so the synthetic inventory figures
# (opening stock here and any later np.random draws) are reproducible on re-run
np.random.seed(42)

# Opening stock for every (Product Card Id, Department Id) pair:
# one random integer between 200 and 1000 (inclusive) per pair.
# Keys are the (product_id, department_id) tuples produced by the groupby.
initial_stock = {
    pair: np.random.randint(200, 1001)
    for pair, _ in order_df.groupby(['Product Card Id', 'Department Id'])
}

# Simulate a running stock level for every (Product Card Id, Department Id) pair.
#
# Rules:
#   * The first row seen for a pair records its opening stock from `initial_stock`
#     unchanged (that day's orders are not deducted).
#   * Every later row deducts the day's ordered quantity from the pair's previous level.
#   * If the level would fall below REORDER_THRESHOLD, a random replenishment is added
#     and the row is flagged with 'New Stock Arrival' = 1.
#
# The pass relies on order_df already being sorted by 'Date', so that each pair's
# rows are visited in chronological order.
REORDER_THRESHOLD = 20                    # replenish when stock drops below this level
REPLENISH_MIN, REPLENISH_MAX = 100, 500   # inclusive bounds of a replenishment draw

current_stock = {}     # (product_id, department_id) -> level after the pair's latest row
in_stock_values = []   # 'In Stock' value for each row, in row order
arrival_flags = []     # 1 where a replenishment happened on that row, else 0

for product_id, department_id, order_quantity in zip(
    order_df['Product Card Id'], order_df['Department Id'], order_df['Quantity']
):
    pair = (product_id, department_id)
    replenished = 0

    if pair not in current_stock:
        # First occurrence of this pair: start from its opening stock
        stock_level = initial_stock[pair]
    else:
        # Carry the previous level forward and deduct today's orders
        stock_level = current_stock[pair] - order_quantity
        if stock_level < REORDER_THRESHOLD:
            stock_level += np.random.randint(REPLENISH_MIN, REPLENISH_MAX + 1)
            replenished = 1

    current_stock[pair] = stock_level
    in_stock_values.append(stock_level)
    arrival_flags.append(replenished)

# Assign both columns in one shot (positional, so the frame's index labels are irrelevant);
# 'In Stock' is kept as float
order_df['In Stock'] = np.asarray(in_stock_values, dtype=float)
order_df['New Stock Arrival'] = np.asarray(arrival_flags, dtype=int)

In [125]:
# Number of (date, product, department) rows in the aggregated orders table
order_df.shape[0]

22273

In [126]:
# Spot-check the simulated stock for one pair: department 2, product 1360
is_target_pair = order_df['Department Id'].eq(2) & order_df['Product Card Id'].eq(1360)
order_df.loc[is_target_pair].head()

Unnamed: 0,Date,Product Card Id,Department Id,Quantity,In Stock,New Stock Arrival
22188,2017-12-02,1360,2,18,562.0,0
22189,2017-12-03,1360,2,69,493.0,0
22190,2017-12-04,1360,2,58,435.0,0
22245,2018-01-10,1360,2,38,397.0,0
22246,2018-01-11,1360,2,68,329.0,0


In [130]:
# First few rows on which a replenishment was triggered
had_arrival = order_df['New Stock Arrival'].gt(0)
order_df.loc[had_arrival].head()

Unnamed: 0,Date,Product Card Id,Department Id,Quantity,In Stock,New Stock Arrival
121,2015-01-06,627,5,34,319.0,1
158,2015-01-08,365,4,83,80.0,1
182,2015-01-09,365,4,79,476.0,1
229,2015-01-11,502,5,67,344.0,1
318,2015-01-14,1014,7,62,371.0,1


In [131]:
# Write the simulated inventory to an Excel workbook (index labels are not exported).
# The path is held in one variable so the confirmation message always names
# the file that was actually written.
output_path = 'FakeInventory.xlsx'
order_df.to_excel(output_path, index=False)

print(f"order_df saved to {output_path} successfully.")

order_df saved to processed_orders.xlsx successfully.
