In [1]:
import pandas as pd
import numpy as np

# Read the raw supply-chain export (the file is latin-1 encoded)
df = pd.read_csv('DataCoSupplyChainDataset.csv', encoding='latin-1')

# Parse the order timestamp column and keep a calendar-date copy in 'Date'
order_timestamps = pd.to_datetime(df['order date (DateOrders)'])
df['order date (DateOrders)'] = order_timestamps
df['Date'] = order_timestamps.dt.date

# Keep every row whose delivery status is not 'Shipping canceled'
is_cancelled = df['Delivery Status'] == 'Shipping canceled'
filtered_df = df[~is_cancelled]

# Total units ordered per day for each (product, department) pair, ordered by date.
# The index produced by reset_index() is kept as-is after sorting.
group_keys = ['Date', 'Product Card Id', 'Department Id']
order_df = (
    filtered_df
    .groupby(group_keys)['Order Item Quantity']
    .sum()
    .reset_index()
    .rename(columns={'Order Item Quantity': 'Quantity'})
    .sort_values(by='Date')
)

# Seed NumPy's global generator so the synthetic inventory is reproducible on
# Restart & Run All. Every np.random.* call made after this point draws from
# the seeded stream.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Opening stock is drawn uniformly from [INITIAL_STOCK_MIN, INITIAL_STOCK_MAX).
# np.random.randint excludes the upper bound, so the largest possible value is 249.
INITIAL_STOCK_MIN = 200
INITIAL_STOCK_MAX = 250

# Opening stock and first order date, keyed by (Product Card Id, Department Id)
initial_stock = {}
initial_stock_dates = {}

# One random opening stock per (product, department) combination
for (product_id, dept_id), group in order_df.groupby(['Product Card Id', 'Department Id']):
    initial_stock[(product_id, dept_id)] = np.random.randint(INITIAL_STOCK_MIN, INITIAL_STOCK_MAX)
    initial_stock_dates[(product_id, dept_id)] = group['Date'].min()

# Simulate the running inventory for every (product, department) pair.
#
# Rows are visited in order_df's current order, which must be ascending by
# 'Date' so that each pair's rows are seen chronologically. A per-pair running
# balance replaces the per-row search for the previous record, so the whole
# simulation is a single pass over the data.
#
# Policy:
#   * the first row of a pair shows its opening stock from `initial_stock`
#     NOTE(review): that first day's order quantity is not deducted - confirm
#     this is intended.
#   * every later row subtracts the day's ordered quantity from the pair's
#     previous balance
#   * when the balance drops below REORDER_THRESHOLD, a delivery drawn from
#     [REPLENISH_MIN, REPLENISH_MAX) is added on the same row
#     (np.random.randint excludes the upper bound)
REORDER_THRESHOLD = 133
REPLENISH_MIN = 133
REPLENISH_MAX = 200

running_stock = {}       # latest balance per (product, department)
in_stock_values = []     # 'In Stock' value for each row, in row order
arrival_flags = []       # 1 when a replenishment arrived on that row, else 0
replenish_amounts = []   # size of that replenishment (0 when none)

order_columns = zip(order_df['Product Card Id'], order_df['Department Id'], order_df['Quantity'])
for product_id, department_id, order_quantity in order_columns:
    key = (product_id, department_id)
    replenish_amount = 0

    if key not in running_stock:
        # First occurrence of this combination: start from the opening stock
        stock = initial_stock[key]
    else:
        # Deduct today's orders from the previous balance of the same combination
        stock = running_stock[key] - order_quantity
        if stock < REORDER_THRESHOLD:
            replenish_amount = int(np.random.randint(REPLENISH_MIN, REPLENISH_MAX))
            stock += replenish_amount

    running_stock[key] = stock
    in_stock_values.append(stock)
    arrival_flags.append(1 if replenish_amount else 0)
    replenish_amounts.append(replenish_amount)

# Attach the results positionally; 'In Stock' is stored as float
order_df['In Stock'] = np.array(in_stock_values, dtype=float)
order_df['New Stock Arrival'] = arrival_flags
order_df['Replenish Amount'] = replenish_amounts

In [2]:
# Rename 'Product Card Id' to 'Product Id' (a no-op if the column was already renamed)
order_df.rename(columns={'Product Card Id': 'Product Id'}, inplace=True)

# Add prefix 'D' to 'Department Id', skipping values that already carry it so
# that re-running this cell does not produce 'DD2', 'DDD2', ...
department_labels = order_df['Department Id'].astype(str)
already_prefixed = department_labels.str.startswith('D')
order_df['Department Id'] = department_labels.where(already_prefixed, 'D' + department_labels)

In [3]:
# Number of rows in order_df
order_df.shape[0]

21787

In [4]:
# First rows of the inventory history for product 1360 in department 'D2'
in_department = order_df['Department Id'].eq('D2')
is_product = order_df['Product Id'].eq(1360)
order_df.loc[in_department & is_product].head()

Unnamed: 0,Date,Product Id,Department Id,Quantity,In Stock,New Stock Arrival,Replenish Amount
21702,2017-12-02,1360,D2,17,232.0,0,0
21703,2017-12-03,1360,D2,65,167.0,0,0
21704,2017-12-04,1360,D2,55,263.0,1,151
21759,2018-01-10,1360,D2,35,228.0,0,0
21760,2018-01-11,1360,D2,66,162.0,0,0


In [5]:
# First rows on which a replenishment arrived
order_df.loc[order_df['New Stock Arrival'].gt(0)].head()

Unnamed: 0,Date,Product Id,Department Id,Quantity,In Stock,New Stock Arrival,Replenish Amount
63,2015-01-03,1014,D7,54,265.0,1,153
47,2015-01-03,365,D4,73,233.0,1,134
44,2015-01-03,191,D3,59,260.0,1,134
71,2015-01-04,502,D5,75,277.0,1,198
73,2015-01-04,627,D5,50,285.0,1,170


In [6]:
# Number of rows on which a replenishment arrived
int(order_df['New Stock Arrival'].gt(0).sum())

2194

In [7]:
# Distinct department ids in order of first appearance (one supplier per department)
unique_departments = pd.unique(order_df['Department Id'])
unique_departments

array(['D2', 'D7', 'D6', 'D3', 'D4', 'D5', 'D8', 'D9', 'D10', 'D12',
       'D11'], dtype=object)

In [8]:
# Number of distinct departments
num_departments = unique_departments.size

In [9]:
from faker import Faker
import random
import pandas as pd

# Initialize Faker with fixed seeds so the generated supplier records are
# identical on every Restart & Run All.
SUPPLIER_SEED = 42
Faker.seed(SUPPLIER_SEED)    # seeds the generator shared by Faker instances
random.seed(SUPPLIER_SEED)   # seeds the stdlib generator used by generate_phone_number
fake = Faker()

def generate_phone_number():
    """Return a random phone number string formatted as XXX-XXX-XXXX.

    Three integers are drawn in order with ``random.randint`` (bounds inclusive):
    an area code in 100-999, a central office code in 100-999 and a subscriber
    number in 1000-9999. They are joined with hyphens.
    """
    digit_group_bounds = ((100, 999), (100, 999), (1000, 9999))
    parts = [str(random.randint(low, high)) for low, high in digit_group_bounds]
    return '-'.join(parts)

# Refresh the department list first and derive the count from it, so every
# supplier column built below has exactly one entry per department.
unique_departments = order_df['Department Id'].unique()
num_departments = len(unique_departments)

# Create lists for the supplier data (one supplier per department)
supplier_ids = [f'S{i}' for i in range(1, num_departments + 1)]  # Prefix 'S' added to each Supplier ID
department_ids = unique_departments
supplier_names = [fake.company() for _ in range(num_departments)]
contact_names = [fake.name() for _ in range(num_departments)]
contact_emails = [fake.email() for _ in range(num_departments)]
phone_numbers = [generate_phone_number() for _ in range(num_departments)]  # Formatted XXX-XXX-XXXX

# Create a DataFrame to store supplier information
supplier_df = pd.DataFrame({
    'Supplier ID': supplier_ids,
    'Department Id': department_ids,
    'Supplier Name': supplier_names,
    'Contact Name': contact_names,
    'Contact Email': contact_emails,
    'Phone Number': phone_numbers
})

In [10]:
# Display the generated supplier table (one row per department)
supplier_df

Unnamed: 0,Supplier ID,Department Id,Supplier Name,Contact Name,Contact Email,Phone Number
0,S1,D2,Garcia-Lopez,Brian Smith,breanna88@example.net,338-625-6146
1,S2,D7,Boone Ltd,Mr. Scott Conrad,diamondmacdonald@example.net,827-419-4819
2,S3,D6,Young-Mosley,Richard Cox,kelly17@example.org,380-602-6805
3,S4,D3,Stevens-Austin,Robert Brown,jonathanwilliams@example.org,961-397-8129
4,S5,D4,White-Baldwin,Aaron Silva,miranda69@example.org,530-804-3630
5,S6,D5,Gray LLC,Nathaniel Coleman,guzmanjames@example.com,537-974-2039
6,S7,D8,"Conway, Spencer and Green",Tonya Maynard,jeffrey42@example.org,725-778-3599
7,S8,D9,Kelly PLC,Tammy Walker,rwalker@example.com,975-824-2186
8,S9,D10,Ingram Group,Nathan Davidson,lorismith@example.com,543-610-5029
9,S10,D12,Allen PLC,Annette Casey,meganmurphy@example.net,923-327-9025


In [11]:
# Write the supplier table to 'suppliers.xlsx', omitting the index column
supplier_df.to_excel(excel_writer='suppliers.xlsx', index=False)

In [12]:
# Lookup table: department id -> id of the supplier assigned to it
department_supplier_map = dict(zip(supplier_df['Department Id'], supplier_df['Supplier ID']))
department_supplier_map

{'D2': 'S1',
 'D7': 'S2',
 'D6': 'S3',
 'D3': 'S4',
 'D4': 'S5',
 'D5': 'S6',
 'D8': 'S7',
 'D9': 'S8',
 'D10': 'S9',
 'D12': 'S10',
 'D11': 'S11'}

In [13]:
# First five supplier records
supplier_df.iloc[:5]

Unnamed: 0,Supplier ID,Department Id,Supplier Name,Contact Name,Contact Email,Phone Number
0,S1,D2,Garcia-Lopez,Brian Smith,breanna88@example.net,338-625-6146
1,S2,D7,Boone Ltd,Mr. Scott Conrad,diamondmacdonald@example.net,827-419-4819
2,S3,D6,Young-Mosley,Richard Cox,kelly17@example.org,380-602-6805
3,S4,D3,Stevens-Austin,Robert Brown,jonathanwilliams@example.org,961-397-8129
4,S5,D4,White-Baldwin,Aaron Silva,miranda69@example.org,530-804-3630


In [14]:
# Create a dictionary to map each department to its corresponding supplier ID
department_supplier_map = supplier_df.set_index('Department Id')['Supplier ID'].to_dict()

# Record the supplier only on rows where new stock arrived; every other row is
# left as a real missing value. Filling the column with NaN and then casting
# with .astype(str) would store the literal text 'nan' instead of a null.
has_new_stock = order_df['New Stock Arrival'] == 1
order_df['Supplier ID'] = order_df['Department Id'].map(department_supplier_map).where(has_new_stock)

# Show the resulting columns for inspection
order_df[['Department Id', 'Supplier ID', 'New Stock Arrival']].head()

   Department Id Supplier ID  New Stock Arrival
0             D2         nan                  0
23            D7         nan                  0
22            D7         nan                  0
21            D7         nan                  0
20            D7         nan                  0


In [15]:
# First rows with a replenishment, now including the supplier column
order_df.loc[order_df['New Stock Arrival'].eq(1)].head()

Unnamed: 0,Date,Product Id,Department Id,Quantity,In Stock,New Stock Arrival,Replenish Amount,Supplier ID
63,2015-01-03,1014,D7,54,265.0,1,153,S2
47,2015-01-03,365,D4,73,233.0,1,134,S5
44,2015-01-03,191,D3,59,260.0,1,134,S4
71,2015-01-04,502,D5,75,277.0,1,198,S6
73,2015-01-04,627,D5,50,285.0,1,170,S6


In [16]:
# Distinct supplier ids present in the supplier table
pd.unique(supplier_df['Supplier ID'])

array(['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11'],
      dtype=object)

In [17]:
# Distinct department ids present in the supplier table
pd.unique(supplier_df['Department Id'])

array(['D2', 'D7', 'D6', 'D3', 'D4', 'D5', 'D8', 'D9', 'D10', 'D12',
       'D11'], dtype=object)

In [18]:
# List the column labels of order_df
order_df.columns

Index(['Date', 'Product Id', 'Department Id', 'Quantity', 'In Stock',
       'New Stock Arrival', 'Replenish Amount', 'Supplier ID'],
      dtype='object')

In [19]:
# Distinct (product, category) pairs taken from the raw data
# NOTE(review): assumes each product maps to a single category; a product with
# several categories would duplicate its inventory rows in the join - confirm.
product_categories = df[['Product Card Id', 'Product Category Id']].drop_duplicates()

# Left-join the category onto the inventory rows ('Product Id' matches
# 'Product Card Id'), then drop the redundant join key
merged_df = (
    order_df
    .merge(product_categories, left_on='Product Id', right_on='Product Card Id', how='left')
    .drop(columns=['Product Card Id'])
)

In [20]:
# Save the merged inventory table. The confirmation message is built from the
# same path variable so it always names the file that was actually written.
output_path = 'FakeInventory_Final.xlsx'
merged_df.to_excel(output_path, index=False)

print(f"Merged_df saved to {output_path} successfully.")

Merged_df saved to processed_orders.xlsx successfully.
