In [1]:
import pandas as pd
import numpy as np

# Load dataset into a DataFrame
df = pd.read_csv('DataCoSupplyChainDataset.csv', encoding='latin-1')

# Convert 'order date (DateOrders)' to datetime
df['order date (DateOrders)'] = pd.to_datetime(df['order date (DateOrders)'])
df['Date'] = df['order date (DateOrders)'].dt.date  # Extract date part only

# Filter out rows where 'Delivery Status' is 'Shipping canceled'
filtered_df = df[df['Delivery Status'] != 'Shipping canceled']

# Group by orders to subtract daily orders of each product within each department from inventory
order_df = filtered_df.groupby(['Date', 'Product Card Id', 'Department Id'])['Order Item Quantity'].sum().reset_index()
order_df.rename(columns={'Order Item Quantity': 'Quantity'}, inplace=True)

# Sort order_df by Date
order_df.sort_values(by='Date', inplace=True)

# Initialize a dictionary to store initial stock quantities and their start dates
initial_stock = {}
initial_stock_dates = {}

# Set initial stock for each combination of Product Card Id and Department Id
for (product_id, dept_id), group in order_df.groupby(['Product Card Id', 'Department Id']):
    min_date = group['Date'].min()
    initial_stock[(product_id, dept_id)] = np.random.randint(200, 250)  # Random number between 200 and 250
    initial_stock_dates[(product_id, dept_id)] = min_date

# Add 'In Stock' and 'New Stock Arrival' columns to order_df
order_df['In Stock'] = np.nan
order_df['New Stock Arrival'] = 0

# Iterate through order_df to update 'In Stock' and 'New Stock Arrival'
for index, row in order_df.iterrows():
    product_id = row['Product Card Id']
    department_id = row['Department Id']
    date = row['Date']
    order_quantity = row['Quantity']
    
    # Check if it's the first occurrence for this combination
    if date == initial_stock_dates[(product_id, department_id)]:
        # Set initial stock
        order_df.loc[index, 'In Stock'] = initial_stock[(product_id, department_id)]
    else:
        # Find the previous index with the same product and department
        prev_index = order_df[(order_df['Product Card Id'] == product_id) & 
                              (order_df['Department Id'] == department_id) &
                              (order_df['Date'] < date)].index.max()
        
        if not np.isnan(prev_index):
            # Subtract order quantity from 'In Stock' of the previous row
            order_df.loc[index, 'In Stock'] = order_df.loc[prev_index, 'In Stock'] - order_quantity
            
            # Check if 'In Stock' goes below 133 and replenish
            if order_df.loc[index, 'In Stock'] < 133:
                replenish_amount = np.random.randint(133, 200)  # Random number between 133 and 200
                order_df.loc[index, 'In Stock'] += replenish_amount
                order_df.loc[index, 'New Stock Arrival'] = 1
        else:
            # Handle the case where no previous index is found
            order_df.loc[index, 'In Stock'] = initial_stock[(product_id, department_id)]

In [2]:
# Rename 'Product Card Id' to 'Product Id'
order_df.rename(columns={'Product Card Id': 'Product Id'}, inplace=True)

# Add prefix 'D' to 'Department Id'
order_df['Department Id'] = 'D' + order_df['Department Id'].astype(str)

In [3]:
len(order_df)

21787

In [4]:
order_df[(order_df['Department Id'] == 'D2') & (order_df['Product Id'] == 1360)].head()

Unnamed: 0,Date,Product Id,Department Id,Quantity,In Stock,New Stock Arrival
21702,2017-12-02,1360,D2,17,208.0,0
21703,2017-12-03,1360,D2,65,143.0,0
21704,2017-12-04,1360,D2,55,244.0,1
21759,2018-01-10,1360,D2,35,209.0,0
21760,2018-01-11,1360,D2,66,143.0,0


In [5]:
order_df[order_df['New Stock Arrival']> 0].head()

Unnamed: 0,Date,Product Id,Department Id,Quantity,In Stock,New Stock Arrival
63,2015-01-03,1014,D7,54,264.0,1
49,2015-01-03,502,D5,44,305.0,1
47,2015-01-03,365,D4,73,248.0,1
68,2015-01-04,191,D3,44,270.0,1
73,2015-01-04,627,D5,50,283.0,1


In [6]:
len(order_df[order_df['New Stock Arrival']>0])

2194

In [7]:
# One supplier for each department
unique_departments = order_df['Department Id'].unique()
unique_departments

array(['D2', 'D7', 'D6', 'D3', 'D4', 'D5', 'D8', 'D9', 'D10', 'D12',
       'D11'], dtype=object)

In [8]:
num_departments = len(unique_departments)

In [20]:
from faker import Faker
import random
import pandas as pd

# Initialize Faker
fake = Faker()

# Function to generate a realistic 10-digit phone number in the format XXX-XXX-XXXX
def generate_phone_number():
    area_code = random.randint(100, 999)  # Area code range from 100 to 999
    central_office_code = random.randint(100, 999)  # Central office code range from 100 to 999
    subscriber_number = random.randint(1000, 9999)  # Subscriber number range from 1000 to 9999
    return f'{area_code}-{central_office_code}-{subscriber_number}'

# Assume these variables are defined somewhere in your code
num_departments = len(unique_departments)
unique_departments = order_df['Department Id'].unique()  # Example line to get unique departments

# Create lists for the supplier data
supplier_ids = [f'S{i}' for i in range(1, num_departments + 1)]  # Prefix 'S' added to each Supplier ID
department_ids = unique_departments
supplier_names = [fake.company() for _ in range(num_departments)]
contact_names = [fake.name() for _ in range(num_departments)]
contact_emails = [fake.email() for _ in range(num_departments)]
phone_numbers = [generate_phone_number() for _ in range(num_departments)]  # Ensure formatted phone numbers

# Create a DataFrame to store supplier information
supplier_df = pd.DataFrame({
    'Supplier ID': supplier_ids,
    'Department Id': department_ids,
    'Supplier Name': supplier_names,
    'Contact Name': contact_names,
    'Contact Email': contact_emails,
    'Phone Number': phone_numbers
})

In [21]:
supplier_df

Unnamed: 0,Supplier ID,Department Id,Supplier Name,Contact Name,Contact Email,Phone Number
0,S1,D2,"Adkins, Kennedy and Gomez",Brandon Smith,lambertkaren@example.org,255-412-3767
1,S2,D7,Walters Group,Sharon Bailey,pmorales@example.com,120-667-2951
2,S3,D6,Michael Inc,Jason Barnes,ginamiller@example.net,757-507-6083
3,S4,D3,Rowland and Sons,Sean Mclaughlin,thomaswells@example.org,906-403-1108
4,S5,D4,Morales-Obrien,Mrs. Sophia Richmond,parkerfinley@example.net,585-862-5757
5,S6,D5,"Jackson, Scott and Davis",Mary Chaney,antonio27@example.net,708-483-7848
6,S7,D8,Santos-Christian,Christina Barker,marywilliams@example.org,246-907-8925
7,S8,D9,Morris-Bishop,Jennifer Howard,tyler44@example.com,631-642-3132
8,S9,D10,Phillips-Mclaughlin,Juan Simpson,jillianmccall@example.org,415-568-7897
9,S10,D12,"Gordon, Howell and Glover",Anne James,ambercarter@example.org,390-623-4153


In [11]:
# Save supplier information to an Excel file
supplier_df.to_excel('suppliers.xlsx', index=False)

In [12]:
department_supplier_map = supplier_df.set_index('Department Id')['Supplier ID'].to_dict()
department_supplier_map

{'D2': 1,
 'D7': 2,
 'D6': 3,
 'D3': 4,
 'D4': 5,
 'D5': 6,
 'D8': 7,
 'D9': 8,
 'D10': 9,
 'D12': 10,
 'D11': 11}

In [13]:
supplier_df.head()

Unnamed: 0,Supplier ID,Department Id,Supplier Name,Contact Name,Contact Email,Phone Number
0,1,D2,Wilson LLC,Adam Smith,moorecaitlin@example.net,839-621-97
1,2,D7,Rodriguez Group,Kevin Miller,hstevens@example.org,186-739-80
2,3,D6,"Knapp, Johnson and Mitchell",Christina Shelton,amysteele@example.org,370-529-88
3,4,D3,"Holloway, Tucker and Hawkins",Alexandra Ramos,mariotownsend@example.net,343-900-80
4,5,D4,"Garcia, Daniels and Wright",Mr. Daniel Bentley,heather39@example.com,795-342-30


In [14]:
# Create a dictionary to map each department to its corresponding supplier ID
department_supplier_map = supplier_df.set_index('Department Id')['Supplier ID'].to_dict()

# Add the supplier column to order_df, setting it to null by default
order_df['Supplier ID'] = np.nan

# Update the 'Supplier ID' column only if 'New Stock Arrival' is 1
order_df.loc[order_df['New Stock Arrival'] == 1, 'Supplier ID'] = order_df['Department Id'].map(department_supplier_map)

# Convert Supplier ID to type int and fill na with -1 ensuring no match with supplier
#order_df['Supplier ID'] = order_df['Supplier ID'].fillna(-1).astype(int)

# Print the resulting order_df for inspection
print(order_df[['Department Id', 'Supplier ID', 'New Stock Arrival']].head())

   Department Id  Supplier ID  New Stock Arrival
0             D2          NaN                  0
23            D7          NaN                  0
22            D7          NaN                  0
21            D7          NaN                  0
20            D7          NaN                  0


In [15]:
order_df.to_excel('FakeInventory.xlsx', index=False)

print("order_df saved to processed_orders.xlsx successfully.")

order_df saved to processed_orders.xlsx successfully.


In [16]:
order_df[order_df['New Stock Arrival'] == 1].head()

Unnamed: 0,Date,Product Id,Department Id,Quantity,In Stock,New Stock Arrival,Supplier ID
63,2015-01-03,1014,D7,54,264.0,1,2.0
49,2015-01-03,502,D5,44,305.0,1,6.0
47,2015-01-03,365,D4,73,248.0,1,5.0
68,2015-01-04,191,D3,44,270.0,1,4.0
73,2015-01-04,627,D5,50,283.0,1,6.0


In [17]:
# Read the supplier information from the Excel file
supplier_df = pd.read_excel('suppliers.xlsx')

supplier_df

Unnamed: 0,Supplier ID,Department Id,Supplier Name,Contact Name,Contact Email,Phone Number
0,1,D2,Wilson LLC,Adam Smith,moorecaitlin@example.net,839-621-97
1,2,D7,Rodriguez Group,Kevin Miller,hstevens@example.org,186-739-80
2,3,D6,"Knapp, Johnson and Mitchell",Christina Shelton,amysteele@example.org,370-529-88
3,4,D3,"Holloway, Tucker and Hawkins",Alexandra Ramos,mariotownsend@example.net,343-900-80
4,5,D4,"Garcia, Daniels and Wright",Mr. Daniel Bentley,heather39@example.com,795-342-30
5,6,D5,Browning PLC,Bryan Craig DDS,laceycamacho@example.com,739-451-28
6,7,D8,Dean Inc,John Smith,terrigrant@example.com,392-544-77
7,8,D9,Myers Ltd,Donald Edwards,jessicamartin@example.net,673-469-73
8,9,D10,Meyer Group,Alexandra Rodriguez,elizabeth36@example.net,241-854-92
9,10,D12,"Sandoval, Thompson and Hernandez",Kelly Reese,jamespruitt@example.net,851-831-15


In [18]:
supplier_df['Supplier ID'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11], dtype=int64)

In [None]:
supplier_df['Department Id'].unique()