# Create an inventory dataset of 2 million rows using Python and show some analysis of this data.

In [16]:
# Import different libraries

import pandas as pd
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
import uuid
import time
import calendar
import seaborn as sns

# Create a data set of 2m rows

In [28]:
# Base frame: one random integer 'COGS' value per row, drawn from [100, 300)
n_rows = 2000000
df = pd.DataFrame({'COGS': np.random.randint(100, 300, size=n_rows)})

# 'UID': the first 16 hex characters of a fresh random UUID per row, upper-cased
df['UID'] = [uuid.uuid4().hex[:16].upper() for _ in range(len(df))]

# 'SKU': the UID with a fixed 'SKU-' prefix
sku_prefix = 'SKU-'
df['SKU'] = sku_prefix + df['UID']

# 'Item_Status': sampled per row with the probabilities below
# (the weights line up positionally with the status labels)
statuses = ['Delivered', 'Shipped', 'Packed', 'Received', 'lost']
weights = [0.8, 0.05, 0.05, 0.05, 0.05]
df['Item_Status'] = np.random.choice(statuses, size=len(df), p=weights)

df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status
0,168,DBFF2353E4794747,SKU-DBFF2353E4794747,Delivered
1,146,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,Delivered
2,248,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,Received
3,156,643029223A6D4030,SKU-643029223A6D4030,Delivered
4,189,23019617DD11413F,SKU-23019617DD11413F,Delivered


# Create Country Name

In [29]:
# Assign each row a country sampled with the probabilities below
# (the weights line up positionally with the country names).
# The sample size is taken from the frame itself rather than a hard-coded
# row count, so this cell stays valid if the number of rows changes.
countries = ['Germany', 'Spain', 'France', 'England', 'Portugal', 'Italy', 'Poland', 'Holland']
weights = [0.2, 0.15, 0.1, 0.15, 0.05, 0.13, 0.05, 0.17]
df['Country_Name'] = np.random.choice(countries, size=len(df), p=weights)
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name
0,168,DBFF2353E4794747,SKU-DBFF2353E4794747,Delivered,Poland
1,146,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,Delivered,Holland
2,248,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,Received,Holland
3,156,643029223A6D4030,SKU-643029223A6D4030,Delivered,Italy
4,189,23019617DD11413F,SKU-23019617DD11413F,Delivered,England


# Create Delivery note

In [30]:
# Random integer delivery note per row, drawn from [0, 10000).
# The number of values is derived from the frame instead of a hard-coded
# row count, so the assignment cannot fail with a length mismatch.
df['Delivery note'] = np.random.randint(10000, size=len(df))
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note
0,168,DBFF2353E4794747,SKU-DBFF2353E4794747,Delivered,Poland,7978
1,146,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,Delivered,Holland,2861
2,248,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,Received,Holland,4300
3,156,643029223A6D4030,SKU-643029223A6D4030,Delivered,Italy,7422
4,189,23019617DD11413F,SKU-23019617DD11413F,Delivered,England,3196


# Create the item category

In [31]:
def a(df):
    """Map a single row's 'Delivery note' value to an item category.

    Despite its name, ``df`` is one row (anything indexable by the key
    'Delivery note'), as passed by ``DataFrame.apply(..., axis=1)``.

    Bands (upper bounds inclusive):
        0..2000      -> 'Fashion'
        2001..4000   -> 'Electronics'
        4001..6000   -> 'Phones'
        6001..8000   -> 'Furnitures'
        anything else (including negative values) -> 'Computing'
    """
    note = df['Delivery note']
    if 0 <= note <= 2000:
        return 'Fashion'
    if 2000 < note <= 4000:
        return 'Electronics'
    if 4000 < note <= 6000:
        return 'Phones'
    if 6000 < note <= 8000:
        return 'Furnitures'
    return 'Computing'

# Derive the category from the delivery note with vectorised comparisons.
# A row-wise df.apply(..., axis=1) calls a Python function once per row,
# which is very slow on millions of rows. np.select evaluates each band
# once over the whole column and yields the same labels.
# Bands (upper bounds inclusive); anything outside them falls back to 'Computing'.
note = df['Delivery note']
category_conditions = [
    (note >= 0) & (note <= 2000),
    (note > 2000) & (note <= 4000),
    (note > 4000) & (note <= 6000),
    (note > 6000) & (note <= 8000),
]
category_labels = ['Fashion', 'Electronics', 'Phones', 'Furnitures']
df['Category'] = np.select(category_conditions, category_labels, default='Computing')
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category
0,168,DBFF2353E4794747,SKU-DBFF2353E4794747,Delivered,Poland,7978,Furnitures
1,146,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,Delivered,Holland,2861,Electronics
2,248,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,Received,Holland,4300,Phones
3,156,643029223A6D4030,SKU-643029223A6D4030,Delivered,Italy,7422,Furnitures
4,189,23019617DD11413F,SKU-23019617DD11413F,Delivered,England,3196,Electronics


# Create Serial number for the data

In [32]:
# Number the rows 1..N in their current order.
# N comes from the frame itself, so the assignment stays valid if the
# number of rows generated earlier changes.
df['Serial_number'] = range(1, len(df) + 1)
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category,Serial_number
0,168,DBFF2353E4794747,SKU-DBFF2353E4794747,Delivered,Poland,7978,Furnitures,1
1,146,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,Delivered,Holland,2861,Electronics,2
2,248,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,Received,Holland,4300,Phones,3
3,156,643029223A6D4030,SKU-643029223A6D4030,Delivered,Italy,7422,Furnitures,4
4,189,23019617DD11413F,SKU-23019617DD11413F,Delivered,England,3196,Electronics,5


# Create Received date for the items

In [33]:
def random_dates(start, end, n, unit='D', seed=None):
    """Return ``n`` random timestamps between ``start`` and ``end`` (both inclusive).

    Parameters
    ----------
    start, end : pandas.Timestamp
        Bounds of the sampling window; ``end - start`` must expose ``.days``.
    n : int
        Number of timestamps to draw.
    unit : str, default 'D'
        Unit passed to ``pd.to_timedelta`` for the random offsets. The offsets
        are always drawn from the number of whole days in the window, so the
        result only spans the full window when ``unit`` is 'D'.
    seed : int, optional
        Seed for NumPy's global random generator. When omitted, the generator
        is seeded with 0, so the default call is reproducible.

    Returns
    -------
    ``start`` plus ``n`` random offsets, as produced by ``pd.to_timedelta``.

    Notes
    -----
    This reseeds NumPy's *global* random state as a side effect.
    """
    # Bug fix: the seed argument used to be ignored. A truthy seed skipped
    # seeding altogether, so "seeded" calls were not reproducible. The default
    # (seed 0 when none is given) is unchanged.
    np.random.seed(0 if seed is None else seed)

    # +1 so that `end` itself can be drawn (randint's upper bound is exclusive)
    ndays = (end - start).days + 1
    return start + pd.to_timedelta(np.random.randint(0, ndays, n), unit=unit)

# Sampling window for the received dates: calendar year 2022
start = pd.to_datetime('2022-01-01')
end = pd.to_datetime('2022-12-31')

# One random date per row. The count is derived from the frame rather than
# a hard-coded row count, so it always matches the number of rows.
df['Received_date'] = random_dates(start, end, len(df))
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category,Serial_number,Received_date
0,168,DBFF2353E4794747,SKU-DBFF2353E4794747,Delivered,Poland,7978,Furnitures,1,2022-06-22
1,146,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,Delivered,Holland,2861,Electronics,2,2022-02-17
2,248,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,Received,Holland,4300,Phones,3,2022-04-28
3,156,643029223A6D4030,SKU-643029223A6D4030,Delivered,Italy,7422,Furnitures,4,2022-07-12
4,189,23019617DD11413F,SKU-23019617DD11413F,Delivered,England,3196,Electronics,5,2022-11-20


# Create the Received month from the Received date

In [34]:
# Derive a monthly period (e.g. 2022-06) from each received date
received_dates = df['Received_date']
df['Received_month'] = received_dates.dt.to_period('M')
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category,Serial_number,Received_date,Received_month
0,168,DBFF2353E4794747,SKU-DBFF2353E4794747,Delivered,Poland,7978,Furnitures,1,2022-06-22,2022-06
1,146,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,Delivered,Holland,2861,Electronics,2,2022-02-17,2022-02
2,248,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,Received,Holland,4300,Phones,3,2022-04-28,2022-04
3,156,643029223A6D4030,SKU-643029223A6D4030,Delivered,Italy,7422,Furnitures,4,2022-07-12,2022-07
4,189,23019617DD11413F,SKU-23019617DD11413F,Delivered,England,3196,Electronics,5,2022-11-20,2022-11


# Arrange the dataset

In [35]:
# Column order for the final dataset
column_order = [
    'Serial_number',
    'Received_date',
    'Received_month',
    'UID',
    'SKU',
    'Delivery note',
    'Category',
    'COGS',
    'Item_Status',
    'Country_Name',
]
df_arranged = df[column_order]
df_arranged.head()

Unnamed: 0,Serial_number,Received_date,Received_month,UID,SKU,Delivery note,Category,COGS,Item_Status,Country_Name
0,1,2022-06-22,2022-06,DBFF2353E4794747,SKU-DBFF2353E4794747,7978,Furnitures,168,Delivered,Poland
1,2,2022-02-17,2022-02,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,2861,Electronics,146,Delivered,Holland
2,3,2022-04-28,2022-04,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,4300,Phones,248,Received,Holland
3,4,2022-07-12,2022-07,643029223A6D4030,SKU-643029223A6D4030,7422,Furnitures,156,Delivered,Italy
4,5,2022-11-20,2022-11,23019617DD11413F,SKU-23019617DD11413F,3196,Electronics,189,Delivered,England


# Preview the data with the Serial number column as the index

In [36]:
# Display-only preview of the first rows indexed by serial number.
# The result is not assigned, so df_arranged keeps 'Serial_number' as a regular column.
df_arranged.head().set_index('Serial_number')

Unnamed: 0_level_0,Received_date,Received_month,UID,SKU,Delivery note,Category,COGS,Item_Status,Country_Name
Serial_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2022-06-22,2022-06,DBFF2353E4794747,SKU-DBFF2353E4794747,7978,Furnitures,168,Delivered,Poland
2,2022-02-17,2022-02,2ADA2FA9DF8940D2,SKU-2ADA2FA9DF8940D2,2861,Electronics,146,Delivered,Holland
3,2022-04-28,2022-04,DB78C0CAAC754E8A,SKU-DB78C0CAAC754E8A,4300,Phones,248,Received,Holland
4,2022-07-12,2022-07,643029223A6D4030,SKU-643029223A6D4030,7422,Furnitures,156,Delivered,Italy
5,2022-11-20,2022-11,23019617DD11413F,SKU-23019617DD11413F,3196,Electronics,189,Delivered,England


# Export the data to a CSV file

In [37]:
# Write the arranged dataset to disk; the positional row index is left out of the file
output_path = 'Inventory Management.csv'
df_arranged.to_csv(output_path, index=False)