# Create an inventory dataset of 2 million rows using Python and run some analysis on it.

In [1]:
# Import different libraries

import pandas as pd
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
import uuid
import time
import calendar
import seaborn as sns

# Create a dataset of 2 million rows

In [2]:
# Base frame: one random integer per row in 'COGS', drawn from [100, 300)
df = pd.DataFrame({'COGS': np.random.randint(100, 300, size=2000000)})

# 'UID': the first 16 upper-cased hex characters of a fresh UUID4 for every row
row_count = len(df)
df['UID'] = [uuid.uuid4().hex[:16].upper() for _ in range(row_count)]

# 'SKU': the UID with a 'SKU-' prefix
df['SKU'] = 'SKU-' + df['UID']

# 'Item_Status': sampled per row; weights[i] is the probability of statuses[i]
statuses = ['Delivered', 'Shipped', 'Packed', 'Received', 'picked', 'lost']
weights = [0.4, 0.1, 0.1, 0.3, 0.09, 0.01]
df['Item_Status'] = np.random.choice(statuses, size=row_count, p=weights)

df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status
0,290,042785AEA04E4909,SKU-042785AEA04E4909,Packed
1,283,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,Packed
2,263,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,Packed
3,127,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,Delivered
4,282,A2E255EC93AA4051,SKU-A2E255EC93AA4051,Delivered


# Create Country Name

In [3]:
# Sample a country for every row; weights[i] is the probability of countries[i]
countries = ['Germany', 'Spain', 'France', 'England', 'Portugal', 'Italy', 'Poland', 'Holland']
weights = [0.2, 0.15, 0.1, 0.15, 0.05, 0.13, 0.05, 0.17]

# Draw exactly one value per existing row (len(df)) instead of repeating the
# row count as a literal, so the column always matches the frame's length.
df['Country_Name'] = np.random.choice(countries, size=len(df), p=weights)
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name
0,290,042785AEA04E4909,SKU-042785AEA04E4909,Packed,Germany
1,283,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,Packed,Spain
2,263,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,Packed,France
3,127,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,Delivered,France
4,282,A2E255EC93AA4051,SKU-A2E255EC93AA4051,Delivered,England


# Create Delivery note

In [4]:
# Random delivery-note number in [0, 10000) for every row; the size follows
# the frame's length so the column always lines up with the existing rows.
df['Delivery note'] = np.random.randint(10000, size=len(df))
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note
0,290,042785AEA04E4909,SKU-042785AEA04E4909,Packed,Germany,8869
1,283,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,Packed,Spain,579
2,263,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,Packed,France,7475
3,127,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,Delivered,France,1710
4,282,A2E255EC93AA4051,SKU-A2E255EC93AA4051,Delivered,England,6777


# Create items category

In [5]:
def a(df):
    """Return the item category for the 'Delivery note' value found in `df`.

    `df` is anything indexable by the key 'Delivery note'; when this function
    is passed to ``DataFrame.apply(..., axis=1)`` it receives one row at a time.

    Mapping (bounds are inclusive on the upper end):
        0..2000      -> 'Fashion'
        2001..4000   -> 'Electronics'
        4001..6000   -> 'Phones'
        6001..8000   -> 'Furnitures'
        anything else (including negative values) -> 'Computing'
    """
    note = df['Delivery note']

    if 0 <= note <= 2000:
        return 'Fashion'
    if 2000 < note <= 4000:
        return 'Electronics'
    if 4000 < note <= 6000:
        return 'Phones'
    if 6000 < note <= 8000:
        return 'Furnitures'
    return 'Computing'

# Vectorised category assignment. Uses the same bounds and the same fallback
# as a(), but evaluates whole columns at once instead of calling a Python
# function for every row (row-wise apply is very slow on millions of rows).
note = df['Delivery note']
df['Category'] = np.select(
    [
        (note >= 0) & (note <= 2000),
        (note > 2000) & (note <= 4000),
        (note > 4000) & (note <= 6000),
        (note > 6000) & (note <= 8000),
    ],
    ['Fashion', 'Electronics', 'Phones', 'Furnitures'],
    default='Computing',  # everything outside the ranges above, as in a()
)
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category
0,290,042785AEA04E4909,SKU-042785AEA04E4909,Packed,Germany,8869,Computing
1,283,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,Packed,Spain,579,Fashion
2,263,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,Packed,France,7475,Furnitures
3,127,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,Delivered,France,1710,Fashion
4,282,A2E255EC93AA4051,SKU-A2E255EC93AA4051,Delivered,England,6777,Furnitures


# Create Serial number for the data

In [6]:
# 1-based running number for every row; the upper bound follows the frame's
# length so the assignment cannot fail with a length mismatch.
df['Serial_number'] = range(1, len(df) + 1)
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category,Serial_number
0,290,042785AEA04E4909,SKU-042785AEA04E4909,Packed,Germany,8869,Computing,1
1,283,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,Packed,Spain,579,Fashion,2
2,263,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,Packed,France,7475,Furnitures,3
3,127,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,Delivered,France,1710,Fashion,4
4,282,A2E255EC93AA4051,SKU-A2E255EC93AA4051,Delivered,England,6777,Furnitures,5


# Create Delivery date for the items

In [14]:
def random_dates(start, end, n, unit='D', seed=None):
    """Return `n` random timestamps of the form ``start + offset``.

    Parameters
    ----------
    start, end : timestamp-like
        Bounds of the range; ``end - start`` must expose a ``.days`` attribute.
    n : int
        Number of values to draw.
    unit : str, default 'D'
        Unit handed to ``pd.to_timedelta`` for the random offsets. The offsets
        are integers drawn from ``0 .. (end - start).days`` regardless of
        `unit`, so only the default 'D' spans the whole [start, end] range.
    seed : int, optional
        Seed for NumPy's global random state. When omitted, 0 is used so that
        repeated default calls return the same values.

    Returns
    -------
    The result of ``start + pd.to_timedelta(offsets, unit=unit)``.

    Notes
    -----
    This re-seeds NumPy's *global* random state on every call, which also
    affects later ``np.random.*`` calls.
    """
    # Honour an explicitly supplied seed (including 0); fall back to 0 only
    # when no seed was given.
    np.random.seed(0 if seed is None else seed)

    ndays = (end - start).days + 1
    return start + pd.to_timedelta(np.random.randint(0, ndays, n), unit=unit)

# Delivery dates are drawn from the calendar year 2022 (both bounds inclusive)
start = pd.to_datetime('2022-01-01')
end = pd.to_datetime('2022-12-31')

# One date per existing row; len(df) keeps the column aligned with the frame
df['Delivery_date'] = random_dates(start, end, len(df))
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category,Serial_number,Delivery_date,Delivery_month
0,290,042785AEA04E4909,SKU-042785AEA04E4909,Packed,Germany,8869,Computing,1,2022-06-22,2022-06
1,283,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,Packed,Spain,579,Fashion,2,2022-02-17,2022-02
2,263,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,Packed,France,7475,Furnitures,3,2022-04-28,2022-04
3,127,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,Delivered,France,1710,Fashion,4,2022-07-12,2022-07
4,282,A2E255EC93AA4051,SKU-A2E255EC93AA4051,Delivered,England,6777,Furnitures,5,2022-11-20,2022-11


# Create the Delivery month

In [15]:
# Reduce each delivery date to its monthly period (frequency 'M')
delivery_dates = df['Delivery_date']
df['Delivery_month'] = delivery_dates.dt.to_period('M')
df.head()

Unnamed: 0,COGS,UID,SKU,Item_Status,Country_Name,Delivery note,Category,Serial_number,Delivery_date,Delivery_month
0,290,042785AEA04E4909,SKU-042785AEA04E4909,Packed,Germany,8869,Computing,1,2022-06-22,2022-06
1,283,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,Packed,Spain,579,Fashion,2,2022-02-17,2022-02
2,263,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,Packed,France,7475,Furnitures,3,2022-04-28,2022-04
3,127,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,Delivered,France,1710,Fashion,4,2022-07-12,2022-07
4,282,A2E255EC93AA4051,SKU-A2E255EC93AA4051,Delivered,England,6777,Furnitures,5,2022-11-20,2022-11


# Arrange the dataset

In [16]:
# Column order of the final dataset
column_order = [
    'Serial_number', 'Delivery_date', 'Delivery_month', 'UID', 'SKU',
    'Delivery note', 'Category', 'COGS', 'Item_Status', 'Country_Name',
]
df_arranged = df[column_order]
df_arranged.head()

Unnamed: 0,Serial_number,Delivery_date,Delivery_month,UID,SKU,Delivery note,Category,COGS,Item_Status,Country_Name
0,1,2022-06-22,2022-06,042785AEA04E4909,SKU-042785AEA04E4909,8869,Computing,290,Packed,Germany
1,2,2022-02-17,2022-02,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,579,Fashion,283,Packed,Spain
2,3,2022-04-28,2022-04,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,7475,Furnitures,263,Packed,France
3,4,2022-07-12,2022-07,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,1710,Fashion,127,Delivered,France
4,5,2022-11-20,2022-11,A2E255EC93AA4051,SKU-A2E255EC93AA4051,6777,Furnitures,282,Delivered,England


# Preview the data with the Serial number column as index

In [17]:
# Preview only: the result is displayed, not assigned, so df_arranged keeps
# 'Serial_number' as a regular column. Taking head() first shows the same
# five rows without re-indexing (and copying) the whole frame.
df_arranged.head().set_index('Serial_number')

Unnamed: 0_level_0,Delivery_date,Delivery_month,UID,SKU,Delivery note,Category,COGS,Item_Status,Country_Name
Serial_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2022-06-22,2022-06,042785AEA04E4909,SKU-042785AEA04E4909,8869,Computing,290,Packed,Germany
2,2022-02-17,2022-02,22F2C64EAE484ABF,SKU-22F2C64EAE484ABF,579,Fashion,283,Packed,Spain
3,2022-04-28,2022-04,DFFE97ACE1FD4511,SKU-DFFE97ACE1FD4511,7475,Furnitures,263,Packed,France
4,2022-07-12,2022-07,98E31AF97D2D44A3,SKU-98E31AF97D2D44A3,1710,Fashion,127,Delivered,France
5,2022-11-20,2022-11,A2E255EC93AA4051,SKU-A2E255EC93AA4051,6777,Furnitures,282,Delivered,England


# Export the data into CSV file

In [18]:
# Write the arranged frame to disk; index=False leaves the row index out of the file
output_path = 'Inventory Management.csv'
df_arranged.to_csv(output_path, index=False)