# Fct_Orders Creator

The purpose of this Jupyter Notebook is to create the values for the Fact Table Orders (Fct_Orders). Using the Date IDs from Dim_Dates, I used np.random.randint to pick dates within the span of time covered by each call of the Orders_Randomizer functions. Also, as the overall length of "time" the company Pelipper Delivery Service has been active grows, we add on more towns and regions. We also widen the random range for the Number of Orders.

All of these pseudo-randomized variables are supposed to simulate scaling the business over time. 

In [57]:
import pandas as pd
import numpy as np

# NOTES: We have tried running this randomization uniformly across one full year's worth of data, BUT
# what ends up happening is that the generated data is TOO EVEN. The data can't differentiate itself
# between items, months, trainers, etc.

# So what we have to do instead is manually vary the randomized values to simulate scaling the business
# so that our results are more realistic

In [58]:
# Alternative Orders_Randomizer

# This one will be based on Day 1 = January 1, 2000
def Orders_Randomizer(dataframe, num_orders, day_start, day_end, num_towns=10):
    """Build a DataFrame of randomly generated order rows.

    Every column is drawn independently with ``np.random.randint``, whose
    upper bound is exclusive.

    Args:
        dataframe: Ignored. Kept only so existing positional calls keep
            working; a new DataFrame is always created and returned.
        num_orders: Number of rows to generate.
        day_start: Smallest ``date_id`` that can be generated (inclusive).
        day_end: Upper bound for ``date_id`` (exclusive).
        num_towns: Number of towns; ``town_id`` is drawn from
            ``1..num_towns``. Defaults to 10.

    Returns:
        A DataFrame with ``num_orders`` rows and the integer columns
        ``item_id`` (1-27), ``trainer_id`` (1-20), ``town_id``
        (1-``num_towns``), ``flyer_id`` (1-50), ``date_id``
        (``day_start``..``day_end - 1``) and ``qty`` (1-3).
    """
    # The draws happen in column order, so a seeded run produces the same
    # values as before.
    return pd.DataFrame({
        'item_id': np.random.randint(1, 28, num_orders),
        'trainer_id': np.random.randint(1, 21, num_orders),
        'town_id': np.random.randint(1, num_towns + 1, num_orders),
        'flyer_id': np.random.randint(1, 51, num_orders),
        'date_id': np.random.randint(day_start, day_end, num_orders),
        'qty': np.random.randint(1, 4, num_orders),
    })


In [59]:
# Orders_Randomizer(dataframe, num_orders, day_start, day_end)
# Year 2000 (Day 1 = January 1, 2000). Order volume grows from period to
# period to simulate the business scaling up.
# The first argument is ignored by Orders_Randomizer, so None is passed;
# passing df1..df5 here raised NameError because they are not defined yet.
df1 = Orders_Randomizer(None, 100, 1, 30)
df2 = Orders_Randomizer(None, 300, 30, 90)
df3 = Orders_Randomizer(None, 550, 90, 180)
df4 = Orders_Randomizer(None, 775, 180, 270)
# day_end is exclusive, so 367 is required for date_id 366 to be generated
# (the 2001 data starts at date_id 367).
df5 = Orders_Randomizer(None, 1500, 270, 367)

# Concat all Dataframes together
dfs = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)
dfs.to_csv('Output.csv', index=False)

In [24]:
# In this Randomizer we have also added a randomizer on the number of orders AND a multiplier
# This should help randomize things even more

# Additions:
    # More Towns
    # Now we have Regions (Kanto, Johto, Hoenn, Sinnoh, Unova)
    # Keep same number of flyers
    # Keep same number of items
    # Keep same price of items


In [80]:
# This Randomizer is for the Year 2001  
def Orders_Randomizer2(dataframe, day_start, day_end, num_orders, num_towns=26):
    """Build a DataFrame of randomly generated order rows (2001 settings).

    Every column is drawn independently with ``np.random.randint``, whose
    upper bound is exclusive.

    Args:
        dataframe: Ignored. Kept only so existing positional calls keep
            working; a new DataFrame is always created and returned.
        day_start: Smallest ``date_id`` that can be generated (inclusive).
        day_end: Upper bound for ``date_id`` (exclusive).
        num_orders: Number of rows to generate.
        num_towns: Number of towns; ``town_id`` is drawn from
            ``1..num_towns``. Defaults to 26.

    Returns:
        A DataFrame with ``num_orders`` rows and the integer columns
        ``item_id`` (1-27), ``trainer_id`` (1-20), ``town_id``
        (1-``num_towns``), ``flyer_id`` (1-50), ``date_id``
        (``day_start``..``day_end - 1``) and ``qty`` (1-3).
    """
    # The draws happen in column order, so a seeded run produces the same
    # values as before.
    return pd.DataFrame({
        'item_id': np.random.randint(1, 28, num_orders),
        'trainer_id': np.random.randint(1, 21, num_orders),
        'town_id': np.random.randint(1, num_towns + 1, num_orders),
        'flyer_id': np.random.randint(1, 51, num_orders),
        'date_id': np.random.randint(day_start, day_end, num_orders),
        'qty': np.random.randint(1, 4, num_orders),
    })

# Randomize the number of orders and a multiplier.
# randint's upper bound is exclusive: base count is 100-1499, multiplier 1-5.
orders = np.random.randint(100, 1500)
multipler = np.random.randint(1, 6)

# Acquire num_orders based on random numbers above
num_orders = orders * multipler

# Orders_Randomizer2(dataframe, day_start, day_end, num_orders)
# One call per quarter of 2001; the same num_orders is used for every quarter.
# The first argument is ignored by the function, so None is passed instead of
# relying on df1 from an earlier cell being defined.
df6 = Orders_Randomizer2(None, 367, 457, num_orders)
df7 = Orders_Randomizer2(None, 457, 548, num_orders)
df8 = Orders_Randomizer2(None, 548, 640, num_orders)
df9 = Orders_Randomizer2(None, 640, 732, num_orders)

dfs1 = pd.concat([df6, df7, df8, df9], ignore_index=True)

In [81]:
# This Randomizer is for the Year 2002
def Orders_Randomizer3(dataframe, day_start, day_end, num_orders, num_towns=36):
    """Build a DataFrame of randomly generated order rows (2002 settings).

    Every column is drawn independently with ``np.random.randint``, whose
    upper bound is exclusive.

    Args:
        dataframe: Ignored. Kept only so existing positional calls keep
            working; a new DataFrame is always created and returned.
        day_start: Smallest ``date_id`` that can be generated (inclusive).
        day_end: Upper bound for ``date_id`` (exclusive).
        num_orders: Number of rows to generate.
        num_towns: Number of towns; ``town_id`` is drawn from
            ``1..num_towns``. Defaults to 36.

    Returns:
        A DataFrame with ``num_orders`` rows and the integer columns
        ``item_id`` (1-27), ``trainer_id`` (1-20), ``town_id``
        (1-``num_towns``), ``flyer_id`` (1-50), ``date_id``
        (``day_start``..``day_end - 1``) and ``qty`` (1-3).
    """
    # The draws happen in column order, so a seeded run produces the same
    # values as before.
    return pd.DataFrame({
        'item_id': np.random.randint(1, 28, num_orders),
        'trainer_id': np.random.randint(1, 21, num_orders),
        'town_id': np.random.randint(1, num_towns + 1, num_orders),
        'flyer_id': np.random.randint(1, 51, num_orders),
        'date_id': np.random.randint(day_start, day_end, num_orders),
        'qty': np.random.randint(1, 4, num_orders),
    })

# Randomize the number of orders and a multiplier.
# randint's upper bound is exclusive: base count is 250-2999, multiplier 1-5.
orders = np.random.randint(250, 3000)
multipler = np.random.randint(1, 6)

# Acquire num_orders based on random numbers above
num_orders = orders * multipler

# Orders_Randomizer3(dataframe, day_start, day_end, num_orders)
# One call per quarter of 2002; the same num_orders is used for every quarter.
# The first argument is ignored by the function, so None is passed instead of
# relying on df1 from an earlier cell being defined.
df6 = Orders_Randomizer3(None, 732, 822, num_orders)
df7 = Orders_Randomizer3(None, 822, 913, num_orders)
df8 = Orders_Randomizer3(None, 913, 1005, num_orders)
df9 = Orders_Randomizer3(None, 1005, 1097, num_orders)

dfs2 = pd.concat([df6, df7, df8, df9], ignore_index=True)

In [82]:
# This Randomizer is for the Year 2003
def Orders_Randomizer4(dataframe, day_start, day_end, num_orders, num_towns=50):
    """Build a DataFrame of randomly generated order rows (2003 settings).

    Every column is drawn independently with ``np.random.randint``, whose
    upper bound is exclusive.

    Args:
        dataframe: Ignored. Kept only so existing positional calls keep
            working; a new DataFrame is always created and returned.
        day_start: Smallest ``date_id`` that can be generated (inclusive).
        day_end: Upper bound for ``date_id`` (exclusive).
        num_orders: Number of rows to generate.
        num_towns: Number of towns; ``town_id`` is drawn from
            ``1..num_towns``. Defaults to 50.

    Returns:
        A DataFrame with ``num_orders`` rows and the integer columns
        ``item_id`` (1-27), ``trainer_id`` (1-20), ``town_id``
        (1-``num_towns``), ``flyer_id`` (1-50), ``date_id``
        (``day_start``..``day_end - 1``) and ``qty`` (1-3).
    """
    # The draws happen in column order, so a seeded run produces the same
    # values as before.
    return pd.DataFrame({
        'item_id': np.random.randint(1, 28, num_orders),
        'trainer_id': np.random.randint(1, 21, num_orders),
        'town_id': np.random.randint(1, num_towns + 1, num_orders),
        'flyer_id': np.random.randint(1, 51, num_orders),
        'date_id': np.random.randint(day_start, day_end, num_orders),
        'qty': np.random.randint(1, 4, num_orders),
    })

# Randomize the number of orders and a multiplier.
# randint's upper bound is exclusive: base count is 400-4199, multiplier 2-5.
orders = np.random.randint(400, 4200)
multipler = np.random.randint(2, 6)

# Acquire num_orders based on random numbers above
num_orders = orders * multipler

# Orders_Randomizer4(dataframe, day_start, day_end, num_orders)
# One call per quarter of 2003; the same num_orders is used for every quarter.
# The first argument is ignored by the function, so None is passed instead of
# relying on df1 from an earlier cell being defined.
df6 = Orders_Randomizer4(None, 1097, 1187, num_orders)
df7 = Orders_Randomizer4(None, 1187, 1278, num_orders)
df8 = Orders_Randomizer4(None, 1278, 1370, num_orders)
df9 = Orders_Randomizer4(None, 1370, 1462, num_orders)

dfs3 = pd.concat([df6, df7, df8, df9], ignore_index=True)

In [83]:
# This Randomizer is for the Year 2004
def Orders_Randomizer5(dataframe, day_start, day_end, num_orders, num_towns=70):
    """Build a DataFrame of randomly generated order rows (2004 settings).

    Every column is drawn independently with ``np.random.randint``, whose
    upper bound is exclusive.

    Args:
        dataframe: Ignored. Kept only so existing positional calls keep
            working; a new DataFrame is always created and returned.
        day_start: Smallest ``date_id`` that can be generated (inclusive).
        day_end: Upper bound for ``date_id`` (exclusive).
        num_orders: Number of rows to generate.
        num_towns: Number of towns; ``town_id`` is drawn from
            ``1..num_towns``. Defaults to 70.

    Returns:
        A DataFrame with ``num_orders`` rows and the integer columns
        ``item_id`` (1-27), ``trainer_id`` (1-20), ``town_id``
        (1-``num_towns``), ``flyer_id`` (1-50), ``date_id``
        (``day_start``..``day_end - 1``) and ``qty`` (1-3).
    """
    # The draws happen in column order, so a seeded run produces the same
    # values as before.
    return pd.DataFrame({
        'item_id': np.random.randint(1, 28, num_orders),
        'trainer_id': np.random.randint(1, 21, num_orders),
        'town_id': np.random.randint(1, num_towns + 1, num_orders),
        'flyer_id': np.random.randint(1, 51, num_orders),
        'date_id': np.random.randint(day_start, day_end, num_orders),
        'qty': np.random.randint(1, 4, num_orders),
    })

# Randomize the number of orders and a multiplier.
# randint's upper bound is exclusive: base count is 600-4999, multiplier 4-7.
orders = np.random.randint(600, 5000)
multipler = np.random.randint(4, 8)

# Acquire num_orders based on random numbers above
num_orders = orders * multipler

# Orders_Randomizer5(dataframe, day_start, day_end, num_orders)
# One call per quarter of 2004; the same num_orders is used for every quarter.
# The first argument is ignored by the function, so None is passed instead of
# relying on df1 from an earlier cell being defined.
df6 = Orders_Randomizer5(None, 1462, 1553, num_orders)
df7 = Orders_Randomizer5(None, 1553, 1644, num_orders)
df8 = Orders_Randomizer5(None, 1644, 1736, num_orders)
df9 = Orders_Randomizer5(None, 1736, 1828, num_orders)

dfs4 = pd.concat([df6, df7, df8, df9], ignore_index=True)

# Big Concat all of the smaller concats (2001-2004; the year-2000 rows are
# written separately to Output.csv)
bigdfs = pd.concat([dfs1, dfs2, dfs3, dfs4], ignore_index=True)

bigdfs.to_csv('PDS2.csv', index=False)