# Random Data
https://pypi.org/project/Faker/

Oprettelse af følgende data filer:

- Customeres
- Employees
- Orders
- Products - 20 produkter fast defineret

## Faker
### install Faker
pip install Faker

## XlsxWriter
pip install XlsxWriter

In [1]:
# Import af moduler
from faker import Factory
import pandas as pd
import random
import xlsxwriter

In [2]:
# Use Faker
fake = Factory.create()

## Antal
Du kan her vælge hvor mange; customeres, sales og employees du vil have i dit datasæt.
Bemærk dog at hvis du sætter tallet *meget* højt vil det tage relativ lang tid at generer data.

In [3]:
# Antal
no_customeres = 20
no_sales = 5000
no_employee = 10

## Customers

In [4]:
# Customers Dataframe
df_customers = pd.DataFrame(columns=[
    'customer_id',
    'first_name',
    'last_name',
    'address',
    'postcode',
    'city',
    'country',
    'ascii_safe_email',
    'date_of_birth'])

In [5]:
# Tilføj Customers
for i in range(no_customeres):
    data = [
        i+1,
        fake.first_name(),
        fake.last_name(),  
        fake.address(),
        fake.postcode(),    
        fake.city(),
        fake.country(),
        fake.ascii_safe_email(),
        fake.date_of_birth()]
    df_customers.loc[i] = [item for item in data]

In [6]:
df_customers.head()

Unnamed: 0,customer_id,first_name,last_name,address,postcode,city,country,ascii_safe_email,date_of_birth
0,1,Mary,Beck,"6536 Daniel Underpass\nNew Bethanyton, AL 73886",8752,Lake Michelletown,French Southern Territories,sarah34@example.com,1965-12-22
1,2,Kristen,Chandler,"94671 Murray Stream Apt. 740\nSouth David, LA ...",8236,North Sarahberg,Mali,ryanmunoz@example.com,1917-09-22
2,3,Timothy,Gaines,313 Buchanan Ville Apt. 405\nLake Patriciamout...,14461,New Kevin,United Kingdom,cameronfisher@example.com,1917-08-30
3,4,James,Montgomery,"894 Paige Ranch\nPort Brian, FL 41318",27728,West Steven,Turkmenistan,sarahjones@example.net,1946-06-25
4,5,Brent,Carr,"18309 Michael Union\nNew Crystal, MS 21275",68682,Hansenbury,Mali,chapmanryan@example.net,1976-01-05


## Order

In [7]:
## Order Dataframe
df_order = pd.DataFrame(columns=[
    'order_id',
    'product_id',
    'quantity',
    'customer_id',
    'orderdate',
    'employee_id'])

In [9]:
# Tilføj Order
for i in range(no_sales):
    data = [
        fake.random_int(10000, 15000),
        fake.random_int(0, 20),
        fake.random_int(1, 100),
        fake.random_int(0, 200),    
        fake.date_between(start_date='-3y', end_date='now'),
        fake.random_int(1, no_employee)]
    df_order.loc[i] = [item for item in data]

In [10]:
# Tilføj Deliverydate
df_order['deliverydate'] = df_order['orderdate'] + pd.DateOffset(days=fake.random_int(0, 12))

In [11]:
#df_order.head()
df_order.tail()

Unnamed: 0,order_id,product_id,quantity,customer_id,orderdate,employee_id,deliverydate
4995,10080,7,58,143,2021-09-30,8,2021-10-07
4996,11262,17,87,119,2020-10-20,3,2020-10-27
4997,13062,5,15,68,2021-09-23,9,2021-09-30
4998,11860,19,57,95,2020-07-25,10,2020-08-01
4999,13388,5,82,138,2021-09-15,7,2021-09-22


## Employee

In [12]:
## Employee Dataframe
df_employee = pd.DataFrame(columns=[
    'employee_id',
    'firstname',
    'lastname',
    'date_of_birth'])

In [13]:
# Tilføj Employee
for i in range(no_employee):
    data = [
        i+1,
        fake.first_name(),
        fake.last_name(),  
        fake.date_of_birth()]
    df_employee.loc[i] = [item for item in data]

In [14]:
df_employee

Unnamed: 0,employee_id,firstname,lastname,date_of_birth
0,1,Ryan,Benitez,1949-01-26
1,2,Eileen,Ross,1982-11-08
2,3,Yvonne,Rowe,1948-07-16
3,4,William,Turner,1974-10-19
4,5,Benjamin,Chavez,1975-01-26
5,6,Jennifer,Rocha,1958-04-04
6,7,Martin,Dunn,1906-08-06
7,8,Peter,Ashley,1921-06-28
8,9,Bridget,Hanson,1925-06-19
9,10,Sherri,Lucas,1988-05-11


## Products

In [15]:
# Liste af produkter
produkt_data = [
    [1 , 'Chai', 26.50, 104, 50, 'standard'],
    [2 , 'Chef Anton Cajun Seasoning', 109.00, 53, 25, 'luxury'],
    [3 , 'Chef Anton Gumbo Mix', 87.75, 34, 20, 'luxury'],
    [4 , 'Tofu', 0, 20, 17.75, 'standard'],
    [5 , 'Sir Rodney Marmalade', 99.75, 28, 30, 'luxury'],
    [6 , 'Sir Rodney Scones', 47.75, 60, 80, 'luxury'],
    [7 , 'Geitost', 72.25, 230, 100, 'luxury'],
    [8 , 'Gravad lax', 99.00, 147, 100, 'luxury'],
    [9 , 'Cte de Blaye', 56.50, 0, 20, 'standard'],
    [10 , 'Chartreuse verte', 29.50, 9, 20, 'standard'],
    [11 , 'Boston Crab Meat', 299.75, 30, 20, 'luxury'],
    [12 , 'Chocolade', 140.50, 26, 20, 'standard'],
    [13 , 'Raclette Courdavault', 67.75, 0, 20, 'luxury'],
    [14 , 'Camembert Pierrot', 75.50, 89, 50, 'luxury'],
    [15 , 'Tarte au sucre', 37.75, 0, 20, 'luxury'],
    [16 , 'Louisiana Hot Spiced Okra', 152.00, 98, 40, 'luxury'],
    [17 , 'Laughing Lumberjack Lager', 205.50, 54, 20, 'luxury'],
    [18 , 'Scottish Longbreads', 89.75, 19, 20, 'luxury'],
    [19 , 'Outback Lager', 199.50, 24, 20, 'standard'],
    [20 , 'Rd Kaviar', 299.50, 72, 20, 'luxury']
]

# Opret Dataframe
df_products = pd.DataFrame(produkt_data, columns = ['product_id', 'productname', 'unitprice', 'stock', 'reorder', 'type'])

In [16]:
df_products

Unnamed: 0,product_id,productname,unitprice,stock,reorder,type
0,1,Chai,26.5,104,50.0,standard
1,2,Chef Anton Cajun Seasoning,109.0,53,25.0,luxury
2,3,Chef Anton Gumbo Mix,87.75,34,20.0,luxury
3,4,Tofu,0.0,20,17.75,standard
4,5,Sir Rodney Marmalade,99.75,28,30.0,luxury
5,6,Sir Rodney Scones,47.75,60,80.0,luxury
6,7,Geitost,72.25,230,100.0,luxury
7,8,Gravad lax,99.0,147,100.0,luxury
8,9,Cte de Blaye,56.5,0,20.0,standard
9,10,Chartreuse verte,29.5,9,20.0,standard


# Export
Export af data til Excel og CSV

I Excel gemmes der én Excel fil, med data fordelt på 4 ark.

I CSV gemmes der til en CSV filer pr. dataframe - altså 4 csv filer.

In [17]:
# Excel
create_excel = pd.ExcelWriter('sales_data.xlsx', engine='xlsxwriter', date_format='dd-mm-yyyy')

# Overfør hver DF til Excel
df_customers.to_excel(create_excel, sheet_name='customers', index=False)
df_order.to_excel(create_excel, sheet_name='order', index=False)
df_employee.to_excel(create_excel, sheet_name='employee', index=False)
df_products.to_excel(create_excel, sheet_name='products', index=False)

# Gem Excel fil
create_excel.save()

In [18]:
# Gem til CSV
df_customers.to_csv('customers.csv', sep=';', index=False)
df_order.to_csv('order.csv', sep=';', index=False, decimal=',')
df_employee.to_csv('employees.csv', sep=';', index=False)
df_products.to_csv('products.csv', sep=';', index=False)