### Import the Libraries

In [None]:
import time
import names # Allows for the generation of random names
import random # Allows for the generation of random values
import numpy as np
import pandas as pd
import pandasql as ps # Allows to run SQL statements
from faker import Faker # Allows for the generation of fake data
from datetime import datetime

faker = Faker() 

In [None]:
def sql(query):
    """Run a SQL statement with pandasql and return the result of ``ps.sqldf``.

    query: the SQL text to execute.

    NOTE(review): presumably the table names in the query are resolved by
    pandasql against the DataFrames defined in the notebook -- confirm
    against the pandasql documentation.
    """
    result = ps.sqldf(query)
    return result

### 1. Generating Product Data

In [None]:
# Read the whiskey data CSV, using its 'Unnamed: 0' column as the index
products = pd.read_csv(
    './scrapped_data/whiskey_data.csv',
    index_col='Unnamed: 0',
)

# Prices are stored as text: strip the commas, then convert to float
products['Alcohol_Price'] = (
    products['Alcohol_Price']
    .str.replace(',', '')
    .astype('float')
)

products.head()

In [None]:
# Draw every integer in 0..n-1 exactly once, in random order
# (n = number of products), giving one unique id per product
product_id = np.random.default_rng().choice(
    a=len(products['Product_Name']),
    size=len(products['Product_Name']),
    replace=False,
)

product_id

In [None]:
# Sanity check: no product id occurs more than once
assert pd.Series(product_id).is_unique

In [None]:
# Desired column layout, with the freshly generated id first
column_names = ['Product_ID', 'Product_Name', 'Alcohol_Percent', 'Alcohol_Amount', 'Alcohol_Price']

# Attach the ids as a new column and put the columns in the desired order
products = products.assign(Product_ID=product_id).reindex(columns=column_names)

products.head()

### 2. Generating Employee Data

In [None]:
# Draw 100 distinct employee ids from the integers 0..3999
employee_id = np.random.default_rng().choice(a=4000, size=100, replace=False)

# Sanity check: there are exactly 100 different ids
assert np.unique(employee_id).size == 100

# Sanity check: no id occurs more than once
assert pd.Series(employee_id).is_unique

In [None]:
# Per-employee attribute lists; entry k of each list belongs to employee_id[k]
employee_first_name, employee_last_name, employee_full_name = [], [], []
employee_email, employee_city, employee_department = [], [], []

# Departments an employee can be assigned to
departments = ['Sales', 'Finance', 'Marketing', 'BI']

# Produce one set of random attributes for every employee id
for i in range(len(employee_id)):
    employee_first_name.append(names.get_first_name())
    employee_last_name.append(names.get_last_name())
    # Full name is "<first> <last>"
    employee_full_name.append(f'{employee_first_name[-1]} {employee_last_name[-1]}')
    # Email is the first name followed by the lower-cased initial of the last name
    employee_email.append(
        f'{employee_first_name[-1]}{employee_last_name[-1][0].lower()}@gmail.com'
    )
    employee_city.append(faker.city())
    employee_department.append(np.random.choice(departments, 1)[0])

In [None]:
# Assemble the employee table from the generated lists, one column per attribute
employees = pd.DataFrame({
    'employee_id': employee_id,
    'first_name': employee_first_name,
    'last_name': employee_last_name,
    'full_name': employee_full_name,
    'email': employee_email,
    'city': employee_city,
    'department': employee_department,
})

employees.head()

### 3. Generating Customer Data

In [None]:
# Draw 1000 distinct customer ids from the integers 0..999998
customer_id = np.random.default_rng().choice(a=999999, size=1000, replace=False)

# Sanity check: there are exactly 1000 different ids
assert np.unique(customer_id).size == 1000

# Sanity check: no id occurs more than once
assert pd.Series(customer_id).is_unique

In [None]:
# Per-customer attribute lists; entry k of each list belongs to customer_id[k]
customer_first_name = []
customer_last_name = []
customer_full_name = []
customer_email = []
customer_last_four_digits = []
customer_country = []
customer_country_code = []
customer_street = []
customer_credit_card_company = []


# Produce one set of random attributes for every customer id
for i in range(len(customer_id)):
    customer_first_name.append(names.get_first_name())
    customer_last_name.append(names.get_last_name())
    # Full name is "<first> <last>"
    customer_full_name.append(customer_first_name[i] + ' ' + customer_last_name[i])
    # Email is the first name followed by the lower-cased initial of the last name
    customer_email.append(customer_first_name[i] + customer_last_name[i][0].lower() + '@gmail.com')
    # randint's `high` bound is exclusive, so 10000 is required for 9999 to be
    # a possible value (the previous high=9999 could never produce it)
    customer_last_four_digits.append(np.random.randint(low = 1000, high = 10000, size = 1)[0])
    customer_country.append(faker.country())
    # The "code" is just the first three letters of the country name, upper-cased
    customer_country_code.append(customer_country[i][0:3].upper())
    customer_street.append(faker.street_address())
    customer_credit_card_company.append(faker.credit_card_provider())

In [None]:
# Assemble the customer table from the generated lists, one column per attribute
customers = pd.DataFrame({
    'customer_id': customer_id,
    'first_name': customer_first_name,
    'last_name': customer_last_name,
    'full_name': customer_full_name,
    'email': customer_email,
    'country': customer_country,
    'country_code': customer_country_code,
    'street': customer_street,
    'credit_provider': customer_credit_card_company,
    'four_digits': customer_last_four_digits,
})

customers.head()

### 4. Generating Payments Data

In [None]:
# Every calendar day from 1990-01-01 through 2021-12-31 (daily frequency)
date_range = pd.date_range("1990-01-01", "2021-12-31", freq="D")

date_range

In [None]:
# Draw one distinct payment id per day in date_range from the integers 0..999998
payment_id = np.random.default_rng().choice(
    a=999999,
    size=len(date_range),
    replace=False,
)

# Sanity check: the number of different ids equals the number of dates
assert np.unique(payment_id).size == len(date_range)

# Sanity check: no id occurs more than once
assert pd.Series(payment_id).is_unique

In [None]:
# Per-payment attribute lists, filled with one entry per payment id
customer_id_payments, employee_id_payments, product_id_payments = [], [], []
dates = []


# For every payment pick a random day, customer, employee and product
for i in range(len(payment_id)):
    # Dates are stored as 'YYYY-MM-DD' strings
    dates.append(random.choice(date_range).strftime('%Y-%m-%d'))
    customer_id_payments.append(random.choice(customer_id))
    employee_id_payments.append(random.choice(employee_id))
    product_id_payments.append(random.choice(product_id))

In [None]:
# Assemble the payments table; the date strings are sorted so the rows run
# in chronological order
payments = pd.DataFrame({
    'payment_id': payment_id,
    'date': sorted(dates),
    'customer_id': customer_id_payments,
    'employee_id': employee_id_payments,
    'product_id': product_id_payments,
})

payments.head()

In [None]:
# Add each product's Alcohol_Price to the payments table as a `price` column

# Query to execute: an inner join, so only payments whose product_id matches a
# row in products are kept.
# NOTE(review): the products column is named Product_ID while the query uses
# product_id; this relies on the SQL backend matching column names
# case-insensitively -- confirm.
# NOTE(review): there is no ORDER BY, so the row order of the result is
# whatever the SQL backend returns -- confirm it is still chronological if
# that matters downstream.
query = '''
select p1.*, p2.Alcohol_Price as price
from payments p1
inner join products p2
on p1.product_id = p2.product_id
'''

# Run the query through the sql() helper and replace payments with the result
payments = sql(query)

payments.head()

### 5. Save the database as .CSV

In [None]:
# Write each table to its own CSV file in ./database
# (to_csv is called with its defaults, so the DataFrame index is written too)
products.to_csv('./database/products.csv')
employees.to_csv('./database/employees.csv')
customers.to_csv('./database/customers.csv')
payments.to_csv('./database/payments.csv')