### Import the libraries

In [None]:
import numpy as np
import pandas as pd
import pandasql as ps # Allows to run SQL statements

In [None]:
# Function to run SQL statements
def sql(query):
    """Run a SQL statement through pandasql and hand back its result.

    ``query`` is forwarded to ``ps.sqldf`` unchanged and no environment
    argument is supplied; whatever ``sqldf`` returns is returned as-is.
    """
    result = ps.sqldf(query)
    return result

### 1. Normalizing the Customers Table

In [None]:
# Load the customers table, using the CSV's 'Unnamed: 0' column as the row index
customers = pd.read_csv(
    "./database/customers.csv",
    index_col='Unnamed: 0',
)

# customers.head()

#### First Table - Countries

In [None]:
# Build the countries lookup table: one row per distinct customer country
unique_countries = customers['country'].unique()
countries = pd.DataFrame({'Country': unique_countries})

# Country code = first three characters of the country name, upper-cased
countries['Country_Code'] = countries['Country'].str[:3].str.upper()

# Surrogate key: consecutive integers starting at 0
countries['country_id'] = list(range(len(countries)))

# countries.head()

In [None]:
# Extracting the country_id for every customer row.
# A left merge on (country, country_code) yields exactly one output row per
# customer, in the customers' original row order. Unlike an inner SQL join
# without ORDER BY, it cannot silently drop or reorder rows, so each id stays
# attached to the customer it belongs to. Customers with no matching country
# get NaN instead of shifting the ids of every later row.
country_lookup = countries.rename(
    columns={'Country': 'country', 'Country_Code': 'country_code'}
)
country_ids = customers[['country', 'country_code']].merge(
    country_lookup,
    on=['country', 'country_code'],
    how='left',
    validate='many_to_one',  # fail loudly if a lookup key is ever duplicated
)[['country_id']]

# Connecting countries to customers by adding the foreign key: country_id.
# Assign by position: the merge result carries a fresh 0..n-1 index, which is
# not guaranteed to match the index that customers was loaded with.
customers['country_id'] = country_ids['country_id'].to_numpy()

In [None]:
# country and country_code are now represented by country_id, so remove them
customers = customers.drop(columns=['country', 'country_code'])

# customers.head()

#### Second Table - Customer_cc

In [None]:
# Build the customer_cc lookup table: one row per distinct credit provider
unique_cc_providers = customers['credit_provider'].unique()
customer_cc = pd.DataFrame({'credit_provider': unique_cc_providers})

# Surrogate key: consecutive integers starting at 0
customer_cc['credit_provider_id'] = list(range(len(customer_cc)))

#customer_cc.head()

In [None]:
# Extracting the credit_provider_id for every customer row.
# A left merge yields exactly one output row per customer, in the customers'
# original row order. Unlike an inner SQL join without ORDER BY, it cannot
# silently drop or reorder rows, so each id stays attached to the customer it
# belongs to. Customers with no matching provider get NaN instead of shifting
# the ids of every later row.
credit_provider_id = customers[['credit_provider']].merge(
    customer_cc,
    on='credit_provider',
    how='left',
    validate='many_to_one',  # fail loudly if a provider is ever duplicated
)[['credit_provider_id']]

# Connecting customer_cc to customers by adding the foreign key:
# credit_provider_id. Assign by position: the merge result carries a fresh
# 0..n-1 index, which is not guaranteed to match the index that customers was
# loaded with.
customers['credit_provider_id'] = (
    credit_provider_id['credit_provider_id'].to_numpy()
)

In [None]:
# credit_provider is now represented by credit_provider_id, so remove it
customers = customers.drop(columns=['credit_provider'])

#customers.head()

### 2. Normalizing the Employees Table

In [None]:
# Load the employees table, using the CSV's 'Unnamed: 0' column as the row index
employees = pd.read_csv(
    "./database/employees.csv",
    index_col='Unnamed: 0',
)

# employees.head()

#### First Table - Departments

In [None]:
# Distinct department names found in the employees table
unique_departments = employees['department'].unique()

# Surrogate keys: consecutive integers starting at 0, one per department
department_id = list(range(len(unique_departments)))

# Assemble the departments lookup table (department_id first, then the name)
departments = pd.DataFrame({
    'department_id': department_id,
    'department': unique_departments,
})

# departments.head()

In [None]:
# Extracting the department_id for every employee row.
# A left merge yields exactly one output row per employee, in the employees'
# original row order. Unlike an inner SQL join without ORDER BY, it cannot
# silently drop or reorder rows, so each id stays attached to the employee it
# belongs to. Employees with no matching department get NaN instead of
# shifting the ids of every later row.
department_ids = employees[['department']].merge(
    departments,
    on='department',
    how='left',
    validate='many_to_one',  # fail loudly if a department is ever duplicated
)[['department_id']]

# Connecting departments to employees by adding the foreign key:
# department_id. Assign by position: the merge result carries a fresh 0..n-1
# index, which is not guaranteed to match the index that employees was loaded
# with.
employees['department_id'] = department_ids['department_id'].to_numpy()

In [None]:
# department is now represented by department_id, so remove it
employees = employees.drop(columns=['department'])

# employees.head()

### 3. Save the Normalized Tables as CSV

In [None]:
# Normalized versions of the two source tables
customers.to_csv('./database/customers_norm.csv')
employees.to_csv('./database/employees_norm.csv')

# Lookup tables created during normalization
countries.to_csv('./database/countries.csv')
customer_cc.to_csv('./database/customer_cc.csv')
departments.to_csv('./database/departments.csv')