### Database Table Tests


In [147]:
import getpass
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Creates connection to MySQL database.
# The password is never stored in the notebook: it is read from the
# TEK_TRACTOR_DB_PASSWORD environment variable, or prompted for when that is unset.
# User and host can be overridden the same way and default to root@localhost.
db_user = os.environ.get('TEK_TRACTOR_DB_USER', 'root')
db_host = os.environ.get('TEK_TRACTOR_DB_HOST', 'localhost')
db_password = os.environ.get('TEK_TRACTOR_DB_PASSWORD') or getpass.getpass('MySQL password: ')

# quote_plus escapes characters such as '@' or '/' that would otherwise break the URL
engine = create_engine(
    'mysql+pymysql://{}:{}@{}/tek_tractor'.format(db_user, quote_plus(db_password), db_host)
)

In [148]:
def test_function(df):
    '''
    Function to test a database to make sure it is the correct shape with no null values
    '''
    
    # number of rows 
    df_rows = df.shape[0]
    df_rows = str(df_rows)
    
    # number of columns
    df_cols = df.shape[1]
    df_cols = str(df_cols)
    
    # sum of null values 
    df_nulls = df.isnull().sum()[0].sum()
    df_nulls = str(df_nulls)
    
    s = 'There are ' + df_rows + ' rows and ' + df_cols + ' columns with a total of ' + df_nulls + ' null values in this table!' 
    return ('\033[1m' + s + '\033[0m')

#### DateTable Tests


In [149]:
# Load every row of the date table, then print its shape/null summary and column dtypes
df_date = pd.read_sql_query(sql='SELECT * FROM datetable', con=engine)
print(test_function(df=df_date))
df_date.info()

[1mThere are 156 rows and 5 columns with a total of 0 null values in this table![0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   date     156 non-null    object
 1   year     156 non-null    int64 
 2   quarter  156 non-null    object
 3   period   156 non-null    int64 
 4   week     156 non-null    int64 
dtypes: int64(3), object(2)
memory usage: 6.2+ KB


#### Employee Table Tests


In [150]:
# Load every row of the employee table, then print its shape/null summary and column dtypes
df_emp = pd.read_sql_query(sql='SELECT * FROM employee', con=engine)
print(test_function(df=df_emp))
df_emp.info()

[1mThere are 5 rows and 4 columns with a total of 0 null values in this table![0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   employee_id    5 non-null      object
 1   employee_name  5 non-null      object
 2   region         5 non-null      object
 3   pay_grade      5 non-null      object
dtypes: object(4)
memory usage: 288.0+ bytes


#### Extended Service Plan Price Table Tests

In [151]:
# Load every row of the ESP price table, then print its shape/null summary and column dtypes
df_esp_price = pd.read_sql_query(sql='SELECT * FROM esp_price', con=engine)
print(test_function(df=df_esp_price))
df_esp_price.info()

[1mThere are 16 rows and 3 columns with a total of 0 null values in this table![0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   esp_code  16 non-null     object
 1   price     16 non-null     int64 
 2   year      16 non-null     int64 
dtypes: int64(2), object(1)
memory usage: 512.0+ bytes


#### Product Price Table Tests

In [152]:
# Load every row of the product price table, then print its shape/null summary and column dtypes
df_prod_price = pd.read_sql_query(sql='SELECT * FROM prod_price', con=engine)
print(test_function(df=df_prod_price))
df_prod_price.info()

[1mThere are 64 rows and 4 columns with a total of 0 null values in this table![0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   prod_code  64 non-null     object
 1   price      64 non-null     int64 
 2   year       64 non-null     int64 
 3   quarter    64 non-null     object
dtypes: int64(2), object(2)
memory usage: 2.1+ KB


#### Product Information Table Tests

In [153]:
# Load every row of the product information table, then print its shape/null summary and column dtypes
df_prod_info = pd.read_sql_query(sql='SELECT * FROM prod_info', con=engine)
print(test_function(df=df_prod_info))
df_prod_info.info()

[1mThere are 8 rows and 3 columns with a total of 0 null values in this table![0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   prod_code          8 non-null      object
 1   prod_name          8 non-null      object
 2   prod_manufacturer  8 non-null      object
dtypes: object(3)
memory usage: 320.0+ bytes


#### Product Sales Table Tests 

In [154]:
# Load every row of the product sales table with prod_sale_id as the index,
# then print its shape/null summary and column dtypes
df_prod_sales = pd.read_sql_query(
    sql='SELECT * FROM prod_sales',
    con=engine,
    index_col='prod_sale_id',
)
print(test_function(df=df_prod_sales))
df_prod_sales.info()

[1mThere are 4160 rows and 6 columns with a total of 0 null values in this table![0m
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4160 entries, 1 to 4160
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   emp_id     4160 non-null   object
 1   prod_code  4160 non-null   object
 2   quantity   4160 non-null   object
 3   year       4160 non-null   int64 
 4   quarter    4160 non-null   object
 5   date       4160 non-null   object
dtypes: int64(1), object(5)
memory usage: 227.5+ KB


#### Extended Service Plan Sales Table Tests

In [155]:
# Load every row of the ESP sales table with esp_sale_id as the index,
# then print its shape/null summary and column dtypes
df_esp_sales = pd.read_sql_query(
    sql='SELECT * FROM esp_sales',
    con=engine,
    index_col='esp_sale_id',
)
print(test_function(df=df_esp_sales))
df_esp_sales.info()

[1mThere are 4160 rows and 5 columns with a total of 0 null values in this table![0m
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4160 entries, 1 to 4160
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   esp_code  4160 non-null   object
 1   emp_id    4160 non-null   object
 2   quantity  4160 non-null   object
 3   year      4160 non-null   int64 
 4   date      4160 non-null   object
dtypes: int64(1), object(4)
memory usage: 195.0+ KB


### Aggregate View


In [156]:
# Cast the year column of the product sales and product price tables to str
# so both sides of the later merge carry the same key type
df_prod_sales['year'], df_prod_price['year'] = (
    df_prod_sales['year'].astype(str),
    df_prod_price['year'].astype(str),
)

In [164]:
# Merging Extended Service Plan Sales and Price tables
esp_merge1 = pd.merge(df_esp_sales, df_esp_price, how = 'inner', on = ['year', 'esp_code'])

# Merging Product Sales and Price
prod_merge1 = pd.merge(df_prod_sales, df_prod_price, how = 'inner', on = ['prod_code', 'year', 'quarter'])

# Merging the above merge and Product Info
prod_merge2 = pd.merge(prod_merge1, df_prod_info, how = 'inner', on = ['prod_code'])

# Merging Extended Service Plan first merge and Employee table.
# The employee table names its key employee_id; the sales tables use emp_id.
df_emp = df_emp.rename(columns = {'employee_id': 'emp_id'})
esp_merge2 = pd.merge(esp_merge1, df_emp, how = 'inner', on = ['emp_id'])

# Merging date table and Extended Service Plan aggregate
# (right join: every ESP row is kept even when its date is missing from the date table)
date_esp = pd.merge(left = df_date, right = esp_merge2, how = 'right', on = ['date'])

# Merging Product aggregate and Date table (same right join, keeping every product row)
date_prod = pd.merge(left = df_date, right = prod_merge2, how = 'right', on = ['date'])

# Creating a new column to use code data as a merge condition.
# NOTE(review): only the last character of the code is kept, so two codes ending in
# the same digit would be paired with each other - confirm codes stay single-digit.
date_prod['id_num'] = date_prod['prod_code'].str[-1:]
date_esp['id_num'] = date_esp['esp_code'].str[-1:]

# Merging to create the total aggregate table
total = pd.merge(date_prod, date_esp, how = 'inner', on = ['emp_id', 'date', 'id_num'])

# Dropping the repeated year/quarter/period/week columns.
# 'quarter' (date-table quarter carried in by date_esp) duplicates 'quarter_x'
# (date-table quarter carried in by date_prod) because both sides were joined on the
# same date; it has to go before 'quarter_x' is renamed, otherwise the frame ends up
# with two columns called 'quarter'.
total = total.drop(columns = ['year_y_y', 'week_y', 'period_y', 'year_x_y',
                              'quarter_y', 'year_y_x', 'quarter'])

# Renaming the suffixed columns left over from the merges
total = total.rename(columns = {'year_x_x': 'year',
                                'quarter_x': 'quarter',
                                'period_x': 'period',
                                'week_x': 'week',
                                'quantity_x': 'prod_quantity',
                                'price_x': 'prod_price',
                                'quantity_y': 'esp_quantity',
                                'price_y': 'esp_price'})
                              

In [166]:
# Display the aggregate table held in `total`
total

Unnamed: 0,date,year_x_x,quarter_x,period_x,week_x,emp_id,prod_code,quantity_x,price_x,prod_name,prod_manufacturer,id_num,quarter,esp_code,quantity_y,price_y,employee_name,region,pay_grade
0,1/3/2021,2021.0,Q1,1.0,2.0,EMP244,PROD_001,23,14550,Gator XUV 590M,John Deere,1,Q1,ESP_001,7,989,"Evans, Gina",NW,C12
1,1/10/2021,2021.0,Q1,1.0,3.0,EMP244,PROD_001,27,14550,Gator XUV 590M,John Deere,1,Q1,ESP_001,10,989,"Evans, Gina",NW,C12
2,1/17/2021,2021.0,Q1,1.0,4.0,EMP244,PROD_001,37,14550,Gator XUV 590M,John Deere,1,Q1,ESP_001,9,989,"Evans, Gina",NW,C12
3,1/24/2021,2021.0,Q1,2.0,5.0,EMP244,PROD_001,47,14550,Gator XUV 590M,John Deere,1,Q1,ESP_001,12,989,"Evans, Gina",NW,C12
4,1/31/2021,2021.0,Q1,2.0,6.0,EMP244,PROD_001,42,14550,Gator XUV 590M,John Deere,1,Q1,ESP_001,10,989,"Evans, Gina",NW,C12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4155,11/22/2020,2020.0,Q4,12.0,48.0,EMP290,PROD_008,5,11589,Z930M Ztrack,John Deere,8,Q4,ESP_008,0,843,"Allen, Maude",SW,C12
4156,11/29/2020,2020.0,Q4,13.0,49.0,EMP290,PROD_008,7,11589,Z930M Ztrack,John Deere,8,Q4,ESP_008,0,843,"Allen, Maude",SW,C12
4157,12/6/2020,2020.0,Q4,13.0,50.0,EMP290,PROD_008,5,11589,Z930M Ztrack,John Deere,8,Q4,ESP_008,0,843,"Allen, Maude",SW,C12
4158,12/13/2020,2020.0,Q4,13.0,51.0,EMP290,PROD_008,5,11589,Z930M Ztrack,John Deere,8,Q4,ESP_008,0,843,"Allen, Maude",SW,C12


In [130]:
# prod_merge1 = pd.merge(df_prod_sales, df_prod_price, how = 'inner', on = ['prod_code', 'year', 'quarter'])
# prod_merge1

Unnamed: 0,emp_id,prod_code,quantity,year,quarter,date,price
0,EMP244,PROD_001,23,2021,Q1,1/3/2021,14550
1,EMP244,PROD_001,27,2021,Q1,1/10/2021,14550
2,EMP244,PROD_001,37,2021,Q1,1/17/2021,14550
3,EMP244,PROD_001,47,2021,Q1,1/24/2021,14550
4,EMP244,PROD_001,42,2021,Q1,1/31/2021,14550
...,...,...,...,...,...,...,...
4155,EMP290,PROD_008,5,2020,Q4,11/22/2020,11589
4156,EMP290,PROD_008,7,2020,Q4,11/29/2020,11589
4157,EMP290,PROD_008,5,2020,Q4,12/6/2020,11589
4158,EMP290,PROD_008,5,2020,Q4,12/13/2020,11589


In [131]:
# prod_merge2 = pd.merge(prod_merge1, df_prod_info, how = 'inner', on = ['prod_code'])
# prod_merge2

Unnamed: 0,emp_id,prod_code,quantity,year,quarter,date,price,prod_name,prod_manufacturer
0,EMP244,PROD_001,23,2021,Q1,1/3/2021,14550,Gator XUV 590M,John Deere
1,EMP244,PROD_001,27,2021,Q1,1/10/2021,14550,Gator XUV 590M,John Deere
2,EMP244,PROD_001,37,2021,Q1,1/17/2021,14550,Gator XUV 590M,John Deere
3,EMP244,PROD_001,47,2021,Q1,1/24/2021,14550,Gator XUV 590M,John Deere
4,EMP244,PROD_001,42,2021,Q1,1/31/2021,14550,Gator XUV 590M,John Deere
...,...,...,...,...,...,...,...,...,...
4155,EMP290,PROD_008,5,2020,Q4,11/22/2020,11589,Z930M Ztrack,John Deere
4156,EMP290,PROD_008,7,2020,Q4,11/29/2020,11589,Z930M Ztrack,John Deere
4157,EMP290,PROD_008,5,2020,Q4,12/6/2020,11589,Z930M Ztrack,John Deere
4158,EMP290,PROD_008,5,2020,Q4,12/13/2020,11589,Z930M Ztrack,John Deere


In [132]:
# esp_merge2 = pd.merge(esp_merge1, df_emp, how = 'inner', on = ['emp_id'])
# esp_merge2

Unnamed: 0,esp_code,emp_id,quantity,year,date,price,employee_name,region,pay_grade
0,ESP_001,EMP234,3,2020,12/29/2019,843,"Bachmann, Jane",NW,C13
1,ESP_001,EMP234,3,2020,1/5/2020,843,"Bachmann, Jane",NW,C13
2,ESP_001,EMP234,3,2020,1/12/2020,843,"Bachmann, Jane",NW,C13
3,ESP_001,EMP234,6,2020,1/19/2020,843,"Bachmann, Jane",NW,C13
4,ESP_001,EMP234,12,2020,1/26/2020,843,"Bachmann, Jane",NW,C13
...,...,...,...,...,...,...,...,...,...
4155,ESP_008,EMP290,0,2021,11/28/2021,989,"Allen, Maude",SW,C12
4156,ESP_008,EMP290,0,2021,12/5/2021,989,"Allen, Maude",SW,C12
4157,ESP_008,EMP290,0,2021,12/12/2021,989,"Allen, Maude",SW,C12
4158,ESP_008,EMP290,0,2021,12/19/2021,989,"Allen, Maude",SW,C12


In [136]:
# date_esp = pd.merge(left = df_date, right = esp_merge2, how = 'right', on = ['date'])
# date_esp

Unnamed: 0,date,year_x,quarter,period,week,esp_code,emp_id,quantity,year_y,price,employee_name,region,pay_grade
0,2019-12-29,2020.0,Q1,1.0,1.0,ESP_001,EMP234,3,2020,843,"Bachmann, Jane",NW,C13
1,2020-01-05,2020.0,Q1,1.0,2.0,ESP_001,EMP234,3,2020,843,"Bachmann, Jane",NW,C13
2,2020-01-12,2020.0,Q1,1.0,3.0,ESP_001,EMP234,3,2020,843,"Bachmann, Jane",NW,C13
3,2020-01-19,2020.0,Q1,1.0,4.0,ESP_001,EMP234,6,2020,843,"Bachmann, Jane",NW,C13
4,2020-01-26,2020.0,Q1,2.0,5.0,ESP_001,EMP234,12,2020,843,"Bachmann, Jane",NW,C13
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4155,2021-11-28,2021.0,Q4,13.0,49.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12
4156,2021-12-05,2021.0,Q4,13.0,50.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12
4157,2021-12-12,2021.0,Q4,13.0,51.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12
4158,2021-12-19,2021.0,Q4,13.0,52.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12


In [137]:
# date_prod = pd.merge(left = df_date, right = prod_merge2, how = 'right', on = ['date'])
# date_prod

Unnamed: 0,date,year_x,quarter_x,period,week,emp_id,prod_code,quantity,year_y,quarter_y,price,prod_name,prod_manufacturer
0,2021-01-03,2021.0,Q1,1.0,2.0,EMP244,PROD_001,23,2021,Q1,14550,Gator XUV 590M,John Deere
1,2021-01-10,2021.0,Q1,1.0,3.0,EMP244,PROD_001,27,2021,Q1,14550,Gator XUV 590M,John Deere
2,2021-01-17,2021.0,Q1,1.0,4.0,EMP244,PROD_001,37,2021,Q1,14550,Gator XUV 590M,John Deere
3,2021-01-24,2021.0,Q1,2.0,5.0,EMP244,PROD_001,47,2021,Q1,14550,Gator XUV 590M,John Deere
4,2021-01-31,2021.0,Q1,2.0,6.0,EMP244,PROD_001,42,2021,Q1,14550,Gator XUV 590M,John Deere
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4155,2020-11-22,2020.0,Q4,12.0,48.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere
4156,2020-11-29,2020.0,Q4,13.0,49.0,EMP290,PROD_008,7,2020,Q4,11589,Z930M Ztrack,John Deere
4157,2020-12-06,2020.0,Q4,13.0,50.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere
4158,2020-12-13,2020.0,Q4,13.0,51.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere


In [135]:
# Parse the date column of both aggregates and of the date table with pd.to_datetime
prod_merge2['date'] = prod_merge2['date'].pipe(pd.to_datetime)
esp_merge2['date'] = esp_merge2['date'].pipe(pd.to_datetime)
df_date['date'] = df_date['date'].pipe(pd.to_datetime)

# Lift the column limit so wide frames are displayed in full
pd.options.display.max_columns = None

In [144]:
# Inner join of the product and ESP aggregates on employee, date and id_num
total = date_prod.merge(date_esp, on=['emp_id', 'date', 'id_num'], how='inner')
total

Unnamed: 0,date,year_x_x,quarter_x,period_x,week_x,emp_id,prod_code,quantity_x,year_y_x,quarter_y,price_x,prod_name,prod_manufacturer,id_num,year_x_y,quarter,period_y,week_y,esp_code,quantity_y,year_y_y,price_y,employee_name,region,pay_grade
0,2021-01-03,2021.0,Q1,1.0,2.0,EMP244,PROD_001,23,2021,Q1,14550,Gator XUV 590M,John Deere,1,2021.0,Q1,1.0,2.0,ESP_001,7,2021,989,"Evans, Gina",NW,C12
1,2021-01-10,2021.0,Q1,1.0,3.0,EMP244,PROD_001,27,2021,Q1,14550,Gator XUV 590M,John Deere,1,2021.0,Q1,1.0,3.0,ESP_001,10,2021,989,"Evans, Gina",NW,C12
2,2021-01-17,2021.0,Q1,1.0,4.0,EMP244,PROD_001,37,2021,Q1,14550,Gator XUV 590M,John Deere,1,2021.0,Q1,1.0,4.0,ESP_001,9,2021,989,"Evans, Gina",NW,C12
3,2021-01-24,2021.0,Q1,2.0,5.0,EMP244,PROD_001,47,2021,Q1,14550,Gator XUV 590M,John Deere,1,2021.0,Q1,2.0,5.0,ESP_001,12,2021,989,"Evans, Gina",NW,C12
4,2021-01-31,2021.0,Q1,2.0,6.0,EMP244,PROD_001,42,2021,Q1,14550,Gator XUV 590M,John Deere,1,2021.0,Q1,2.0,6.0,ESP_001,10,2021,989,"Evans, Gina",NW,C12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4155,2020-11-22,2020.0,Q4,12.0,48.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,8,2020.0,Q4,12.0,48.0,ESP_008,0,2020,843,"Allen, Maude",SW,C12
4156,2020-11-29,2020.0,Q4,13.0,49.0,EMP290,PROD_008,7,2020,Q4,11589,Z930M Ztrack,John Deere,8,2020.0,Q4,13.0,49.0,ESP_008,0,2020,843,"Allen, Maude",SW,C12
4157,2020-12-06,2020.0,Q4,13.0,50.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,8,2020.0,Q4,13.0,50.0,ESP_008,0,2020,843,"Allen, Maude",SW,C12
4158,2020-12-13,2020.0,Q4,13.0,51.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,8,2020.0,Q4,13.0,51.0,ESP_008,0,2020,843,"Allen, Maude",SW,C12


In [139]:
# total_merge = pd.merge(left = date_prod, right = date_esp, how = 'right', on = ['date', 'emp_id'])
# total_merge
# import functools as ft
# df = [esp_merge2, prod_merge2, df_date]
# df_final = ft.reduce(lambda left, right: pd.merge(left, right, on = ['date']), df)
# df_final
# Side-by-side concatenation: rows are paired by index label, not by any key column
concat_join = pd.concat(objs=[date_prod, date_esp], axis='columns', join='inner')
concat_join

Unnamed: 0,date,year_x,quarter_x,period,week,emp_id,prod_code,quantity,year_y,quarter_y,price,prod_name,prod_manufacturer,date.1,year_x.1,quarter,period.1,week.1,esp_code,emp_id.1,quantity.1,year_y.1,price.1,employee_name,region,pay_grade
0,2021-01-03,2021.0,Q1,1.0,2.0,EMP244,PROD_001,23,2021,Q1,14550,Gator XUV 590M,John Deere,2019-12-29,2020.0,Q1,1.0,1.0,ESP_001,EMP234,3,2020,843,"Bachmann, Jane",NW,C13
1,2021-01-10,2021.0,Q1,1.0,3.0,EMP244,PROD_001,27,2021,Q1,14550,Gator XUV 590M,John Deere,2020-01-05,2020.0,Q1,1.0,2.0,ESP_001,EMP234,3,2020,843,"Bachmann, Jane",NW,C13
2,2021-01-17,2021.0,Q1,1.0,4.0,EMP244,PROD_001,37,2021,Q1,14550,Gator XUV 590M,John Deere,2020-01-12,2020.0,Q1,1.0,3.0,ESP_001,EMP234,3,2020,843,"Bachmann, Jane",NW,C13
3,2021-01-24,2021.0,Q1,2.0,5.0,EMP244,PROD_001,47,2021,Q1,14550,Gator XUV 590M,John Deere,2020-01-19,2020.0,Q1,1.0,4.0,ESP_001,EMP234,6,2020,843,"Bachmann, Jane",NW,C13
4,2021-01-31,2021.0,Q1,2.0,6.0,EMP244,PROD_001,42,2021,Q1,14550,Gator XUV 590M,John Deere,2020-01-26,2020.0,Q1,2.0,5.0,ESP_001,EMP234,12,2020,843,"Bachmann, Jane",NW,C13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4155,2020-11-22,2020.0,Q4,12.0,48.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,2021-11-28,2021.0,Q4,13.0,49.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12
4156,2020-11-29,2020.0,Q4,13.0,49.0,EMP290,PROD_008,7,2020,Q4,11589,Z930M Ztrack,John Deere,2021-12-05,2021.0,Q4,13.0,50.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12
4157,2020-12-06,2020.0,Q4,13.0,50.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,2021-12-12,2021.0,Q4,13.0,51.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12
4158,2020-12-13,2020.0,Q4,13.0,51.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,2021-12-19,2021.0,Q4,13.0,52.0,ESP_008,EMP290,0,2021,989,"Allen, Maude",SW,C12


In [141]:
# Keep the last character of each product / ESP code in an id_num column
date_prod['id_num'] = date_prod['prod_code'].str.slice(start=-1)
date_esp['id_num'] = date_esp['esp_code'].str.slice(start=-1)

In [142]:
# Display the product aggregate held in `date_prod`
date_prod

Unnamed: 0,date,year_x,quarter_x,period,week,emp_id,prod_code,quantity,year_y,quarter_y,price,prod_name,prod_manufacturer,id_num
0,2021-01-03,2021.0,Q1,1.0,2.0,EMP244,PROD_001,23,2021,Q1,14550,Gator XUV 590M,John Deere,1
1,2021-01-10,2021.0,Q1,1.0,3.0,EMP244,PROD_001,27,2021,Q1,14550,Gator XUV 590M,John Deere,1
2,2021-01-17,2021.0,Q1,1.0,4.0,EMP244,PROD_001,37,2021,Q1,14550,Gator XUV 590M,John Deere,1
3,2021-01-24,2021.0,Q1,2.0,5.0,EMP244,PROD_001,47,2021,Q1,14550,Gator XUV 590M,John Deere,1
4,2021-01-31,2021.0,Q1,2.0,6.0,EMP244,PROD_001,42,2021,Q1,14550,Gator XUV 590M,John Deere,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4155,2020-11-22,2020.0,Q4,12.0,48.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,8
4156,2020-11-29,2020.0,Q4,13.0,49.0,EMP290,PROD_008,7,2020,Q4,11589,Z930M Ztrack,John Deere,8
4157,2020-12-06,2020.0,Q4,13.0,50.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,8
4158,2020-12-13,2020.0,Q4,13.0,51.0,EMP290,PROD_008,5,2020,Q4,11589,Z930M Ztrack,John Deere,8


In [57]:
# Product price joined to product sales; no `on` is given, so pandas joins on
# whichever column names the two frames have in common
merge_prod1 = df_prod_price.merge(df_prod_sales, how='inner')

# Attach the product info columns to the price/sales merge
merge_prod2 = merge_prod1.merge(df_prod_info, how='inner')

# Extended Service Plan price joined to Extended Service Plan sales
merge_esp1 = df_esp_price.merge(df_esp_sales, how='inner')

# The employee table calls its key employee_id; rename it in place to emp_id
df_emp.rename(columns=dict(employee_id='emp_id'), inplace=True)

# Keep every employee and attach their ESP rows
merge_esp2 = df_emp.merge(merge_esp1, how='left', on='emp_id')

# ESP rows joined to the date table on their common columns
date_esp = merge_esp2.merge(df_date, how='inner')

# Every date-table row, with the matching product rows where they exist
date_prod = merge_prod2.merge(df_date, how='right', on='date')

In [65]:
# Remove the product-side copies of year and quarter left behind by the date merge
date_prod.drop(columns=['year_x', 'quarter_x'], inplace=True)

In [67]:
# Strip the merge suffix from the remaining year and quarter columns
date_prod.rename(columns=dict(year_y='year', quarter_y='quarter'), inplace=True)
                              

#### Sending Merged Data to Database 

In [75]:
# Write both merged tables to the database, replacing any existing copies.
# Rows of date_prod containing a null are removed first; date_esp is written as is.
date_prod = date_prod.dropna(axis=0, how='any')
date_esp.to_sql(name='merged_esp', con=engine, if_exists='replace', index=False)
date_prod.to_sql(name='merged_prod', con=engine, if_exists='replace', index=False)

4120