In [14]:
#####
###
##
#    SYNOPSIS
#
#    As a small data analytics firm, we were approached by a client with an urgent project to collect
#    information about this year’s LEGO sets. The client is part of the manufacturing process for LEGO
#    pieces and wants to do predictive data modelling to find out which materials will be in higher
#    demand next year, based on this year’s information, so that they can optimise their manufacturing process.
#
#    They have requested the information be put into database format so that it can be accessed in a way
#    that they are already familiar with.
#
#    Based on the timeframe of 1 week, we have assigned the following team members to this project:
#
#    CONTRIBUTORS
#    
#    Sylvia Broadbent @github/Supasyl
#    Cicily George @github/CicilyGeorge
#    Daniel Sobral @github/D0SO
#    John Bingley @github/JB-DA
#
#    Source and output can be found (with access) on https://github.com/Supasyl/ETL_project
#
##
###
#####


### SETTINGS
##
# Dependencies
import pandas as pd
from sqlalchemy import create_engine
import requests
import json


### LOAD DATA FROM CSV
##
# Load pre-downloaded information
# Every raw CSV lives in one directory; change DATA_DIR here if the files are moved.
DATA_DIR = 'data_raw'


def load_raw_csv( table_name ):
    """Read `<DATA_DIR>/<table_name>.csv` into a DataFrame using pandas' default parsing.

    Parameters
    ----------
    table_name : str
        File name of the CSV without its `.csv` extension (e.g. 'colors').

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    return pd.read_csv( f'{DATA_DIR}/{table_name}.csv' )


# One DataFrame per CSV file; each variable is named after the file it was read from
df_colors = load_raw_csv( 'colors' )
df_elements = load_raw_csv( 'elements' )
df_inventories = load_raw_csv( 'inventories' )
df_inventory_minifigs = load_raw_csv( 'inventory_minifigs' )
df_inventory_parts = load_raw_csv( 'inventory_parts' )
df_inventory_sets = load_raw_csv( 'inventory_sets' )
df_minifigs = load_raw_csv( 'minifigs' )
df_part_categories = load_raw_csv( 'part_categories' )
df_part_relationships = load_raw_csv( 'part_relationships' )
df_parts = load_raw_csv( 'parts' )
df_sets = load_raw_csv( 'sets' )
df_themes = load_raw_csv( 'themes' )


### LOAD DATA FROM API
##
#


### CLEAN DATA
##
#


### JOIN DATA
##
#


### PUSH TO DATABASE
##
#


### VIEW DATA (Validation Purposes Only)
##
# Comment out when not required, used for validation purposes only

# Number of leading rows shown for each table
headsize = 3

# ( DataFrame, caption ) pairs, listed in the order they are displayed
preview_tables = [
    ( df_colors,             'Table: Colour value and names' ),
    ( df_elements,           'Table: Parts and colour combinations' ),
    ( df_inventories,        'Table: Rebrickable ID and Lego ID' ),
    ( df_inventory_minifigs, 'Table: Mini-figures in sets using Rebrickable ID' ),
    ( df_inventory_parts,    'Table: Parts in sets using Rebrickable ID' ),
    ( df_inventory_sets,     'Table: Lego sets' ),
    ( df_minifigs,           'Table: Mini-figures and their ID' ),
    ( df_part_categories,    'Table: Type of brick' ),
    ( df_part_relationships, 'Table: Parts and parents if applicable' ),
    ( df_parts,              'Table: Parts and descriptions' ),
    ( df_sets,               'Table: Sets using Lego ID' ),
    ( df_themes,             'Table: Theme names and their Rebrickable ID' ),
]

# Render the first `headsize` rows of every table as a captioned, styled preview
for preview_frame, preview_caption in preview_tables:
    display( preview_frame \
            .head( headsize ) \
            .style.set_caption( preview_caption ))


### LEGACY CODE
##
#

Unnamed: 0,id,name,rgb,is_trans
0,-1,[Unknown],0033B2,f
1,0,Black,05131D,f
2,1,Blue,0055BF,f


Unnamed: 0,element_id,part_num,color_id
0,4275423,53657,1004
1,6194308,92926,71
2,6229123,26561,4


Unnamed: 0,id,version,set_num
0,1,1,7922-1
1,3,1,3931-1
2,4,1,6942-1


Unnamed: 0,inventory_id,fig_num,quantity
0,3,fig-001549,1
1,4,fig-000764,1
2,19,fig-000555,1


Unnamed: 0,inventory_id,part_num,color_id,quantity,is_spare
0,1,48379c01,72,1,f
1,1,48395,7,1,f
2,1,mcsport6,25,1,f


Unnamed: 0,inventory_id,set_num,quantity
0,35,75911-1,1
1,35,75912-1,1
2,39,75048-1,1


Unnamed: 0,fig_num,name,num_parts
0,fig-000001,Toy Store Employee,4
1,fig-000002,Customer Kid,4
2,fig-000003,"Assassin Droid, White",8


Unnamed: 0,id,name
0,1,Baseplates
1,3,Bricks Sloped
2,4,"Duplo, Quatro and Primo"


Unnamed: 0,rel_type,child_part_num,parent_part_num
0,M,3192a,3192
1,A,50990b,50990a
2,P,27382pr0005,27382


Unnamed: 0,part_num,name,part_cat_id,part_material
0,3434,Sticker Sheet for Set 653-1,58,Cardboard/Paper
1,4219,"Sticker Sheet for Set 939-1 with flags for AU, IE",58,Plastic
2,4229,Sticker Sheet for Set 295-1,58,Plastic


Unnamed: 0,set_num,name,year,theme_id,num_parts
0,001-1,Gears,1965,1,43
1,0011-2,Town Mini-Figures,1978,84,12
2,0011-3,Castle 2 for 1 Bonus Offer,1987,199,0


Unnamed: 0,id,name,parent_id
0,1,Technic,
1,2,Arctic Technic,1.0
2,3,Competition,1.0


# Push data to the database

(created by Sylvia)

Add your PostgreSQL password and database name to the connection code below:

In [None]:
# Import dependencies
from sqlalchemy import create_engine
import pandas as pd

In [None]:
# connect to the local database
# The password and database name are taken from the PGPASSWORD / PGDATABASE environment
# variables so that real credentials do not have to be saved inside the notebook.
# When a variable is not set, the original placeholder is used instead and must be
# replaced by hand before the connection can succeed.
import os
from urllib.parse import quote

# Percent-encode the password so characters such as '@' or '/' cannot break the URL
db_password = quote(os.environ.get('PGPASSWORD', '<password>'), safe='')
db_name = os.environ.get('PGDATABASE', '<database_name>')

engine = create_engine(f"postgresql://postgres:{db_password}@localhost:5432/{db_name}")

In [None]:
# check for table names
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names and works on both.
from sqlalchemy import inspect

inspect(engine).get_table_names()

In [None]:
# Load csv converted DataFrames into database
# Destination table name -> DataFrame read from the matching CSV file.
# The DataFrames are the `df_*` variables created when the CSV files were loaded;
# the un-prefixed names used previously are not defined anywhere in this notebook.
# Entries are kept in the original push order.
# NOTE(review): the table is called 'parts_category' while the DataFrame is
# df_part_categories - the table name is kept as it was; confirm it matches the schema.
# NOTE(review): df_part_relationships is not pushed, as before - confirm this is intended.
tables_to_push = {
    'elements': df_elements,
    'inventory_parts': df_inventory_parts,
    'parts': df_parts,
    'colors': df_colors,
    'parts_category': df_part_categories,
    'inventory_minifigs': df_inventory_minifigs,
    'minifigs': df_minifigs,
    'inventory_sets': df_inventory_sets,
    'inventories': df_inventories,
    'sets': df_sets,
    'themes': df_themes,
}

# if_exists='append' adds rows to an existing table, so re-running this cell inserts the
# same rows again; index=True also writes each DataFrame's index as a column.
for table_name, table_frame in tables_to_push.items():
    table_frame.to_sql(name=table_name, con=engine, if_exists='append', index=True)


# Code to test data in pgAdmin

In [None]:
# (created by Sylvia)

In [None]:
# Read the `sets` table back from the database and preview its first rows
pd.read_sql_query(
    sql='select * from sets',
    con=engine,
).head()
