In [3]:
#####
###
##
#    SYNOPSIS
#
#    As a small data analytics firm, our client approached us with an urgent project to collect information
#    about this year’s LEGO sets. They are part of the manufacturing process of LEGO pieces; and want to do 
#    predictive data modelling to find out what materials will be more in demand next year, based on this
#    year’s information, so that they can optimise their manufacturing process.
#
#    They have requested the information be put into database format so that it can be accessed in a way
#    that they are already familiar with.
#
#    Based on the timeframe of 1 week, we have assigned the following team members to this project:
#
#    CONTRIBUTORS
#    
#    Sylvia Broadbent @github/Supasyl
#    Cicily George @github/CicilyGeorge
#    Daniel Sobral @github/D0SO
#    John Bingley @github/JB-DA
#
#    Source and output can be found (with access) on https://github.com/Supasyl/ETL_project
#
##
###
#####


### SETTINGS
##
# Dependencies
import pandas as pd
from sqlalchemy import create_engine
import requests
import json


### LOAD DATA FROM CSV
##
# Load pre-downloaded information
df_colors = pd.read_csv( 'data_raw/colors.csv' )
df_elements = pd.read_csv( 'data_raw/elements.csv' )
df_inventories = pd.read_csv( 'data_raw/inventories.csv' )
df_inventory_minifigs = pd.read_csv( 'data_raw/inventory_minifigs.csv' )
df_inventory_parts = pd.read_csv( 'data_raw/inventory_parts.csv' )
df_inventory_sets = pd.read_csv( 'data_raw/inventory_sets.csv' )
df_minifigs = pd.read_csv( 'data_raw/minifigs.csv' )
df_part_categories = pd.read_csv( 'data_raw/part_categories.csv' )
df_part_relationships = pd.read_csv( 'data_raw/part_relationships.csv' )
df_parts = pd.read_csv( 'data_raw/parts.csv' )
df_sets = pd.read_csv( 'data_raw/sets.csv' )
df_themes = pd.read_csv( 'data_raw/themes.csv' )


### LOAD DATA FROM API
##
# Left as proof of code only. Results were stored to sets2020.json
# API requires ' to be used in URL, converted to %27 for ease of use
# Comment out code block and api_key before publishing
api_key = "3-DLfb-T3ZA-qQLjW"
url = "https://brickset.com/api/v3.asmx/getSets?"
query_url = f"{url}apiKey={api_key}&userHash=&params={{ %27year%27:%272020%27, %27pageSize%27 : 900 }}"

response = requests.get(query_url)

with open('sets2020.json', 'w') as x:
    json.dump(response.json(), x)


### CLEAN & JOIN DATA
##
# Looking at only data from 2020 sets
df_clean_sets = df_sets.loc[ df_sets[ 'year' ] == 2020 ].copy()
df_clean_sets.drop( 'year', axis = 1, inplace = True )

# Inventories
df_inventories = df_inventories.rename( columns = { 'id' : 'inventory_id' })
df_temp = pd.merge( df_clean_sets, df_inventories, how = 'inner', on = 'set_num' )
df_clean_inventories = df_temp[[ 'inventory_id', 'version', 'set_num' ]]

# Inventory Sets
df_temp = pd.merge( df_clean_sets, df_inventory_sets, how = 'inner', on = 'set_num' )
df_clean_inventory_sets = df_temp[[ 'inventory_id', 'set_num', 'quantity' ]]

# Themes
df_themes = df_themes.rename( columns = { 'id' : 'theme_id', 'name' : 'theme_name' })
df_temp = pd.merge( df_themes, df_clean_sets, how = 'inner', on = 'theme_id' )
df_clean_themes = df_temp[[ 'theme_id', 'theme_name', 'parent_id' ]]

# Inventory Minifigs
df_temp = pd.merge( df_clean_inventories, df_inventory_minifigs, how = 'inner', on = 'inventory_id' )
df_clean_inventory_minifigs = df_temp[[ 'inventory_id', 'fig_num', 'quantity' ]]

# Minifigs
df_temp = pd.merge( df_clean_inventory_minifigs, df_minifigs, how = 'inner', on = 'fig_num' )
df_clean_minifigs = df_temp[[ 'fig_num', 'name', 'num_parts' ]]

# Inventory Parts
df_temp = pd.merge( df_inventory_parts, df_clean_inventories, how = 'inner', on = 'inventory_id' )
df_clean_inventory_parts = df_temp[[ 'inventory_id', 'part_num', 'color_id', 'quantity', 'is_spare' ]]
df_clean_inventory_parts[ 'is_spare' ] = df_clean_inventory_parts[ 'is_spare' ].map({ 't' : True, 'f' : False })

# Colours
df_colors = df_colors.rename( columns = { 'id' : 'color_id' })
df_temp = pd.merge( df_clean_inventory_parts, df_colors, how = 'inner', on = 'color_id' )
df_clean_colors = df_temp[[ 'color_id', 'name', 'rgb', 'is_trans' ]]
df_clean_colors[ 'is_trans' ] = df_clean_colors[ 'is_trans' ].map({ 't' : True, 'f' : False })

# Parts
df_temp = pd.merge( df_clean_inventory_parts, df_parts, how = 'inner', on = 'part_num' )
df_clean_parts = df_temp[[ 'part_num', 'name', 'part_cat_id' ]]

# Elements
df_temp = pd.merge( df_clean_parts, df_elements, how = 'inner', on = 'part_num' )
df_clean_elements = df_temp[[ 'element_id', 'part_num', 'color_id' ]]

# Part Categories
df_part_categories = df_part_categories.rename( columns = { 'id' : 'part_cat_id', 'name' : 'part_name' })
df_temp = pd.merge( df_clean_parts, df_part_categories, how = 'inner', on = 'part_cat_id' )
df_clean_part_categories = df_temp[[ 'part_cat_id', 'part_name' ]]

# Part Relationships
df_part_relationships = df_part_relationships.rename( columns = { 'child_part_num' : 'part_num' })
df_temp = pd.merge( df_clean_parts, df_part_relationships, how = 'inner', on = 'part_num' )
df_clean_part_relationships = df_temp[[ 'rel_type', 'part_num', 'parent_part_num' ]]


# ### PUSH TO DATABASE
# ##
# Connect to database
connection_string = "postgres:postgres@localhost:5432/Lego_db"
engine = create_engine(f'postgresql://{connection_string}')

# Get table names
engine.table_names()

# Load dataframes into database
df_clean_colors.tosql( name = 'colors', con = engine, if_exists = 'append', index = True )
df_clean_elements.tosql( name = 'elements', con = engine, if_exists = 'append', index = True )
df_clean_inventories.tosql( name = 'inventories', con = engine, if_exists = 'append', index = True )
df_clean_inventory_minifigs.tosql( name = 'inventory_minifigs', con = engine, if_exists = 'append', index = True )
df_clean_inventory_parts.tosql( name = 'inventory_parts', con = engine, if_exists = 'append', index = True )
df_clean_inventory_sets.tosql( name = 'inventory_sets', con = engine, if_exists = 'append', index = True )
df_clean_minifigs.tosql( name = 'minifigs', con = engine, if_exists = 'append', index = True )
df_clean_part_categories.tosql( name = 'part_categories', con = engine, if_exists = 'append', index = True )
df_clean_part_relationships.tosql( name = 'part_relationships', con = engine, if_exists = 'append', index = True )
df_clean_parts.tosql( name = 'parts', con = engine, if_exists = 'append', index = True )
df_clean_sets.tosql( name = 'sets', con = engine, if_exists = 'append', index = True )
df_clean_themes.tosql( name = 'themes', con = engine, if_exists = 'append', index = True )

# Query records in database
pd.read_sql_query('select * from sets', con=engine).head()


### VIEW DATA (Validation Purposes Only)
##
# Set viewHeaders to yes/no to display tables, used for validation purposes only
viewHeaders = 'yes'
headSize = 5

if viewHeaders == 'yes':
    
    display( df_clean_colors \
        .head( headSize ) \
        .style.set_caption( 'Table: Colour value and names' ))
    
    display( df_clean_elements \
        .head( headSize ) \
        .style.set_caption( 'Table: Parts and colour combinations' ))
    
    display( df_clean_inventories \
        .head( headSize ) \
        .style.set_caption( 'Table: Rebrickable ID and Lego ID' ))
    
    display( df_clean_inventory_minifigs \
        .head( headSize ) \
        .style.set_caption( 'Table: Mini-figures in sets using Rebrickable ID' ))
    
    display( df_clean_inventory_parts \
        .head( headSize ) \
        .style.set_caption( 'Table: Parts in sets using Rebrickable ID' ))
    
    display( df_clean_inventory_sets \
        .head( headSize ) \
        .style.set_caption( 'Table: Lego sets' ))
    
    display( df_clean_minifigs \
        .head( headSize ) \
        .style.set_caption( 'Table: Mini-figures and their ID' ))
    
    display( df_clean_part_categories \
        .head( headSize ) \
        .style.set_caption( 'Table: Type of brick' ))
    
    display( df_clean_part_relationships \
        .head( headSize ) \
        .style.set_caption( 'Table: Parts and parents if applicable' ))
    
    display( df_clean_parts \
        .head( headSize ) \
        .style.set_caption( 'Table: Parts and descriptions' ))
    
    display( df_clean_sets \
        .head( headSize ) \
        .style.set_caption( 'Table: Sets using Lego ID' ))
    
    display( df_clean_themes \
        .head( headSize ) \
        .style.set_caption( 'Table: Theme names and their Rebrickable ID' ))
    # END IF


### LEGACY CODE
##
#

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,color_id,name,rgb,is_trans
0,322,Medium Azure,36AEBF,False
1,322,Medium Azure,36AEBF,False
2,322,Medium Azure,36AEBF,False
3,322,Medium Azure,36AEBF,False
4,322,Medium Azure,36AEBF,False


Unnamed: 0,element_id,part_num,color_id
0,6031916,11954,0
1,6108906,11954,1
2,6036771,11954,4
3,6022952,11954,14
4,6164386,11954,73


Unnamed: 0,inventory_id,version,set_num
0,74939,1,0241401208-1
1,75721,1,0744023726-1
2,75724,1,0744023734-1
3,75722,1,0744024471-1
4,43322,1,10270-1


Unnamed: 0,inventory_id,fig_num,quantity
0,75721,fig-002113,1
1,75724,fig-010692,1
2,43322,fig-008333,1
3,43322,fig-008334,1
4,43322,fig-008335,1


Unnamed: 0,inventory_id,part_num,color_id,quantity,is_spare
0,34280,11954,322,2,False
1,34280,14720,71,4,False
2,34280,14769pr0080,15,2,False
3,34280,15458,0,2,False
4,34280,15535,0,4,False


Unnamed: 0,inventory_id,set_num,quantity
0,77818,41900-1,1
1,77818,41902-1,1
2,77818,41905-1,1
3,77818,41908-1,1
4,77814,60247-1,1


Unnamed: 0,fig_num,name,num_parts
0,fig-002113,"Moana - Red/Pink Shirt, Tan Skirt",4
1,fig-002113,"Moana - Red/Pink Shirt, Tan Skirt",4
2,fig-002113,"Moana - Red/Pink Shirt, Tan Skirt",4
3,fig-002113,"Moana - Red/Pink Shirt, Tan Skirt",4
4,fig-010692,"Classic Spaceman, Orange with Airtanks",5


Unnamed: 0,part_cat_id,part_name
0,40,Technic Panels
1,40,Technic Panels
2,40,Technic Panels
3,40,Technic Panels
4,40,Technic Panels


Unnamed: 0,rel_type,part_num,parent_part_num
0,A,11954,62531
1,A,11954,62531
2,A,11954,62531
3,A,11954,62531
4,A,11954,62531


Unnamed: 0,part_num,name,part_cat_id
0,11954,Technic Panel Curved 11 x 3 with 10 Pin Holes through Panel Surface,40
1,11954,Technic Panel Curved 11 x 3 with 10 Pin Holes through Panel Surface,40
2,11954,Technic Panel Curved 11 x 3 with 10 Pin Holes through Panel Surface,40
3,11954,Technic Panel Curved 11 x 3 with 10 Pin Holes through Panel Surface,40
4,11954,Technic Panel Curved 11 x 3 with 10 Pin Holes through Panel Surface,40


Unnamed: 0,set_num,name,theme_id,num_parts
21,0241401208-1,Cute Ideas,497,0
54,0744023726-1,Disney Princess: Enchanted Treasury,497,4
55,0744023734-1,LEGO Minifigure: A Visual History New Edition,497,5
56,0744024471-1,100 Ways to Rebuild the World,497,0
314,10270-1,Bookshop,155,2504


Unnamed: 0,theme_id,theme_name,parent_id
0,1,Technic,
1,1,Technic,
2,1,Technic,
3,1,Technic,
4,1,Technic,
