In [31]:
import numpy as np
import pandas as pd

# Load the datasets from the CSV files
colors = pd.read_csv("colors.csv", dtype={'name': 'str', 'rgb': 'str'})
inventories = pd.read_csv("inventories.csv")
inventory_parts = pd.read_csv("inventory_parts.csv")
inventory_sets = pd.read_csv("inventory_sets.csv")
part_categories = pd.read_csv("part_categories.csv")
parts = pd.read_csv("parts.csv")
sets = pd.read_csv("sets.csv")
themes = pd.read_csv("themes.csv")

# Merge inventory_parts with parts to get the part names
inventory_parts = inventory_parts.merge(parts, on='part_num')

# Merge inventory_parts with inventories on inventory_id = id
inventory_parts = inventory_parts.merge(inventories, left_on='inventory_id', right_on='id')

# Inspect the merged DataFrame
print("INVENTORY_PARTS DATAFRAME")
print(inventory_parts.head())
print()
print("INVENTORY_PARTS COLUMNS")
print(inventory_parts.columns)

INVENTORY_PARTS DATAFRAME
   inventory_id  part_num  color_id  quantity is_spare  \
0             1  48379c01        72         1        f   
1             1     48395         7         1        f   
2             1  mcsport6        25         1        f   
3             1    paddle         0         1        f   
4           738  48379c01         0         1        f   

                                                name  part_cat_id   id  \
0                 Sports Promo Figure Base with Feet           27    1   
1  Sports Promo Snowboard from McDonald's Sports ...           27    1   
2  Sports Promo Figure Head Torso Assembly McDona...           13    1   
3    Sports Promo Paddle from McDonald's Sports Sets           27    1   
4                 Sports Promo Figure Base with Feet           27  738   

   version set_num  
0        1  7922-1  
1        1  7922-1  
2        1  7922-1  
3        1  7922-1  
4        1  7920-1  

INVENTORY_PARTS COLUMNS
Index(['inventory_id', 'part_

## Creating the investigate function

In [None]:
def investigate(db):
    """explores the data"""
    
    print(f"Exploring the data in the dataframe")
    print(db.index)
    print("")
    print("COLUMN NAMES")
    print(db.columns)
    print()
    nRow, nCol = db.shape
    print(f'There are {nRow} rows and {nCol} columns')
    print()
    print("DATA TYPES")
    print(db.dtypes)
    print()
    print("NUMBER OF UNIQUE VALUES IN EACH COLUMN")
    print(db.nunique())
    print()
    print("PREVIEWING THE DATASET")
    print(db.head())
    print()
    print(db.info())
    print()
    print("NUMBER OF NON-NULL VALUES IN EACH COLUMN")
    print(db.count())


In [59]:
# Find the biggest set by the number of parts
biggest_set = sets.sort_values('num_parts', ascending=False).iloc[0]
print(f"The biggest set is: {biggest_set['name']} with set number {biggest_set['set_num']} and {biggest_set['num_parts']} parts.")

The biggest set is: Taj Mahal with set number 10189-1 and 5922 parts.


In [58]:
# Sample DataFrame
data = {
    'part_num': ['1234', '5678', '91011'],
    'color_id': [1, 2, 3],
    'quantity': [10, 20, 30]
}
df = pd.DataFrame(data)

# Iterate over the DataFrame rows
for _, row in df.iterrows():
    print(f"Part Number: {row['part_num']}, Color ID: {row['color_id']}, Quantity: {row['quantity']}")

Part Number: 1234, Color ID: 1, Quantity: 10
Part Number: 5678, Color ID: 2, Quantity: 20
Part Number: 91011, Color ID: 3, Quantity: 30


In [55]:
# Get the inventory ID for the biggest set.  
# This filters the inventories DataFrame to only 
# the rows where the set_num is equal to the biggest set's set_num, 
# then gets the ID of the first row.
inventory_id = inventories[inventories['set_num'] == biggest_set['set_num']]['id'].iloc[0]


# Get the parts and colors for the biggest set's inventory
biggest_set_inventory = inventory_parts[inventory_parts['inventory_id'] == inventory_id]

# Create a dataframe with part_num and color_id as indices and quantities as values
biggest_set_parts_dict = biggest_set_inventory.groupby(['part_num', 'color_id'])['quantity'].sum().to_dict()

print(biggest_set_parts_dict)

{('2339', 15): 8, ('2357', 15): 12, ('2412b', 71): 60, ('2419', 15): 12, ('2420', 15): 72, ('2420', 19): 36, ('2431', 15): 64, ('2431', 70): 16, ('2436', 15): 8, ('2450', 15): 32, ('2453a', 15): 8, ('2454a', 71): 4, ('2456', 71): 13, ('2577', 0): 4, ('2730', 15): 28, ('2730', 71): 2, ('2780', 0): 53, ('2877', 15): 240, ('3001', 1): 12, ('3001', 15): 8, ('3002', 4): 16, ('3002', 15): 8, ('3003', 14): 69, ('3003', 15): 8, ('30033', 15): 1, ('3004', 15): 69, ('3004', 19): 102, ('30044', 15): 48, ('3005', 15): 216, ('3006', 15): 5, ('3008', 1): 18, ('3009', 15): 22, ('3010', 15): 61, ('3020', 15): 33, ('3020', 70): 4, ('3021', 14): 4, ('3021', 15): 32, ('3021', 19): 16, ('3022', 15): 8, ('3022', 19): 44, ('3023', 4): 37, ('3023', 15): 174, ('3023', 19): 54, ('3023', 73): 24, ('3023', 320): 8, ('3024', 14): 76, ('3024', 15): 236, ('3024', 19): 44, ('3024', 47): 388, ('3024', 73): 60, ('3028', 70): 2, ('3030', 15): 4, ('3030', 70): 22, ('3031', 15): 21, ('3032', 15): 9, ('3033', 15): 24, ('3

In [56]:
# Define a function to check if a set can be built using parts from the biggest set
def can_build(set_inventory_id, biggest_set_parts_dict):
    # Get the required parts for the set we're checking
    required_parts = inventory_parts[inventory_parts['inventory_id'] == set_inventory_id]
    
    # Check if each required part with its color is available in sufficient quantity
    for _, part in required_parts.iterrows():
        if (part['part_num'], part['color_id']) not in biggest_set_parts_dict or \
           biggest_set_parts_dict[(part['part_num'], part['color_id'])] < part['quantity']:
            return False
    return True

# Check each inventory to see if it can be built from the biggest set's parts
buildable_sets = []
for inventory_id in inventories['id'].unique():
    if can_build(inventory_id, biggest_set_parts_dict):
        set_num = inventories[inventories['id'] == inventory_id]['set_num'].iloc[0]
        buildable_sets.append(set_num)

# Get the details of buildable sets
buildable_set_details = sets[sets['set_num'].isin(buildable_sets)]

print("\nSets that can be built using parts and colors from the biggest set:")
print(buildable_set_details[['set_num', 'name']])


Sets that can be built using parts and colors from the biggest set:
         set_num                                               name
2         0011-3                         Castle 2 for 1 Bonus Offer
12          00-6                                      Special Offer
13          00-7                  Weetabix Promotional Lego Village
85       10065-1                                 White Plates 1 x n
170      10189-1                                          Taj Mahal
...          ...                                                ...
11661       VP-7                   Fire Value Pack (6554 6407 6564)
11662       VP-8                 Star Wars Co-Pack of 7130 and 7150
11665     W098-1                            Watch Set, Classic Blue
11666  W991526-1  Homeschool Introduction to Simple and Motorize...
11672    wwgp1-1                Wild West Limited Edition Gift Pack

[967 rows x 2 columns]
