In [189]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import hex2color
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np

[LEGO Catalog Database Download](https://rebrickable.com/downloads/)

In [180]:
colors_df = pd.read_csv(r'colors.csv')
elements_df = pd.read_csv(r'elements.csv')
inventories_df = pd.read_csv(r'inventories.csv')
inventory_minifigs_df = pd.read_csv(r'inventory_minifigs.csv')
inventory_parts_df = pd.read_csv(r'inventory_parts.csv')
inventory_sets_df = pd.read_csv(r'inventory_sets.csv')
minifigs_df = pd.read_csv(r'minifigs.csv')
parts_df = pd.read_csv(r'parts.csv')
part_categories_df = pd.read_csv(r'part_categories.csv')
part_relationships_df = pd.read_csv(r'part_relationships.csv')
sets_df = pd.read_csv(r'sets.csv')
themes_df = pd.read_csv(r'themes.csv')

![Rebrickable ERD](https://rebrickable.com/static/img/diagrams/downloads_schema_v3.png)

In [83]:
colors_df.rename(columns={'id':'color_id','name':'color_name'}, inplace=True)
parts_df.rename(columns={'name':'part_name'}, inplace=True)

inventory_parts_colors = pd.merge(inventory_parts_df, colors_df,how='left', left_on='color_id', right_on='color_id')
inventories_sets = pd.merge(inventories_df, sets_df,how='left', left_on='set_num', right_on='set_num')
df = pd.merge(inventory_parts_colors, inventories_sets, how='left', left_on='inventory_id',right_on = 'id')

In [192]:
fig = make_subplots(rows=3, cols=1, subplot_titles=['Subplot 1', 'Subplot 2'])
fig


In [181]:

# Calculate average number of parts per set
num_parts_overtime = df.groupby(["year", 'set_num'])["num_parts"].mean()
avg_pieces_series = num_parts_overtime.groupby('year').mean()

# Calculate average number of unique pieces per set
avg_unique_pieces_per_set = df.groupby(['year', 'set_num'])['part_num'].nunique().reset_index(name='unique_part_count')
avg_diff_pieces_series = avg_unique_pieces_per_set.groupby('year')['unique_part_count'].mean()

# Calculate average number of unique colors per set
diff_colors = df.groupby(['year', 'set_num'])['color_name'].nunique().reset_index(name='unique_color_count')
avg_diff_colors_series = diff_colors.groupby('year')['unique_color_count'].mean()

# Create subplot figure
fig = px.subplots(
    [avg_pieces_series, avg_diff_pieces_series, avg_diff_colors_series],
    subplot_titles=['Average Number of Parts per Set', 'Average Number of Unique Pieces per Set', 'Average Number of Unique Colors per Set'],
    shared_xaxes=True,
    vertical_spacing=0.1,
)

# Update layout for better axis labels
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Count',
)

# Show the plots
fig.show()


AttributeError: module 'plotly.express' has no attribute 'subplots'

In [187]:
num_parts_overtime = df.groupby(["year",'set_num'])["num_parts"].mean()
avg_pieces_series = num_parts_overtime.groupby('year').mean()

avg_unique_pieces_per_set = df.groupby(['year', 'set_num'])['part_num'].nunique().reset_index(name='unique_part_count')
avg_diff_pieces_series = avg_unique_pieces_per_set.groupby('year')['unique_part_count'].mean()

diff_colors = df.groupby(['year', 'set_num'])['color_name'].nunique().reset_index(name='unique_color_count')
diff_colors_per_year = diff_colors.groupby('year')['unique_color_count'].mean()

In [186]:
avg_pieces_series.plot(title = 'Average number of parts per set')


In [184]:
diff_colors_per_year.plot(title = 'Avg Number of different colors per set')


In [185]:
avg_diff_pieces_series.plot(title = 'Avg Number of different types of pieces per set')


In [160]:
def min_max_normalize(series):
    """Apply Min-Max normalization to a pandas Series."""
    min_val = series.min()
    max_val = series.max()
    normalized_series = (series - min_val) / (max_val - min_val)
    return normalized_series

In [162]:
min_max_normalize(num_parts_overtime)

year
1949.0    0.230030
1950.0    0.000000
1953.0    0.036550
1954.0    0.017164
1955.0    0.028702
            ...   
2020.0    0.692345
2021.0    0.835440
2022.0    0.958905
2023.0    1.000000
2024.0    0.418766
Name: num_parts, Length: 74, dtype: float64

In [8]:
# complexity should be number of parts, number of different parts, number of different colors, 

Normilizing data

Average Number of Pieces per Set (Pieces)

In [165]:

def min_max_normalize(series):
    """Apply Min-Max normalization to a pandas Series."""
    min_val = series.min()
    max_val = series.max()
    normalized_series = (series - min_val) / (max_val - min_val)
    return normalized_series

def normalize_lego_metrics(avg_pieces, avg_diff_pieces, avg_diff_colors):
    """Normalize LEGO metrics using Min-Max normalization."""
    normalized_avg_pieces = min_max_normalize(avg_pieces)
    normalized_avg_diff_pieces = min_max_normalize(avg_diff_pieces)
    normalized_avg_diff_colors = min_max_normalize(avg_diff_colors)
    
    return normalized_avg_pieces, normalized_avg_diff_pieces, normalized_avg_diff_colors

def calculate_complexity_index(normalized_avg_pieces, normalized_avg_diff_pieces, normalized_avg_diff_colors, weights):
    """Calculate complexity index using normalized metrics and user-defined weights."""
    complexity_index = (
        weights[0] * normalized_avg_pieces +
        weights[1] * normalized_avg_diff_pieces +
        weights[2] * normalized_avg_diff_colors
    )
    return complexity_index

normalized_avg_pieces, normalized_avg_diff_pieces, normalized_avg_diff_colors = normalize_lego_metrics(
    avg_pieces_series,
    avg_diff_pieces_series,
    avg_diff_colors_series
)

# Define user weights
user_weights = [1/3, 1/3, 1/3]  # Equal weights

# Calculate the complexity index
complexity_index = calculate_complexity_index(
    normalized_avg_pieces,
    normalized_avg_diff_pieces,
    normalized_avg_diff_colors,
    user_weights
)


In [173]:
normalized_df = pd.concat([normalized_avg_pieces, normalized_avg_diff_pieces, normalized_avg_diff_colors], axis=1)
normalized_df.columns = ['Normalized Avg Pieces', 'Normalized Avg Diff Pieces', 'Normalized Avg Diff Colors']
normalized_df.plot()

In [176]:
complexity_index = normalized_avg_pieces+normalized_avg_diff_pieces+normalized_avg_diff_colors
complexity_index.plot()

In [167]:
normalized_avg_diff_pieces

year
1949.0    0.065299
1950.0    0.000000
1953.0    0.013060
1954.0    0.023321
1955.0    0.042346
            ...   
2020.0    0.746833
2021.0    0.799825
2022.0    0.889858
2023.0    1.000000
2024.0    0.879365
Name: unique_part_count, Length: 74, dtype: float64

In [169]:
normalized_avg_diff_colors

year
1949.0    0.217329
1950.0    0.000000
1953.0    0.249023
1954.0    0.218299
1955.0    0.094670
            ...   
2020.0    0.820939
2021.0    0.839587
2022.0    0.906668
2023.0    1.000000
2024.0    0.996090
Name: unique_color_count, Length: 74, dtype: float64