In [1]:
from pathlib import Path
import pandas as pd

# Directory holding the raw CSV files, relative to the working directory.
folder = Path("./data")

# Lazy iterator over the CSV files directly inside `folder`.
csv_files = folder.glob("*.csv")

# Filled by the loading cell: CSV file stem -> DataFrame read from that file.
dfs = dict()

In [2]:

# Read every CSV in the data folder and register it under its file stem
# (file name without the ".csv" extension).
for file in folder.glob("*.csv"):
    table_name = file.stem
    dfs[table_name] = pd.read_csv(file)

# Show which tables were loaded.
print(dfs.keys())

dict_keys(['tools_goods', 'miscellaneous', 'artwork', 'interior_structures', 'dress_up', 'accessories', 'umbrellas', 'fish', 'clothing_other', 'posters', 'fencing', 'sea_creatures', 'photos', 'shoes', 'other', 'bags', 'ceiling_decor', 'bottoms', 'tops', 'housewares', 'music', 'headwear', 'recipes', 'rugs', 'socks', 'fossils', 'gyroids', 'wall_mounted', 'floors', 'wallpaper', 'insects'])


In [3]:
# List the column names available in the housewares table.
housewares_columns = dfs["housewares"].columns
print(housewares_columns.tolist())

['Name', 'Image', 'Variation', 'Body Title', 'Pattern', 'Pattern Title', 'DIY', 'Body Customize', 'Pattern Customize', 'Pattern Customize Options', 'Kit Cost', 'Kit Type', 'Cyrus Customize Price', 'Buy', 'Sell', 'Color 1', 'Color 2', 'Size', 'Surface', 'Exchange Price', 'Exchange Currency', 'Source', 'Source Notes', 'Season/Event', 'Season/Event Exclusive', 'HHA Base Points', 'HHA Concept 1', 'HHA Concept 2', 'HHA Series', 'HHA Set', 'HHA Category', 'Interact', 'Tag', 'Outdoor', 'Speaker Type', 'Lighting Type', 'Catalog', 'Version Added', 'Unlocked?', 'Filename', 'Variant ID', 'Internal ID', 'Unique Entry ID']


In [4]:
# Preview the first five rows of the housewares table.
dfs["housewares"].head(n=5)

Unnamed: 0,Name,Image,Variation,Body Title,Pattern,Pattern Title,DIY,Body Customize,Pattern Customize,Pattern Customize Options,...,Outdoor,Speaker Type,Lighting Type,Catalog,Version Added,Unlocked?,Filename,Variant ID,Internal ID,Unique Entry ID
0,? Block,,,,,,No,No,No,,...,Yes,Does not play music,No lighting,Not in catalog,1.8.0,Yes,FtrMarioSquareA,,13821,mxpHakMcmNJgMH5eo
1,2021 celebratory arch,,,,,,No,No,No,,...,Yes,Does not play music,No lighting,Seasonal,1.6.0,Yes,Ftr2021Arch,,13543,kzegJttZt8DzxwamN
2,2022 celebratory arch,,,,,,No,No,No,,...,Yes,Does not play music,No lighting,Seasonal,2.0.0,Yes,Ftr2022Arch,,14590,7sEJLEMRhmr8kh4pG
3,ABD,,Silver,Color,,,No,No,No,,...,No,Does not play music,No lighting,Not for sale,2.0.0,Yes,FtrATM_Remake_0_0,0_0,14575,pdwvaGBRsGoyDWcuC
4,ABD,,Blue,Color,,,No,No,No,,...,No,Does not play music,No lighting,Not for sale,2.0.0,Yes,FtrATM_Remake_1_0,1_0,14575,8C6KfKTkJ3QLc4ZL9


In [5]:
def add_hex_id_col(df: pd.DataFrame) -> None:
    """
    Add an "Internal ID as hex" column derived from the "Internal ID" column.

    The frame is modified in place. Frames without an "Internal ID" column
    are left untouched.

    Each ID is rendered as uppercase hexadecimal, zero-padded to at least
    4 digits (e.g. 13821 -> "35FD", 255 -> "00FF"). Missing IDs (NaN/None/NA)
    produce None instead of raising.

    Args:
        df (pd.DataFrame): Frame to extend in place.

    Raises:
        ValueError: If an ID is present but is not a whole number.
    """
    if "Internal ID" not in df.columns:
        return

    def _to_hex(value):
        # A single missing ID makes pandas read the whole column as float64,
        # so both NaN and float-typed whole numbers must be handled here.
        if pd.isna(value):
            return None
        as_int = int(value)
        if as_int != value:
            raise ValueError(f"Internal ID is not a whole number: {value!r}")
        return format(as_int, "04X")  # 4 digits, uppercase, padded with zeros

    df["Internal ID as hex"] = df["Internal ID"].apply(_to_hex)

In [6]:
def make_variant_info(variantId: str) -> "tuple[int, int] | None":
    """
    Split a "Variant ID" value into its variation and scaled pattern numbers.

    The column "Variant ID" has values either NaN or X_Y where X and Y are
    between 0 and 9. X are the variants, Y are the patterns.

    Args:
        variantId (str): Variant ID of the item, e.g. "1_0". Any non-string
            or empty value (NaN, None, pd.NA, "") is treated as "no variant".

    Returns:
        tuple[int, int] | None: ``(variation, pattern * 32)`` as integers,
        or None when the item has no variations or patterns.

    Raises:
        ValueError: If the string is not of the form "X_Y" with integer parts.
    """
    # Check the type first: missing-value markers such as pd.NA raise when
    # evaluated for truthiness, so `not variantId` must only see strings.
    if not isinstance(variantId, str) or not variantId:
        return None  # No variations or patterns

    parts = variantId.split("_")
    if len(parts) != 2:
        raise ValueError(
            f"Expected Variant ID of the form 'X_Y', got {variantId!r}")
    variation, pattern = parts

    # The pattern index is scaled by 32 (equivalent to a 5-bit left shift).
    # NOTE(review): presumably the consumer expects the pattern in the higher
    # bits - confirm against whatever reads this column.
    return (int(variation), int(pattern) * 32)

In [7]:
def add_variation_pattern_col(df: pd.DataFrame):
    """
    Add a "Variant Pattern Encoded" column built from "Variant ID".

    Every "Variant ID" value is passed through make_variant_info and the
    result is stored in the new column. The frame is modified in place;
    frames without a "Variant ID" column are left unchanged.

    Args:
        df (pd.DataFrame): Frame to extend in place.
    """
    if "Variant ID" not in df.columns:
        return

    variant_ids = df["Variant ID"]
    df["Variant Pattern Encoded"] = variant_ids.apply(make_variant_info)

In [8]:
# Add the derived columns to every loaded table; each frame is modified in place.
for table in dfs.values():
    add_hex_id_col(table)
    add_variation_pattern_col(table)

In [9]:
# Preview housewares again to check the columns added by the previous cell.
dfs["housewares"].head(n=5)

Unnamed: 0,Name,Image,Variation,Body Title,Pattern,Pattern Title,DIY,Body Customize,Pattern Customize,Pattern Customize Options,...,Lighting Type,Catalog,Version Added,Unlocked?,Filename,Variant ID,Internal ID,Unique Entry ID,Internal ID as hex,Variant Pattern Encoded
0,? Block,,,,,,No,No,No,,...,No lighting,Not in catalog,1.8.0,Yes,FtrMarioSquareA,,13821,mxpHakMcmNJgMH5eo,35FD,
1,2021 celebratory arch,,,,,,No,No,No,,...,No lighting,Seasonal,1.6.0,Yes,Ftr2021Arch,,13543,kzegJttZt8DzxwamN,34E7,
2,2022 celebratory arch,,,,,,No,No,No,,...,No lighting,Seasonal,2.0.0,Yes,Ftr2022Arch,,14590,7sEJLEMRhmr8kh4pG,38FE,
3,ABD,,Silver,Color,,,No,No,No,,...,No lighting,Not for sale,2.0.0,Yes,FtrATM_Remake_0_0,0_0,14575,pdwvaGBRsGoyDWcuC,38EF,"(0, 0)"
4,ABD,,Blue,Color,,,No,No,No,,...,No lighting,Not for sale,2.0.0,Yes,FtrATM_Remake_1_0,1_0,14575,8C6KfKTkJ3QLc4ZL9,38EF,"(1, 0)"


In [10]:
import os

# Folder that receives one processed CSV per loaded table.
output_dir = "processed_data"
os.makedirs(output_dir, exist_ok=True)

# Write each table as "<output_dir>/<table name>.csv" without the index column.
for table_name, table in dfs.items():
    target = os.path.join(output_dir, table_name)
    table.to_csv(f"{target}.csv", index=False)
