# Hot Wheels Data Processing
This notebook processes Hot Wheels data and loads it into a PostgreSQL database.

## 1. Setup and Data Loading

In [3]:
import pandas as pd
import numpy as np
import uuid
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Pull variables from a .env file (if python-dotenv finds one) into os.environ
load_dotenv()

# PostgreSQL connection settings; each one is None when its variable is unset
(
    POSTGRES_USER,
    POSTGRES_PASSWORD,
    POSTGRES_DB,
    POSTGRES_HOSTNAME,
    POSTGRES_PORT,
) = (
    os.environ.get(setting)
    for setting in (
        "POSTGRES_USER",
        "POSTGRES_PASSWORD",
        "POSTGRES_DB",
        "POSTGRES_HOSTNAME",
        "POSTGRES_PORT",
    )
)

# Read the line-delimited JSON file, parsing "Collector #" as a nullable integer
df = pd.read_json(
    "hotwheels.jsonl",
    lines=True,
    dtype={"Collector #": "Int64"},
)

## 2. Data Cleaning

In [4]:
# Keep only rows that have a model name, then discard the columns we do not need
unused_columns = ["Card Variant", "Mainline", "ID"]
df = df.dropna(subset=['Model Name']).drop(columns=unused_columns)

## 3. Column Standardization

In [5]:
# Original column header -> snake_case column name used from here on.
# Headers ending in "#" map to a "_number" suffix.
column_mapping = {
    "Model Name": "model_name",
    "Image URL": "image_url",
    "Collector #": "collector_number",
    "Series #": "series_number",
    "Release Year": "release_year",
    "Series": "series",
    "Color": "color",
    "Tampo": "tampo",
    "Wheel Type": "wheel_type",
    "Base Type": "base_type",
    "Base Color": "base_color",
    "Window Color": "window_color",
    "Interior Color": "interior_color",
    "Toy #": "toy_number",
    "Assortment #": "assortment_number",
    "Scale": "scale",
    "Country": "country",
    "Base Codes": "base_codes",
    "Case Number": "case_number",
    "Notes": "notes",
    "Treasure Hunt": "treasure_hunt",
}
df = df.rename(columns=column_mapping)

## 4. Feature Engineering

In [6]:
def extract_treasure_hunt_year(text):
    """Return the year of a regular (non-Super) Treasure Hunt label.

    The year is read from the first whitespace-separated token of the label,
    e.g. ``"2019 Treasure Hunts"`` -> ``2019``.

    Args:
        text: Raw ``treasure_hunt`` value; may be missing (``None``/``NaN``).

    Returns:
        The year as an ``int``, or ``np.nan`` when the value is missing, does
        not contain ``'Treasure Hunts'``, contains ``'Super'``, or does not
        start with an integer.
    """
    if pd.isna(text):
        return np.nan
    if 'Treasure Hunts' not in text or 'Super' in text:
        return np.nan
    # split() with no argument ignores leading/repeated whitespace, so a
    # padded label still yields the year as its first token.
    first_token = text.split()[0]
    try:
        return int(first_token)
    except ValueError:
        # No leading year (e.g. "Treasure Hunts"): report it as missing
        # instead of aborting the whole DataFrame.apply on one bad row.
        return np.nan

def extract_super_treasure_hunt_year(text):
    """Return the year of a Super Treasure Hunt label.

    The year is read from the first whitespace-separated token of the label,
    e.g. ``"2019 Super Treasure Hunts"`` -> ``2019``.

    Args:
        text: Raw ``treasure_hunt`` value; may be missing (``None``/``NaN``).

    Returns:
        The year as an ``int``, or ``np.nan`` when the value is missing, does
        not contain ``'Super Treasure Hunts'``, or does not start with an
        integer.
    """
    if pd.isna(text):
        return np.nan
    if 'Super Treasure Hunts' not in text:
        return np.nan
    # split() with no argument ignores leading/repeated whitespace, so a
    # padded label still yields the year as its first token.
    first_token = text.split()[0]
    try:
        return int(first_token)
    except ValueError:
        # No leading year (e.g. "Super Treasure Hunts"): report it as missing
        # instead of aborting the whole DataFrame.apply on one bad row.
        return np.nan

# Derive both year columns from the raw treasure-hunt label, then drop the label
hunt_labels = df['treasure_hunt']
df = df.assign(
    treasure_hunt_year=hunt_labels.apply(extract_treasure_hunt_year),
    super_treasure_hunt_year=hunt_labels.apply(extract_super_treasure_hunt_year),
).drop(columns=["treasure_hunt"])

# Give every row a freshly generated random UUID
df['id'] = [uuid.uuid4() for _ in df.index]

## 5. Data Type Conversion

In [7]:
# Cast the derived year columns to the nullable "Int64" dtype
df = df.astype({
    "super_treasure_hunt_year": "Int64",
    "treasure_hunt_year": "Int64",
})

# errors='coerce' turns unparseable release years into missing values before the cast
release_year_numeric = pd.to_numeric(df["release_year"], errors='coerce')
df["release_year"] = release_year_numeric.astype("Int64")

In [8]:
# Show a random sample of rows, restricted to a few key columns
preview_columns = ['model_name', 'image_url', 'series', 'release_year']
df.loc[:, preview_columns].sample(n=11)

Unnamed: 0,model_name,image_url,series,release_year
5174,C6 Corvette,https://images.collecthw.com/d05e76ec-86b5-497...,Auto Affinity Great 8s,2004
49899,Volkswagen T2 Pickup,https://images.collecthw.com/9ae41c94-fa11-46f...,Volkswagen,2019
5681,GMC Motor Home,https://images.collecthw.com/69df0223-9596-469...,1980 Hot Wheels,1980
43867,'15 Dodge Charger SRT,https://images.collecthw.com/b749c74b-81cd-450...,HW Rescue,2022
19511,Streex Machine,https://images.collecthw.com/504723af-34fa-482...,Streex Laucher Set,1992
24608,Fandango,https://images.collecthw.com/d502a23b-ada9-459...,Checkmate,2018
50683,Chevy Silverado Off Road,https://images.collecthw.com/54014dc8-e7c8-41f...,Mud Studs 5-Pack,2024
35901,'91 Mazda MX-5 Miata,https://images.collecthw.com/54ca4eff-6a1a-49c...,HW Roadsters,2020
33326,Surfin' School Bus,https://images.collecthw.com/c0e30015-add6-416...,Flying Customs,2003
40236,Batmobile,https://images.collecthw.com/8934108a-217c-463...,Batman,2018


## 6. Database Export

In [9]:
""" DATABASE_URL = f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOSTNAME}:{POSTGRES_PORT}/{POSTGRES_DB}"
engine = create_engine(DATABASE_URL)

df.to_sql('hotwheels', engine, if_exists='append', index=False) """

' DATABASE_URL = f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOSTNAME}:{POSTGRES_PORT}/{POSTGRES_DB}"\nengine = create_engine(DATABASE_URL)\n\ndf.to_sql(\'hotwheels\', engine, if_exists=\'append\', index=False) '