In [4]:
import torch
import cupy as cp
from numba import cuda

# PyTorch
print("PyTorch CUDA 可用:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU 名称:", torch.cuda.get_device_name(0))

# CuPy
x = cp.arange(10**6, dtype=cp.float32)
y = cp.arange(10**6, dtype=cp.float32)
print("CuPy GPU 运算测试:", (x + y)[:5])

# Numba
print("Numba GPU 数量:", len(cuda.gpus))

ModuleNotFoundError: No module named 'torch'

**GPU测试**

In [5]:
# -*- coding: utf-8 -*-
"""
一键修复 CuPy 并测试 GPU
"""

import os
import sys
import subprocess

def run_cmd(cmd):
    """运行命令行并打印输出"""
    print(f">>> {cmd}")
    result = subprocess.run(cmd, shell=True)
    return result.returncode

def check_conflicting_files():
    """检查当前目录是否有 cupy.py 或 cupy 文件夹"""
    cwd = os.getcwd()
    conflicts = []
    for name in os.listdir(cwd):
        if name.lower() == "cupy.py" or name.lower() == "cupy":
            conflicts.append(name)
    if conflicts:
        print("⚠️ 发现可能冲突的文件/文件夹:", conflicts)
        print("请删除或重命名它们后再运行此脚本")
        return False
    return True

def reinstall_cupy():
    """卸载并重新安装 CuPy"""
    print("卸载残留的 CuPy...")
    run_cmd("pip uninstall cupy cupy-cuda12x -y")
    print("重新安装 cupy-cuda12x...")
    ret = run_cmd("pip install cupy-cuda12x")
    return ret == 0

def test_cupy_gpu():
    """测试 CuPy 和 GPU 是否可用"""
    try:
        import cupy as cp
        x = cp.arange(10)
        print("✅ CuPy 导入成功，示例数组：", x)
        print("检测 GPU 设备数量:", cp.cuda.runtime.getDeviceCount())
        print("当前 GPU 名称:", cp.cuda.runtime.getDeviceProperties(0)['name'].decode())
        return True
    except Exception as e:
        print("❌ CuPy 测试失败:", e)
        return False

if __name__ == "__main__":
    print("=== 一键修复 CuPy 并测试 GPU ===")
    if not check_conflicting_files():
        sys.exit(1)

    if reinstall_cupy():
        print("CuPy 安装完成，开始测试 GPU...")
        if test_cupy_gpu():
            print("🎉 GPU 测试成功！可以开始使用 CuPy 进行加速计算。")
        else:
            print("❌ GPU 测试失败，请检查 CUDA 驱动和显卡环境。")
    else:
        print("❌ CuPy 安装失败，请检查网络或 Python 环境。")


=== 一键修复 CuPy 并测试 GPU ===
卸载残留的 CuPy...
>>> pip uninstall cupy cupy-cuda12x -y


[0m

Found existing installation: cupy-cuda12x 13.6.0
Uninstalling cupy-cuda12x-13.6.0:
  Successfully uninstalled cupy-cuda12x-13.6.0
重新安装 cupy-cuda12x...
>>> pip install cupy-cuda12x
Collecting cupy-cuda12x
  Using cached cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_x86_64.whl.metadata (2.4 kB)
Using cached cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_x86_64.whl (112.9 MB)
Installing collected packages: cupy-cuda12x
Successfully installed cupy-cuda12x-13.6.0
CuPy 安装完成，开始测试 GPU...
✅ CuPy 导入成功，示例数组： [0 1 2 3 4 5 6 7 8 9]
检测 GPU 设备数量: 1
当前 GPU 名称: NVIDIA GeForce RTX 4070 Laptop GPU
🎉 GPU 测试成功！可以开始使用 CuPy 进行加速计算。


In [6]:
import cupy as cp

x = cp.arange(10)
print(x)
print("GPU count:", cp.cuda.runtime.getDeviceCount())
print("GPU name:", cp.cuda.runtime.getDeviceProperties(0)['name'])


[0 1 2 3 4 5 6 7 8 9]
GPU count: 1
GPU name: b'NVIDIA GeForce RTX 4070 Laptop GPU'


**GPU加速生成100万行数据**

In [7]:
# -*- coding: utf-8 -*-
"""Tesla Simulated Sales Data Generator

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qSg43jfMBtM4DZY_bMze_D93QjNTqAZo
"""

# tesla_sales_data_generator_star_schema.py
# 完整的星型模式数据生成脚本
# 目的：为每个国家的所有州/省/直辖市/特别行政区生成 Tesla 销售记录
# - 确保每个州至少有一条记录（避免地图空白）
# - 优化新西兰邮编生成，尽量使用现实可用的四位格式
# - 生成完整的星型模式：Fact_Sales, Dim_Product, Dim_Time, Dim_Geography, Dim_Prices, Dim_Customer
# 使用：python tesla_sales_data_generator_star_schema.py

import pandas as pd
import numpy as np
import random
import datetime
import math
import os

# 固定随机种子以便可复现（需要不同随机结果可注释掉）
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# --------------------------
# Helper: 生成合理格式的邮编/邮政编码（尽量贴近各国常见格式）
# --------------------------
def generate_plausible_zip(country, state_province_abbr):
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'

    # 逐国家处理（覆盖大多数脚本中使用到的国家）
    if country == 'United States':
        # 5-digit zip (做一些基于州缩写简单偏好)
        if state_province_abbr.startswith(('C', 'I')):
            return f"9{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith(('T', 'L')):
            return f"7{random.randint(5,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('F'):
            return f"3{random.randint(2,4)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('N'):
            return f"1{random.randint(0,4)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('W'):
            return f"98{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('A'):
            return f"85{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('G'):
            return f"30{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('P'):
            return f"15{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        return f"{random.randint(10000, 99999)}"

    elif country == 'Canada':
        # Canadian format A1A 1A1 (模拟)
        province_codes = {
            'ON': ['K','L','M','N','P'], 'QC': ['G','H','J'], 'BC': ['V'], 'AB': ['T'],
            'SK': ['S'], 'MB': ['R'], 'NB': ['E'], 'NS': ['B'], 'NL': ['A'], 'PE': ['C'],
            'YT': ['Y'], 'NT': ['X'], 'NU': ['X']
        }
        first_letter = random.choice(province_codes.get(state_province_abbr, ['A']))
        return f"{first_letter}{random.choice(digits)}{random.choice(letters)} {random.choice(digits)}{random.choice(letters)}{random.choice(digits)}"

    elif country == 'Mexico':
        return f"{random.randint(1000, 99999):05d}"

    elif country == 'United Kingdom':
        # 简化模拟英国邮编
        outward = f"{random.choice(letters)}{random.choice(letters)}{random.randint(1,9)}"
        inward = f"{random.choice(digits)}{random.choice(letters)}{random.choice(letters)}"
        return f"{outward} {inward}"

    elif country in ['China', 'Taiwan']:
        if country == 'China':
            # 6位邮编
            if state_province_abbr.startswith('B'):
                return f"10{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            if state_province_abbr.startswith('S'):
                return f"20{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            if state_province_abbr.startswith('G'):
                return f"51{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            return f"{random.randint(10000, 99999)}"
        else:
            # 台湾较常见的3位或3位+2的格式
            return f"{random.randint(100, 999)}"

    elif country == 'Germany':
        return f"{random.randint(10000, 99999)}"

    elif country == 'Japan':
        return f"{random.randint(100, 999)}-{random.randint(1000, 9999)}"

    elif country == 'Australia':
        return f"{random.randint(1000, 9999)}"

    elif country == 'New Zealand':
        # 更贴近新西兰的 4 位邮编格式 — 使用按州/区的近似区间（便于 Shape Map 匹配）
        # 说明：这些区间为常见/广义区间，足以覆盖可视化配对。可按需替换成精准映射表。
        nz_ranges = {
            'AUK': (600, 2699),   # Auckland (广义区间)
            'NTL': (100, 1099),   # Northland
            'WKO': (3200, 3799),  # Waikato
            'BOP': (3000, 3199),  # Bay of Plenty
            'GIS': (4010, 4199),  # Gisborne / East
            'HKB': (4100, 4299),  # Hawke's Bay
            'MWT': (4400, 4699),  # Manawatū-Whanganui
            'MBH': (7200, 7299),  # Marlborough
            'NSN': (7010, 7099),  # Nelson
            'OTA': (9000, 9799),  # Otago
            'STL': (9800, 9899),  # Southland
            'TKI': (4300, 4399),  # Taranaki
            'TAS': (7100, 7199),  # Tasman
            'WAI': (3200, 3799),  # Waikato (重复键安全)
            'WLG': (5010, 5799),  # Wellington region (广义)
            'WTC': (7800, 7999),  # West Coast
            'CAN': (7000, 7999),  # Canterbury (包含 Christchurch)
        }
        rng = nz_ranges.get(state_province_abbr)
        if rng:
            low, high = rng
            # 填充为 4 位字符串
            val = random.randint(low, high)
            return f"{val:04d}"
        else:
            return f"{random.randint(1000, 99999):05d}"

    elif country in ['France', 'Italy']:
        return f"{random.randint(10000, 99999)}"
    elif country == 'Spain':
        return f"{random.randint(10000, 52999)}"
    elif country == 'South Korea':
        return f"{random.randint(10000, 99999)}"
    elif country == 'Thailand':
        return f"{random.randint(10000, 99999)}"
    else:
        # 默认 5 位模拟
        return f"{random.randint(10000, 99999)}"


# --------------------------
# 定义国家/省/州 字典 (使用你原始脚本的详尽列表并确保使用英文全称和缩写)
# --------------------------

tesla_countries = {
    'North America': {
        'United States': {'country_code': 'US', 'states': [
            {'abbr': 'AL', 'full': 'Alabama'}, {'abbr': 'AK', 'full': 'Alaska'}, {'abbr': 'AZ', 'full': 'Arizona'},
            {'abbr': 'AR', 'full': 'Arkansas'}, {'abbr': 'CA', 'full': 'California'}, {'abbr': 'CO', 'full': 'Colorado'},
            {'abbr': 'CT', 'full': 'Connecticut'}, {'abbr': 'DE', 'full': 'Delaware'}, {'abbr': 'FL', 'full': 'Florida'},
            {'abbr': 'GA', 'full': 'Georgia'}, {'abbr': 'HI', 'full': 'Hawaii'}, {'abbr': 'ID', 'full': 'Idaho'},
            {'abbr': 'IL', 'full': 'Illinois'}, {'abbr': 'IN', 'full': 'Indiana'}, {'abbr': 'IA', 'full': 'Iowa'},
            {'abbr': 'KS', 'full': 'Kansas'}, {'abbr': 'KY', 'full': 'Kentucky'}, {'abbr': 'LA', 'full': 'Louisiana'},
            {'abbr': 'ME', 'full': 'Maine'}, {'abbr': 'MD', 'full': 'Maryland'}, {'abbr': 'MA', 'full': 'Massachusetts'},
            {'abbr': 'MI', 'full': 'Michigan'}, {'abbr': 'MN', 'full': 'Minnesota'}, {'abbr': 'MS', 'full': 'Mississippi'},
            {'abbr': 'MO', 'full': 'Missouri'}, {'abbr': 'MT', 'full': 'Montana'}, {'abbr': 'NE', 'full': 'Nebraska'},
            {'abbr': 'NV', 'full': 'Nevada'}, {'abbr': 'NH', 'full': 'New Hampshire'}, {'abbr': 'NJ', 'full': 'New Jersey'},
            {'abbr': 'NM', 'full': 'New Mexico'}, {'abbr': 'NY', 'full': 'New York'}, {'abbr': 'NC', 'full': 'North Carolina'},
            {'abbr': 'ND', 'full': 'North Dakota'}, {'abbr': 'OH', 'full': 'Ohio'}, {'abbr': 'OK', 'full': 'Oklahoma'},
            {'abbr': 'OR', 'full': 'Oregon'}, {'abbr': 'PA', 'full': 'Pennsylvania'}, {'abbr': 'RI', 'full': 'Rhode Island'},
            {'abbr': 'SC', 'full': 'South Carolina'}, {'abbr': 'SD', 'full': 'South Dakota'}, {'abbr': 'TN', 'full': 'Tennessee'},
            {'abbr': 'TX', 'full': 'Texas'}, {'abbr': 'UT', 'full': 'Utah'}, {'abbr': 'VT', 'full': 'Vermont'},
            {'abbr': 'VA', 'full': 'Virginia'}, {'abbr': 'WA', 'full': 'Washington'}, {'abbr': 'WV', 'full': 'West Virginia'},
            {'abbr': 'WI', 'full': 'Wisconsin'}, {'abbr': 'WY', 'full': 'Wyoming'}, {'abbr': 'DC', 'full': 'District of Columbia'}
        ], 'zip_generator': generate_plausible_zip},
        'Canada': {'country_code': 'CA', 'states': [
            {'abbr': 'AB', 'full': 'Alberta'}, {'abbr': 'BC', 'full': 'British Columbia'}, {'abbr': 'MB', 'full': 'Manitoba'},
            {'abbr': 'NB', 'full': 'New Brunswick'}, {'abbr': 'NL', 'full': 'Newfoundland and Labrador'}, {'abbr': 'NS', 'full': 'Nova Scotia'},
            {'abbr': 'ON', 'full': 'Ontario'}, {'abbr': 'PE', 'full': 'Prince Edward Island'}, {'abbr': 'QC', 'full': 'Quebec'},
            {'abbr': 'SK', 'full': 'Saskatchewan'}, {'abbr': 'NT', 'full': 'Northwest Territories'}, {'abbr': 'NU', 'full': 'Nunavut'},
            {'abbr': 'YT', 'full': 'Yukon'}
        ], 'zip_generator': generate_plausible_zip},
        'Mexico': {'country_code': 'MX', 'states': [
            {'abbr': 'AGS', 'full': 'Aguascalientes'}, {'abbr': 'BC', 'full': 'Baja California'}, {'abbr': 'BCS', 'full': 'Baja California Sur'},
            {'abbr': 'CAMP', 'full': 'Campeche'}, {'abbr': 'CHIS', 'full': 'Chiapas'}, {'abbr': 'CHIH', 'full': 'Chihuahua'},
            {'abbr': 'COAH', 'full': 'Coahuila'}, {'abbr': 'COL', 'full': 'Colima'}, {'abbr': 'DUR', 'full': 'Durango'},
            {'abbr': 'GTO', 'full': 'Guanajuato'}, {'abbr': 'GRO', 'full': 'Guerrero'}, {'abbr': 'HGO', 'full': 'Hidalgo'},
            {'abbr': 'JAL', 'full': 'Jalisco'}, {'abbr': 'MEX', 'full': 'Mexico State'}, {'abbr': 'MICH', 'full': 'Michoacán'},
            {'abbr': 'MOR', 'full': 'Morelos'}, {'abbr': 'NAY', 'full': 'Nayarit'}, {'abbr': 'NLE', 'full': 'Nuevo León'},
            {'abbr': 'OAX', 'full': 'Oaxaca'}, {'abbr': 'PUE', 'full': 'Puebla'}, {'abbr': 'QRO', 'full': 'Querétaro'},
            {'abbr': 'QR', 'full': 'Quintana Roo'}, {'abbr': 'SLP', 'full': 'San Luis Potosí'}, {'abbr': 'SIN', 'full': 'Sinaloa'},
            {'abbr': 'SON', 'full': 'Sonora'}, {'abbr': 'TAB', 'full': 'Tabasco'}, {'abbr': 'TAM', 'full': 'Tamaulipas'},
            {'abbr': 'TLAX', 'full': 'Tlaxcala'}, {'abbr': 'VER', 'full': 'Veracruz'}, {'abbr': 'YUC', 'full': 'Yucatán'},
            {'abbr': 'ZAC', 'full': 'Zacatecas'}, {'abbr': 'CDMX', 'full': 'Mexico City'}
        ], 'zip_generator': generate_plausible_zip}
    },
    'Europe': {
        'Germany': {'country_code': 'DE', 'states': [
            {'abbr': 'BW', 'full': 'Baden-Württemberg'}, {'abbr': 'BY', 'full': 'Bavaria'}, {'abbr': 'BE', 'full': 'Berlin'},
            {'abbr': 'BB', 'full': 'Brandenburg'}, {'abbr': 'HB', 'full': 'Bremen'}, {'abbr': 'HH', 'full': 'Hamburg'},
            {'abbr': 'HE', 'full': 'Hesse'}, {'abbr': 'MV', 'full': 'Mecklenburg-Vorpommern'}, {'abbr': 'NI', 'full': 'Lower Saxony'},
            {'abbr': 'NW', 'full': 'North Rhine-Westphalia'}, {'abbr': 'RP', 'full': 'Rhineland-Palatinate'},
            {'abbr': 'SL', 'full': 'Saarland'}, {'abbr': 'SN', 'full': 'Saxony'}, {'abbr': 'ST', 'full': 'Saxony-Anhalt'},
            {'abbr': 'SH', 'full': 'Schleswig-Holstein'}, {'abbr': 'TH', 'full': 'Thuringia'}
        ], 'zip_generator': generate_plausible_zip},
        'United Kingdom': {'country_code': 'GB', 'states': [
            {'abbr': 'ENG', 'full': 'England'}, {'abbr': 'SCT', 'full': 'Scotland'}, {'abbr': 'WLS', 'full': 'Wales'},
            {'abbr': 'NIR', 'full': 'Northern Ireland'}
        ], 'zip_generator': generate_plausible_zip},
        'Norway': {'country_code': 'NO', 'states': [
            {'abbr': 'OS', 'full': 'Oslo'}, {'abbr': 'VL', 'full': 'Viken'}, {'abbr': 'TR', 'full': 'Trøndelag'}, {'abbr': 'RO', 'full': 'Rogaland'},
            {'abbr': 'MR', 'full': 'Møre og Romsdal'}, {'abbr': 'INN', 'full': 'Innlandet'}, {'abbr': 'TROM', 'full': 'Troms og Finnmark'},
            {'abbr': 'VEST', 'full': 'Vestland'}, {'abbr': 'VESTF', 'full': 'Vestfold og Telemark'}, {'abbr': 'AGD', 'full': 'Agder'},
            {'abbr': 'NORDL', 'full': 'Nordland'}
        ], 'zip_generator': generate_plausible_zip},
        'France': {'country_code': 'FR', 'states': [
            {'abbr': 'ARA', 'full': 'Auvergne-Rhône-Alpes'}, {'abbr': 'BFC', 'full': 'Bourgogne-Franche-Comté'},
            {'abbr': 'BRE', 'full': 'Brittany'}, {'abbr': 'CVL', 'full': 'Centre-Val de Loire'}, {'abbr': 'COR', 'full': 'Corsica'},
            {'abbr': 'GES', 'full': 'Grand Est'}, {'abbr': 'HDF', 'full': 'Hauts-de-France'}, {'abbr': 'IDF', 'full': 'Île-de-France'},
            {'abbr': 'NOR', 'full': 'Normandy'}, {'abbr': 'NAQ', 'full': 'Nouvelle-Aquitaine'}, {'abbr': 'OCC', 'full': 'Occitanie'},
            {'abbr': 'PDL', 'full': 'Pays de la Loire'}, {'abbr': 'PAC', 'full': 'Provence-Alpes-Côte d\'Azur'}
        ], 'zip_generator': generate_plausible_zip},
        'Netherlands': {'country_code': 'NL', 'states': [
            {'abbr': 'DR', 'full': 'Drenthe'}, {'abbr': 'FL', 'full': 'Flevoland'}, {'abbr': 'FR', 'full': 'Friesland'},
            {'abbr': 'GE', 'full': 'Gelderland'}, {'abbr': 'GR', 'full': 'Groningen'}, {'abbr': 'LB', 'full': 'Limburg'},
            {'abbr': 'NB', 'full': 'North Brabant'}, {'abbr': 'NH', 'full': 'North Holland'}, {'abbr': 'OV', 'full': 'Overijssel'},
            {'abbr': 'UT', 'full': 'Utrecht'}, {'abbr': 'ZE', 'full': 'Zeeland'}, {'abbr': 'ZH', 'full': 'South Holland'}
        ], 'zip_generator': generate_plausible_zip},
        'Sweden': {'country_code': 'SE', 'states': [
            {'abbr': 'AB', 'full': 'Stockholm County'}, {'abbr': 'AC', 'full': 'Västerbotten County'},
            {'abbr': 'BD', 'full': 'Norrbotten County'}, {'abbr': 'C', 'full': 'Uppsala County'},
            {'abbr': 'D', 'full': 'Södermanland County'}, {'abbr': 'E', 'full': 'Östergötland County'},
            {'abbr': 'F', 'full': 'Jönköping County'}, {'abbr': 'G', 'full': 'Kronoberg County'},
            {'abbr': 'H', 'full': 'Kalmar County'}, {'abbr': 'I', 'full': 'Gotland County'},
            {'abbr': 'K', 'full': 'Blekinge County'}, {'abbr': 'M', 'full': 'Skåne County'},
            {'abbr': 'N', 'full': 'Halland County'}, {'abbr': 'O', 'full': 'Västra Götaland County'},
            {'abbr': 'S', 'full': 'Värmland County'}, {'abbr': 'T', 'full': 'Örebro County'},
            {'abbr': 'U', 'full': 'Västmanland County'}, {'abbr': 'W', 'full': 'Dalarna County'},
            {'abbr': 'X', 'full': 'Gävleborg County'}, {'abbr': 'Y', 'full': 'Västernorrland County'},
            {'abbr': 'Z', 'full': 'Jämtland County'}
        ], 'zip_generator': generate_plausible_zip},
        'Switzerland': {'country_code': 'CH', 'states': [
            {'abbr': 'ZH', 'full': 'Zurich'}, {'abbr': 'BE', 'full': 'Bern'}, {'abbr': 'LU', 'full': 'Lucerne'},
            {'abbr': 'UR', 'full': 'Uri'}, {'abbr': 'SZ', 'full': 'Schwyz'}, {'abbr': 'OW', 'full': 'Obwalden'},
            {'abbr': 'NW', 'full': 'Nidwalden'}, {'abbr': 'GL', 'full': 'Glarus'}, {'abbr': 'ZG', 'full': 'Zug'},
            {'abbr': 'FR', 'full': 'Fribourg'}, {'abbr': 'SO', 'full': 'Solothurn'}, {'abbr': 'BS', 'full': 'Basel-Stadt'},
            {'abbr': 'BL', 'full': 'Basel-Landschaft'}, {'abbr': 'SH', 'full': 'Schaffhausen'}, {'abbr': 'AR', 'full': 'Appenzell Ausserrhoden'},
            {'abbr': 'AI', 'full': 'Appenzell Innerrhoden'}, {'abbr': 'SG', 'full': 'St. Gallen'}, {'abbr': 'GR', 'full': 'Graubünden'},
            {'abbr': 'AG', 'full': 'Aargau'}, {'abbr': 'TG', 'full': 'Thurgau'}, {'abbr': 'TI', 'full': 'Ticino'},
            {'abbr': 'VD', 'full': 'Vaud'}, {'abbr': 'VS', 'full': 'Valais'}, {'abbr': 'NE', 'full': 'Neuchâtel'},
            {'abbr': 'GE', 'full': 'Geneva'}, {'abbr': 'JU', 'full': 'Jura'}
        ], 'zip_generator': generate_plausible_zip},
        'Italy': {'country_code': 'IT', 'states': [
            {'abbr': 'ABR', 'full': 'Abruzzo'}, {'abbr': 'BAS', 'full': 'Basilicata'}, {'abbr': 'CAL', 'full': 'Calabria'},
            {'abbr': 'CAM', 'full': 'Campania'}, {'abbr': 'EMR', 'full': 'Emilia-Romagna'}, {'abbr': 'FVG', 'full': 'Friuli-Venezia Giulia'},
            {'abbr': 'LAZ', 'full': 'Lazio'}, {'abbr': 'LIG', 'full': 'Liguria'}, {'abbr': 'LOM', 'full': 'Lombardy'},
            {'abbr': 'MAR', 'full': 'Marche'}, {'abbr': 'MOL', 'full': 'Molise'}, {'abbr': 'PIE', 'full': 'Piedmont'},
            {'abbr': 'PUG', 'full': 'Apulia'}, {'abbr': 'SAR', 'full': 'Sardinia'}, {'abbr': 'SIC', 'full': 'Sicily'},
            {'abbr': 'TOS', 'full': 'Tuscany'}, {'abbr': 'TAA', 'full': 'Trentino-South Tyrol'}, {'abbr': 'UMB', 'full': 'Umbria'},
            {'abbr': 'VAO', 'full': 'Aosta Valley'}, {'abbr': 'VEN', 'full': 'Veneto'}
        ], 'zip_generator': generate_plausible_zip},
        'Spain': {'country_code': 'ES', 'states': [
            {'abbr': 'AN', 'full': 'Andalusia'}, {'abbr': 'AR', 'full': 'Aragon'}, {'abbr': 'AS', 'full': 'Asturias'},
            {'abbr': 'CB', 'full': 'Cantabria'}, {'abbr': 'CM', 'full': 'Castile-La Mancha'}, {'abbr': 'CL', 'full': 'Castile and León'},
            {'abbr': 'CT', 'full': 'Catalonia'}, {'abbr': 'EX', 'full': 'Extremadura'}, {'abbr': 'GA', 'full': 'Galicia'},
            {'abbr': 'IB', 'full': 'Balearic Islands'}, {'abbr': 'RI', 'full': 'La Rioja'}, {'abbr': 'MD', 'full': 'Madrid'},
            {'abbr': 'MC', 'full': 'Murcia'}, {'abbr': 'NC', 'full': 'Navarre'}, {'abbr': 'PV', 'full': 'Basque Country'},
            {'abbr': 'VC', 'full': 'Valencian Community'}, {'abbr': 'CE', 'full': 'Ceuta'}, {'abbr': 'ML', 'full': 'Melilla'},
            {'abbr': 'CN', 'full': 'Canary Islands'}
        ], 'zip_generator': generate_plausible_zip},
        'Belgium': {'country_code': 'BE', 'states': [
            {'abbr': 'BRU', 'full': 'Brussels-Capital Region'}, {'abbr': 'WAL', 'full': 'Wallonia'}, {'abbr': 'VLG', 'full': 'Flanders'}
        ], 'zip_generator': generate_plausible_zip},
        'Austria': {'country_code': 'AT', 'states': [
            {'abbr': 'BGL', 'full': 'Burgenland'}, {'abbr': 'KNT', 'full': 'Carinthia'}, {'abbr': 'NOE', 'full': 'Lower Austria'},
            {'abbr': 'OÖ', 'full': 'Upper Austria'}, {'abbr': 'SAL', 'full': 'Salzburg'}, {'abbr': 'STE', 'full': 'Styria'},
            {'abbr': 'TIR', 'full': 'Tyrol'}, {'abbr': 'VOE', 'full': 'Vorarlberg'}, {'abbr': 'WIE', 'full': 'Vienna'}
        ], 'zip_generator': generate_plausible_zip},
        'Denmark': {'country_code': 'DK', 'states': [
            {'abbr': 'H', 'full': 'Capital Region of Denmark'}, {'abbr': 'M', 'full': 'Central Denmark Region'},
            {'abbr': 'S', 'full': 'Region of Southern Denmark'}, {'abbr': 'ND', 'full': 'North Denmark Region'},
            {'abbr': 'SJ', 'full': 'Zealand Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Finland': {'country_code': 'FI', 'states': [
            {'abbr': 'ES', 'full': 'Southern Ostrobothnia'}, {'abbr': 'LS', 'full': 'South Karelia'}, {'abbr': 'OL', 'full': 'North Ostrobothnia'},
            {'abbr': 'VA', 'full': 'Vaasa'}, {'abbr': 'AS', 'full': 'Åland Islands'}, {'abbr': 'HA', 'full': 'Central Ostrobothnia'},
            {'abbr': 'KA', 'full': 'Kainuu'}, {'abbr': 'KE', 'full': 'Central Finland'}, {'abbr': 'KM', 'full': 'Kymenlaakso'},
            {'abbr': 'KU', 'full': 'Pirkanmaa'}, {'abbr': 'LA', 'full': 'Lapland'}, {'abbr': 'PH', 'full': 'North Karelia'},
            {'abbr': 'PS', 'full': 'Northern Savonia'}, {'abbr': 'PV', 'full': 'South Karelia'}, {'abbr': 'SA', 'full': 'Satakunta'},
            {'abbr': 'SS', 'full': 'Southern Savonia'}, {'abbr': 'TA', 'full': 'Tavastia Proper'}, {'abbr': 'US', 'full': 'Uusimaa'}
        ], 'zip_generator': generate_plausible_zip},
        'Greece': {'country_code': 'GR', 'states': [
            {'abbr': 'AT', 'full': 'Attica'}, {'abbr': 'MK', 'full': 'Central Macedonia'}, {'abbr': 'CR', 'full': 'Crete'},
            {'abbr': 'EM', 'full': 'East Macedonia and Thrace'}, {'abbr': 'EP', 'full': 'Epirus'},
            {'abbr': 'IO', 'full': 'Ionian Islands'}, {'abbr': 'NA', 'full': 'North Aegean'},
            {'abbr': 'PC', 'full': 'Peloponnese'}, {'abbr': 'SM', 'full': 'South Aegean'},
            {'abbr': 'TH', 'full': 'Thessaly'}, {'abbr': 'WM', 'full': 'West Macedonia'},
            {'abbr': 'WG', 'full': 'West Greece'}
        ], 'zip_generator': generate_plausible_zip},
        'Iceland': {'country_code': 'IS', 'states': [
            {'abbr': 'HO', 'full': 'Capital Region'}, {'abbr': 'NV', 'full': 'South Region'}, {'abbr': 'SU', 'full': 'Westfjords'},
            {'abbr': 'V', 'full': 'Western Region'}, {'abbr': 'VE', 'full': 'Southern Peninsula'},
            {'abbr': 'AU', 'full': 'East Region'}, {'abbr': 'NO', 'full': 'Northeastern Region'},
            {'abbr': 'W', 'full': 'Northwestern Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Ireland': {'country_code': 'IE', 'states': [
            {'abbr': 'CW', 'full': 'Carlow'}, {'abbr': 'CN', 'full': 'Cavan'}, {'abbr': 'CE', 'full': 'Clare'},
            {'abbr': 'CO', 'full': 'Cork'}, {'abbr': 'DL', 'full': 'Donegal'}, {'abbr': 'D', 'full': 'Dublin'},
            {'abbr': 'G', 'full': 'Galway'}, {'abbr': 'KY', 'full': 'Kerry'}, {'abbr': 'KE', 'full': 'Kildare'},
            {'abbr': 'KK', 'full': 'Kilkenny'}, {'abbr': 'LS', 'full': 'Laois'}, {'abbr': 'LM', 'full': 'Leitrim'},
            {'abbr': 'L', 'full': 'Limerick'}, {'abbr': 'LD', 'full': 'Longford'}, {'abbr': 'LH', 'full': 'Louth'},
            {'abbr': 'MO', 'full': 'Mayo'}, {'abbr': 'MH', 'full': 'Meath'}, {'abbr': 'MN', 'full': 'Monaghan'},
            {'abbr': 'OY', 'full': 'Offaly'}, {'abbr': 'RN', 'full': 'Roscommon'}, {'abbr': 'SO', 'full': 'Sligo'},
            {'abbr': 'TA', 'full': 'Tipperary'}, {'abbr': 'W', 'full': 'Waterford'}, {'abbr': 'WM', 'full': 'Westmeath'},
            {'abbr': 'WX', 'full': 'Wexford'}, {'abbr': 'WW', 'full': 'Wicklow'}
        ], 'zip_generator': generate_plausible_zip},
        'Israel': {'country_code': 'IL', 'states': [
            {'abbr': 'HA', 'full': 'Haifa District'}, {'abbr': 'JM', 'full': 'Jerusalem District'},
            {'abbr': 'NO', 'full': 'Northern District'}, {'abbr': 'CE', 'full': 'Central District'},
            {'abbr': 'TA', 'full': 'Tel Aviv District'}, {'abbr': 'SO', 'full': 'Southern District'}
        ], 'zip_generator': generate_plausible_zip},
        'Luxembourg': {'country_code': 'LU', 'states': [
            {'abbr': 'DS', 'full': 'Diekirch District'}, {'abbr': 'LUX', 'full': 'Luxembourg District'},
            {'abbr': 'GR', 'full': 'Grevenmacher District'}
        ], 'zip_generator': generate_plausible_zip},
        'Monaco': {'country_code': 'MC', 'states': [{'abbr': 'MCO', 'full': 'Monaco'}], 'zip_generator': generate_plausible_zip},
        'Poland': {'country_code': 'PL', 'states': [
            {'abbr': 'DS', 'full': 'Lower Silesian Voivodeship'}, {'abbr': 'KP', 'full': 'Kuyavian-Pomeranian Voivodeship'},
            {'abbr': 'LD', 'full': 'Łódź Voivodeship'}, {'abbr': 'LU', 'full': 'Lublin Voivodeship'},
            {'abbr': 'LB', 'full': 'Lubusz Voivodeship'}, {'abbr': 'MA', 'full': 'Lesser Poland Voivodeship'},
            {'abbr': 'MZ', 'full': 'Masovian Voivodeship'}, {'abbr': 'OP', 'full': 'Opole Voivodeship'},
            {'abbr': 'PK', 'full': 'Subcarpathian Voivodeship'}, {'abbr': 'PD', 'full': 'Podlaskie Voivodeship'},
            {'abbr': 'PM', 'full': 'Pomeranian Voivodeship'}, {'abbr': 'SL', 'full': 'Silesian Voivodeship'},
            {'abbr': 'SW', 'full': 'Świętokrzyskie Voivodeship'}, {'abbr': 'WN', 'full': 'Warmian-Masurian Voivodeship'},
            {'abbr': 'WP', 'full': 'Greater Poland Voivodeship'}, {'abbr': 'ZP', 'full': 'West Pomeranian Voivodeship'}
        ], 'zip_generator': generate_plausible_zip},
        'Portugal': {'country_code': 'PT', 'states': [
            {'abbr': 'AV', 'full': 'Aveiro'}, {'abbr': 'BA', 'full': 'Beja'}, {'abbr': 'BRG', 'full': 'Braga'},
            {'abbr': 'BRC', 'full': 'Bragança'}, {'abbr': 'CB', 'full': 'Castelo Branco'}, {'abbr': 'CO', 'full': 'Coimbra'},
            {'abbr': 'EV', 'full': 'Évora'}, {'abbr': 'FA', 'full': 'Faro'}, {'abbr': 'GU', 'full': 'Guarda'},
            {'abbr': 'LE', 'full': 'Leiria'}, {'abbr': 'LI', 'full': 'Lisbon'}, {'abbr': 'PO', 'full': 'Portalegre'},
            {'abbr': 'PT', 'full': 'Porto'}, {'abbr': 'SA', 'full': 'Santarém'}, {'abbr': 'ST', 'full': 'Setúbal'},
            {'abbr': 'VC', 'full': 'Viana do Castelo'}, {'abbr': 'VL', 'full': 'Vila Real'}, {'abbr': 'VI', 'full': 'Viseu'},
            {'abbr': 'AZO', 'full': 'Azores'}, {'abbr': 'MADE', 'full': 'Madeira'}
        ], 'zip_generator': generate_plausible_zip},
        'Slovenia': {'country_code': 'SI', 'states': [
            {'abbr': 'LJ', 'full': 'Ljubljana'}, {'abbr': 'MB', 'full': 'Maribor'}, {'abbr': 'KR', 'full': 'Kranj'},
            {'abbr': 'CE', 'full': 'Celje'}, {'abbr': 'KPR', 'full': 'Koper'}, {'abbr': 'NGR', 'full': 'Nova Gorica'},
            {'abbr': 'MS', 'full': 'Murska Sobota'}, {'abbr': 'NM', 'full': 'Novo mesto'}, {'abbr': 'PO', 'full': 'Postojna'}
        ], 'zip_generator': generate_plausible_zip},
        'Slovakia': {'country_code': 'SK', 'states': [
            {'abbr': 'BA', 'full': 'Bratislava Region'}, {'abbr': 'TT', 'full': 'Trnava Region'},
            {'abbr': 'NR', 'full': 'Nitra Region'}, {'abbr': 'TN', 'full': 'Trenčín Region'},
            {'abbr': 'ZA', 'full': 'Žilina Region'}, {'abbr': 'BC', 'full': 'Banská Bystrica Region'},
            {'abbr': 'PO', 'full': 'Prešov Region'}, {'abbr': 'KE', 'full': 'Košice Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Czech Republic': {'country_code': 'CZ', 'states': [
            {'abbr': 'PR', 'full': 'Prague'}, {'abbr': 'ST', 'full': 'Central Bohemian Region'},
            {'abbr': 'JC', 'full': 'South Bohemian Region'}, {'abbr': 'PL', 'full': 'Plzeň Region'},
            {'abbr': 'KA', 'full': 'Karlovy Vary Region'}, {'abbr': 'US', 'full': 'Ústí nad Labem Region'},
            {'abbr': 'LI', 'full': 'Liberec Region'}, {'abbr': 'KR', 'full': 'Hradec Králové Region'},
            {'abbr': 'PA', 'full': 'Pardubice Region'}, {'abbr': 'OL', 'full': 'Olomouc Region'},
            {'abbr': 'ZL', 'full': 'Zlín Region'}, {'abbr': 'JM', 'full': 'South Moravian Region'},
            {'abbr': 'MO', 'full': 'Moravian-Silesian Region'}, {'abbr': 'VY', 'full': 'Vysočina Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Hungary': {'country_code': 'HU', 'states': [
            {'abbr': 'BU', 'full': 'Budapest'}, {'abbr': 'BA', 'full': 'Baranya County'}, {'abbr': 'BCS', 'full': 'Bács-Kiskun County'},
            {'abbr': 'BE', 'full': 'Békés County'}, {'abbr': 'BZ', 'full': 'Borsod-Abaúj-Zemplén County'},
            {'abbr': 'CS', 'full': 'Csongrád-Csanád County'}, {'abbr': 'FE', 'full': 'Fejér County'},
            {'abbr': 'GS', 'full': 'Győr-Moson-Sopron County'}, {'abbr': 'HB', 'full': 'Hajdú-Bihar County'},
            {'abbr': 'HE', 'full': 'Heves County'}, {'abbr': 'JN', 'full': 'Jász-Nagykun-Szolnok County'},
            {'abbr': 'KO', 'full': 'Komárom-Esztergom County'}, {'abbr': 'NO', 'full': 'Nógrád County'},
            {'abbr': 'PE', 'full': 'Pest County'}, {'abbr': 'SO', 'full': 'Somogy County'}, {'abbr': 'SZ', 'full': 'Szabolcs-Szatmár-Bereg County'},
            {'abbr': 'TO', 'full': 'Tolna County'}, {'abbr': 'VA', 'full': 'Vas County'}, {'abbr': 'VE', 'full': 'Veszprém County'},
            {'abbr': 'ZA', 'full': 'Zala County'}
        ], 'zip_generator': generate_plausible_zip},
    },
    'Asia Pacific': {
        'China': {'country_code': 'CN', 'states': [
            {'abbr': 'AH', 'full': 'Anhui'}, {'abbr': 'BJ', 'full': 'Beijing'}, {'abbr': 'CQ', 'full': 'Chongqing'},
            {'abbr': 'FJ', 'full': 'Fujian'}, {'abbr': 'GS', 'full': 'Gansu'}, {'abbr': 'GD', 'full': 'Guangdong'},
            {'abbr': 'GX', 'full': 'Guangxi'}, {'abbr': 'GZ', 'full': 'Guizhou'}, {'abbr': 'HA', 'full': 'Hainan'},
            {'abbr': 'HEB', 'full': 'Hebei'}, {'abbr': 'HLJ', 'full': 'Heilongjiang'}, {'abbr': 'HEN', 'full': 'Henan'},
            {'abbr': 'HK', 'full': 'Hong Kong'}, {'abbr': 'HUB', 'full': 'Hubei'}, {'abbr': 'HUN', 'full': 'Hunan'},
            {'abbr': 'NM', 'full': 'Inner Mongolia'}, {'abbr': 'JS', 'full': 'Jiangsu'}, {'abbr': 'JX', 'full': 'Jiangxi'},
            {'abbr': 'JL', 'full': 'Jilin'}, {'abbr': 'LN', 'full': 'Liaoning'}, {'abbr': 'MC', 'full': 'Macao'},
            {'abbr': 'NX', 'full': 'Ningxia'}, {'abbr': 'QH', 'full': 'Qinghai'}, {'abbr': 'SN', 'full': 'Shaanxi'},
            {'abbr': 'SD', 'full': 'Shandong'}, {'abbr': 'SH', 'full': 'Shanghai'}, {'abbr': 'SX', 'full': 'Shanxi'},
            {'abbr': 'SC', 'full': 'Sichuan'}, {'abbr': 'TJ', 'full': 'Tianjin'}, {'abbr': 'XJ', 'full': 'Xinjiang'},
            {'abbr': 'XZ', 'full': 'Tibet'}, {'abbr': 'YN', 'full': 'Yunnan'}, {'abbr': 'ZJ', 'full': 'Zhejiang'}
        ], 'zip_generator': generate_plausible_zip},
        'Taiwan': {'country_code': 'TW', 'states': [
            {'abbr': 'TP', 'full': 'Taipei City'}, {'abbr': 'KS', 'full': 'Kaohsiung City'},
            {'abbr': 'TC', 'full': 'Taichung City'}, {'abbr': 'TN', 'full': 'Tainan City'},
            {'abbr': 'TY', 'full': 'Taoyuan City'}, {'abbr': 'HSZ', 'full': 'Hsinchu City'},
            {'abbr': 'CY', 'full': 'Chiayi City'}, {'abbr': 'KE', 'full': 'Keelung City'},
            {'abbr': 'TWP', 'full': 'Taiwan Province'}, {'abbr': 'FUK', 'full': 'Fukien Province (Kinmen & Lienchiang)'}
        ], 'zip_generator': generate_plausible_zip},
        'South Korea': {'country_code': 'KR', 'states': [
            {'abbr': 'SO', 'full': 'Seoul'}, {'abbr': 'BU', 'full': 'Busan'}, {'abbr': 'DAE', 'full': 'Daegu'},
            {'abbr': 'IN', 'full': 'Incheon'}, {'abbr': 'GJ', 'full': 'Gwangju'}, {'abbr': 'DJ', 'full': 'Daejeon'},
            {'abbr': 'UL', 'full': 'Ulsan'}, {'abbr': 'SEJ', 'full': 'Sejong City'}, {'abbr': 'GG', 'full': 'Gyeonggi Province'},
            {'abbr': 'GW', 'full': 'Gangwon Province'}, {'abbr': 'CB', 'full': 'North Chungcheong Province'},
            {'abbr': 'CN', 'full': 'South Chungcheong Province'}, {'abbr': 'JB', 'full': 'North Jeolla Province'},
            {'abbr': 'JN', 'full': 'South Jeolla Province'}, {'abbr': 'GB', 'full': 'North Gyeongsang Province'},
            {'abbr': 'GN', 'full': 'South Gyeongsang Province'}, {'abbr': 'JJ', 'full': 'Jeju Province'}
        ], 'zip_generator': generate_plausible_zip},
        'Japan': {'country_code': 'JP', 'states': [
            {'abbr': 'HK', 'full': 'Hokkaido'}, {'abbr': 'AO', 'full': 'Aomori'}, {'abbr': 'IW', 'full': 'Iwate'},
            {'abbr': 'MI', 'full': 'Miyagi'}, {'abbr': 'AK', 'full': 'Akita'}, {'abbr': 'YA', 'full': 'Yamagata'},
            {'abbr': 'FU', 'full': 'Fukushima'}, {'abbr': 'IB', 'full': 'Ibaraki'}, {'abbr': 'TC', 'full': 'Tochigi'},
            {'abbr': 'GU', 'full': 'Gunma'}, {'abbr': 'SA', 'full': 'Saitama'}, {'abbr': 'CH', 'full': 'Chiba'},
            {'abbr': 'TO', 'full': 'Tokyo'}, {'abbr': 'KA', 'full': 'Kanagawa'}, {'abbr': 'NI', 'full': 'Niigata'},
            {'abbr': 'TOY', 'full': 'Toyama'}, {'abbr': 'IS', 'full': 'Ishikawa'}, {'abbr': 'FU_2', 'full': 'Fukui'},
            {'abbr': 'YA_2', 'full': 'Yamanashi'}, {'abbr': 'NA', 'full': 'Nagano'}, {'abbr': 'GI', 'full': 'Gifu'},
            {'abbr': 'SH', 'full': 'Shizuoka'}, {'abbr': 'AI', 'full': 'Aichi'}, {'abbr': 'MI_2', 'full': 'Mie'},
            {'abbr': 'SH_2', 'full': 'Shiga'}, {'abbr': 'KY', 'full': 'Kyoto'}, {'abbr': 'OS', 'full': 'Osaka'},
            {'abbr': 'HY', 'full': 'Hyogo'}, {'abbr': 'NA_2', 'full': 'Nara'}, {'abbr': 'WA', 'full': 'Wakayama'},
            {'abbr': 'TO_2', 'full': 'Tottori'}, {'abbr': 'SH_3', 'full': 'Shimane'}, {'abbr': 'OK', 'full': 'Okayama'},
            {'abbr': 'HI', 'full': 'Hiroshima'}, {'abbr': 'YA_3', 'full': 'Yamaguchi'}, {'abbr': 'TO_3', 'full': 'Tokushima'},
            {'abbr': 'KA_2', 'full': 'Kagawa'}, {'abbr': 'EH', 'full': 'Ehime'}, {'abbr': 'KO', 'full': 'Kochi'},
            {'abbr': 'FU_3', 'full': 'Fukuoka'}, {'abbr': 'SA_2', 'full': 'Saga'}, {'abbr': 'NA_3', 'full': 'Nagasaki'},
            {'abbr': 'KU', 'full': 'Kumamoto'}, {'abbr': 'OI', 'full': 'Oita'}, {'abbr': 'MI_3', 'full': 'Miyazaki'},
            {'abbr': 'KA_3', 'full': 'Kagoshima'}, {'abbr': 'OKI', 'full': 'Okinawa'}
        ], 'zip_generator': generate_plausible_zip},
        'Australia': {'country_code': 'AU', 'states': [
            {'abbr': 'NSW', 'full': 'New South Wales'}, {'abbr': 'VIC', 'full': 'Victoria'},
            {'abbr': 'QLD', 'full': 'Queensland'}, {'abbr': 'SA', 'full': 'South Australia'},
            {'abbr': 'WA', 'full': 'Western Australia'}, {'abbr': 'TAS', 'full': 'Tasmania'},
            {'abbr': 'ACT', 'full': 'Australian Capital Territory'}, {'abbr': 'NT', 'full': 'Northern Territory'}
        ], 'zip_generator': generate_plausible_zip},
        'New Zealand': {'country_code': 'NZ', 'states': [
            {'abbr': 'AUK', 'full': 'Auckland'}, {'abbr': 'BOP', 'full': 'Bay of Plenty'}, {'abbr': 'CAN', 'full': 'Canterbury'},
            {'abbr': 'GIS', 'full': 'Gisborne'}, {'abbr': 'HKB', 'full': 'Hawke\'s Bay'}, {'abbr': 'MWT', 'full': 'Manawatū-Whanganui'},
            {'abbr': 'MBH', 'full': 'Marlborough'}, {'abbr': 'NSN', 'full': 'Nelson'}, {'abbr': 'NTL', 'full': 'Northland'},
            {'abbr': 'OTA', 'full': 'Otago'}, {'abbr': 'STL', 'full': 'Southland'}, {'abbr': 'TKI', 'full': 'Taranaki'},
            {'abbr': 'TAS', 'full': 'Tasman'}, {'abbr': 'WAI', 'full': 'Waikato'}, {'abbr': 'WLG', 'full': 'Wellington'},
            {'abbr': 'WTC', 'full': 'West Coast'}
        ], 'zip_generator': generate_plausible_zip},
        'Thailand': {'country_code': 'TH', 'states': [
            {'abbr': 'BKK', 'full': 'Bangkok'}, {'abbr': 'CM', 'full': 'Chiang Mai'}, {'abbr': 'CN', 'full': 'Chiang Rai'},
            {'abbr': 'TRT', 'full': 'Trat'}, {'abbr': 'PN', 'full': 'Pattani'}, {'abbr': 'PH', 'full': 'Phuket'},
            {'abbr': 'URT', 'full': 'Surat Thani'}, {'abbr': 'SN', 'full': 'Songkhla'}, {'abbr': 'UBN', 'full': 'Ubon Ratchathani'}
        ], 'zip_generator': generate_plausible_zip},
        'Singapore': {'country_code': 'SG', 'states': [{'abbr': 'SGP', 'full': 'Singapore'}], 'zip_generator': generate_plausible_zip},
        'Malaysia': {'country_code': 'MY', 'states': [
            {'abbr': 'JHR', 'full': 'Johor'}, {'abbr': 'KDH', 'full': 'Kedah'}, {'abbr': 'KLT', 'full': 'Kelantan'},
            {'abbr': 'MLK', 'full': 'Malacca'}, {'abbr': 'NSN', 'full': 'Negeri Sembilan'}, {'abbr': 'PHG', 'full': 'Pahang'},
            {'abbr': 'PRK', 'full': 'Perak'}, {'abbr': 'PLS', 'full': 'Perlis'}, {'abbr': 'PNG', 'full': 'Penang'},
            {'abbr': 'SBA', 'full': 'Sabah'}, {'abbr': 'SWK', 'full': 'Sarawak'}, {'abbr': 'SGR', 'full': 'Selangor'},
            {'abbr': 'TRG', 'full': 'Terengganu'}, {'abbr': 'KUL', 'full': 'Kuala Lumpur'}, {'abbr': 'LBN', 'full': 'Labuan'},
            {'abbr': 'PUT', 'full': 'Putrajaya'}
        ], 'zip_generator': generate_plausible_zip}
    }
}


# --------------------------
# Generates the dimension tables
# --------------------------

def generate_dim_product():
    """生成 Dim_Product 维度表"""
    # 原始车型数据
    products_data = [
        {'Model_ID': 1, 'Model_Name': 'Model S', 'Standard_Price_USD': 74990, 'Launch_Date': '2012-06-01', 'Description': 'Luxury electric sedan'},
        {'Model_ID': 2, 'Model_Name': 'Model 3', 'Standard_Price_USD': 38990, 'Launch_Date': '2017-07-28', 'Description': 'Affordable electric sedan'},
        {'Model_ID': 3, 'Model_Name': 'Model X', 'Standard_Price_USD': 79990, 'Launch_Date': '2015-09-29', 'Description': 'Luxury electric SUV with Falcon Wing doors'},
        {'Model_ID': 4, 'Model_Name': 'Model Y', 'Standard_Price_USD': 43990, 'Launch_Date': '2020-03-13', 'Description': 'Compact electric SUV'},
        {'Model_ID': 5, 'Model_Name': 'Cybertruck', 'Standard_Price_USD': 60990, 'Launch_Date': '2023-11-30', 'Description': 'Futuristic electric pickup truck'}
    ]
    dim_product_df = pd.DataFrame(products_data)

    # 1. ADDED: Add 'Model_Category' column based on 'Model_Name'
    def get_category(model_name):
        if 'Model S' in model_name or 'Model 3' in model_name:
            return 'Sedan'
        elif 'Model X' in model_name or 'Model Y' in model_name:
            return 'SUV'
        elif 'Cybertruck' in model_name:
            return 'Truck'
        else:
            return 'Other'

    dim_product_df['Model_Category'] = dim_product_df['Model_Name'].apply(get_category)
    
    # 确保列顺序，以便在 Power BI 中更容易管理
    dim_product_df = dim_product_df[['Model_ID', 'Model_Name', 'Model_Category', 'Standard_Price_USD', 'Launch_Date', 'Description']]

    return dim_product_df


def generate_dim_time(start_date, end_date):
    """
    生成 Dim_Time 维度表
    """
    # 将日期范围转换为日期序列
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    dim_time_df = pd.DataFrame({'Date': date_range})

    # 2. ADDED: Add 'Time_ID' column (as integer YYYYMMDD)
    dim_time_df['Time_ID'] = dim_time_df['Date'].dt.strftime('%Y%m%d').astype(int)

    # 添加其他时间属性列
    dim_time_df['Full_Date'] = dim_time_df['Date'].dt.strftime('%Y-%m-%d')
    dim_time_df['Year'] = dim_time_df['Date'].dt.year
    dim_time_df['Quarter'] = dim_time_df['Date'].dt.quarter
    dim_time_df['Month'] = dim_time_df['Date'].dt.month
    dim_time_df['Day_of_Month'] = dim_time_df['Date'].dt.day
    dim_time_df['Day_of_Week'] = dim_time_df['Date'].dt.dayofweek + 1 # Monday=1, Sunday=7
    dim_time_df['Week_of_Year'] = dim_time_df['Date'].dt.isocalendar().week.astype(int)
    dim_time_df['Day_Name'] = dim_time_df['Date'].dt.day_name()
    dim_time_df['Month_Name'] = dim_time_df['Date'].dt.month_name()
    dim_time_df['Quarter_Name'] = 'Q' + dim_time_df['Quarter'].astype(str)
    
    # 确保列顺序
    dim_time_df = dim_time_df[['Time_ID', 'Date', 'Full_Date', 'Year', 'Quarter', 'Month', 'Day_of_Month', 'Day_of_Week', 'Week_of_Year', 'Day_Name', 'Month_Name', 'Quarter_Name']]

    return dim_time_df


def generate_dim_customer(num_customers):
    """
    生成 Dim_Customer 维度表
    """
    first_names = ['Liam', 'Olivia', 'Noah', 'Emma', 'Oliver', 'Charlotte', 'Elijah', 'Amelia', 'James', 'Ava', 'William', 'Sophia']
    last_names = ['Smith', 'Jones', 'Williams', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson']

    customers = []
    for i in range(1, num_customers + 1):
        customer_id = i
        # 3. ADDED: Add 'Customer_Name'
        customer_name = f"{random.choice(first_names)} {random.choice(last_names)}"
        customers.append({
            'Customer_ID': customer_id,
            'Customer_Name': customer_name,
            'Loyalty_Status': random.choice(['Bronze', 'Silver', 'Gold', 'Platinum'])
        })
    dim_customer_df = pd.DataFrame(customers)
    return dim_customer_df


def generate_sales_and_geography(num_sales, start_date, end_date):
    """
    生成 Fact_Sales 和 Dim_Geography 表
    """
    sales_data = []
    
    # 获取所有地理位置的列表
    all_locations = []
    for region, countries in tesla_countries.items():
        for country, data in countries.items():
            for state in data['states']:
                all_locations.append({
                    'Country': country,
                    'Country_Code': data['country_code'],
                    'State_Province_Abbr': state['abbr'],
                    'State_Province_Full': state['full'],
                    'Zip_Code': data['zip_generator'](country, state['abbr'])
                })

    # 为确保每个州都有数据，先为每个州生成一条销售记录
    for i, loc in enumerate(all_locations):
        sale_date = start_date + datetime.timedelta(days=random.randint(0, (end_date - start_date).days))
        sales_data.append({
            'Sale_Date': sale_date,
            'Product_ID': random.randint(1, 5),
            'Customer_ID': random.randint(1, 200000),
            'Quantity': 1,
            'Sale_Price_USD': 0, # Placeholder, will be calculated later
            'Country': loc['Country'],
            'State_Province': loc['State_Province_Full'],
            'Zip_Code': loc['Zip_Code']
        })

    # 生成剩余的销售数据
    for _ in range(num_sales - len(all_locations)):
        loc = random.choice(all_locations)
        sale_date = start_date + datetime.timedelta(days=random.randint(0, (end_date - start_date).days))
        sales_data.append({
            'Sale_Date': sale_date,
            'Product_ID': random.randint(1, 5),
            'Customer_ID': random.randint(1, 200000),
            'Quantity': random.choice([1, 1, 1, 1, 2]),
            'Sale_Price_USD': 0,
            'Country': loc['Country'],
            'State_Province': loc['State_Province_Full'],
            'Zip_Code': loc['Zip_Code']
        })

    fact_sales_df = pd.DataFrame(sales_data)
    
    # ADDED: 将 'Sale_Date' 列转换为日期时间类型以启用 `.dt` 属性
    fact_sales_df['Sale_Date'] = pd.to_datetime(fact_sales_df['Sale_Date'])
    
    # 生成 Dim_Geography
    dim_geography_data = []
    
    # 4. ADDED: Create a unique 'Geo_ID' for each unique location
    unique_locations = fact_sales_df[['Country', 'State_Province', 'Zip_Code']].drop_duplicates().reset_index(drop=True)
    unique_locations['Geo_ID'] = unique_locations.index + 1  # 确保ID从1开始

    # 5. ADDED: Add 'Geo_ID' to Fact_Sales table
    fact_sales_df = pd.merge(fact_sales_df, unique_locations, on=['Country', 'State_Province', 'Zip_Code'], how='left')

    # 将唯一的地理位置信息保存到 Dim_Geography
    dim_geography_df = unique_locations.rename(columns={'State_Province': 'State_Province_Full'})
    
    # 确保 Dim_Geography 有 'State_Province_Abbr' 列以便在 Power BI 中使用
    state_map = {state['full']: state['abbr'] for region, countries in tesla_countries.items() for country, data in countries.items() for state in data['states']}
    dim_geography_df['State_Province_Abbr'] = dim_geography_df['State_Province_Full'].map(state_map).fillna('')
    
    # 确保列顺序
    dim_geography_df = dim_geography_df[['Geo_ID', 'Country', 'State_Province_Full', 'State_Province_Abbr', 'Zip_Code']]
    
    # 添加 'Time_ID' 到事实表
    fact_sales_df['Time_ID'] = fact_sales_df['Sale_Date'].dt.strftime('%Y%m%d').astype(int)
    
    return fact_sales_df, dim_geography_df


# --------------------------
# Main function
# --------------------------

def main():
    print("正在生成数据表...")

    # 设置日期范围和数据行数
    start_date = datetime.date(2023, 1, 1)
    end_date = datetime.date(2025, 12, 31)
    num_sales = 1000000 # 100万行数据

    # 生成维度表和事实表
    dim_product_df = generate_dim_product()
    dim_time_df = generate_dim_time(start_date, end_date)
    dim_customer_df = generate_dim_customer(num_customers=200000)

    # 生成销售事实表和地理维度表
    fact_sales_df, dim_geography_df = generate_sales_and_geography(num_sales, start_date, end_date)

    # 计算销售价格
    dim_product_map = dim_product_df.set_index('Model_ID')['Standard_Price_USD'].to_dict()
    
    # 确保 fact_sales_df['Product_ID'] 的数据类型是可哈希的，以便进行映射
    fact_sales_df['Sale_Price_USD'] = fact_sales_df['Product_ID'].map(dim_product_map)
    fact_sales_df['Sale_Price_USD'] = fact_sales_df['Sale_Price_USD'] * fact_sales_df['Quantity'] * (1 - np.random.rand(len(fact_sales_df)) * 0.1)
    fact_sales_df['Sale_Price_USD'] = fact_sales_df['Sale_Price_USD'].apply(lambda x: math.ceil(x / 100) * 100) # 模拟价格调整

    # 添加外键
    fact_sales_df = pd.merge(fact_sales_df, dim_customer_df[['Customer_ID', 'Customer_Name']], on='Customer_ID', how='left')

    # 选择 Fact_Sales 的最终列
    fact_sales_df = fact_sales_df[['Sale_Date', 'Time_ID', 'Product_ID', 'Customer_ID', 'Customer_Name', 'Geo_ID', 'Quantity', 'Sale_Price_USD']]


    # 保存到CSV
    dim_product_df.to_csv('Dim_Product.csv', index=False, encoding='utf-8')
    dim_time_df.to_csv('Dim_Time.csv', index=False, encoding='utf-8')
    dim_customer_df.to_csv('Dim_Customer.csv', index=False, encoding='utf-8')
    dim_geography_df.to_csv('Dim_Geography.csv', index=False, encoding='utf-8')
    fact_sales_df.to_csv('Fact_Sales.csv', index=False, encoding='utf-8')

    print("数据生成完成，文件已保存到 Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, and Fact_Sales.csv")


if __name__ == "__main__":
    main()

正在生成数据表...
数据生成完成，文件已保存到 Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, and Fact_Sales.csv


**第二版GPU加速代码**

In [6]:
# -*- coding: utf-8 -*-
"""Tesla Simulated Sales Data Generator

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qSg43jfMBtM4DZY_bMze_D93QjNTqAZo
"""

# tesla_sales_data_generator_star_schema.py
# 完整的星型模式数据生成脚本
# 目的：为每个国家的所有州/省/直辖市/特别行政区生成 Tesla 销售记录
# - 确保每个州至少有一条记录（避免地图空白）
# - 优化新西兰邮编生成，尽量使用现实可用的四位格式
# - 生成完整的星型模式：Fact_Sales, Dim_Product, Dim_Time, Dim_Geography, Dim_Prices, Dim_Customer
# 使用：python tesla_sales_data_generator_star_schema.py

import pandas as pd
import numpy as np
import random
import datetime
import math
import os
import copy # 新增：用于深拷贝字典

# 固定随机种子以便可复现（需要不同随机结果可注释掉）
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# --------------------------
# Helper: 生成合理格式的邮编/邮政编码（尽量贴近各国常见格式）
# --------------------------
def generate_plausible_zip(country, state_province_abbr):
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'

    # 逐国家处理（覆盖大多数脚本中使用到的国家）
    if country == 'United States':
        # 5-digit US ZIP code
        return ''.join(random.choices(digits, k=5))
    if country == 'Canada':
        # A1A 1A1 format
        return ''.join(random.choices(letters, k=1)) + ''.join(random.choices(digits, k=1)) + ''.join(random.choices(letters, k=1)) + ' ' + \
               ''.join(random.choices(digits, k=1)) + ''.join(random.choices(letters, k=1)) + ''.join(random.choices(digits, k=1))
    if country == 'Germany':
        # 5-digit German postal code
        return ''.join(random.choices(digits, k=5))
    if country == 'United Kingdom':
        # Variable length UK postal code (e.g., SW1A 0AA)
        return ''.join(random.choices(letters, k=1)) + ''.join(random.choices(letters + digits, k=1)) + ''.join(random.choices(digits, k=1)) + ' ' + \
               ''.join(random.choices(digits, k=1)) + ''.join(random.choices(letters, k=2))
    if country == 'Norway':
        # 4-digit Norwegian postal code
        return ''.join(random.choices(digits, k=4))
    if country == 'China':
        # 6-digit Chinese postal code
        return ''.join(random.choices(digits, k=6))
    if country == 'Japan':
        # 7-digit Japanese postal code (e.g., 100-0001)
        return ''.join(random.choices(digits, k=3)) + '-' + ''.join(random.choices(digits, k=4))
    if country == 'Australia':
        # 4-digit Australian postal code
        return ''.join(random.choices(digits, k=4))
    if country == 'Mexico':
        # 5-digit Mexican postal code
        return ''.join(random.choices(digits, k=5))
    if country == 'New Zealand':
        # 4-digit New Zealand postal code (e.g., 1010)
        return ''.join(random.choices(digits, k=4))
    
    # fallback to a simple format for other countries
    return ''.join(random.choices(digits, k=5))

# --------------------------
# 数据集配置
# --------------------------

# ADDED: Add 'sales_weight' to each country and state/province to guide sales distribution.
# Weights are relative and based on a mix of market size, economic level, and population.
tesla_countries = {
    'North America': {
        'United States': {'country_code': 'US', 'sales_weight': 0.45, 'states': [
            {'abbr': 'AL', 'full': 'Alabama', 'sales_weight': 0.005}, {'abbr': 'AK', 'full': 'Alaska', 'sales_weight': 0.001},
            {'abbr': 'AZ', 'full': 'Arizona', 'sales_weight': 0.02}, {'abbr': 'CA', 'full': 'California', 'sales_weight': 0.18}, # High weight
            {'abbr': 'TX', 'full': 'Texas', 'sales_weight': 0.12}, # High weight
            {'abbr': 'FL', 'full': 'Florida', 'sales_weight': 0.07}, # High weight
            {'abbr': 'NY', 'full': 'New York', 'sales_weight': 0.05}, {'abbr': 'IL', 'full': 'Illinois', 'sales_weight': 0.03},
            # ... (rest of the US states with relative weights)
        ], 'zip_generator': generate_plausible_zip},
        'Canada': {'country_code': 'CA', 'sales_weight': 0.10, 'states': [
            {'abbr': 'ON', 'full': 'Ontario', 'sales_weight': 0.4}, # High weight for Ontario
            {'abbr': 'QC', 'full': 'Quebec', 'sales_weight': 0.25},
            {'abbr': 'BC', 'full': 'British Columbia', 'sales_weight': 0.2},
            # ... (rest of the Canadian provinces)
        ], 'zip_generator': generate_plausible_zip},
        'Mexico': {'country_code': 'MX', 'sales_weight': 0.02, 'states': [
            {'abbr': 'CDMX', 'full': 'Mexico City', 'sales_weight': 0.2},
            # ... (rest of the Mexican states)
        ], 'zip_generator': generate_plausible_zip}
    },
    'Europe': {
        'Germany': {'country_code': 'DE', 'sales_weight': 0.12, 'states': [
            {'abbr': 'BY', 'full': 'Bavaria', 'sales_weight': 0.15}, {'abbr': 'BW', 'full': 'Baden-Württemberg', 'sales_weight': 0.12},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'United Kingdom': {'country_code': 'GB', 'sales_weight': 0.08, 'states': [
            {'abbr': 'ENG', 'full': 'England', 'sales_weight': 0.8},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'Norway': {'country_code': 'NO', 'sales_weight': 0.05, 'states': [
            {'abbr': 'OS', 'full': 'Oslo', 'sales_weight': 0.3},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        # ... (rest of the European countries with relative weights)
    },
    'Asia Pacific': {
        'China': {'country_code': 'CN', 'sales_weight': 0.20, 'states': [
            {'abbr': 'SH', 'full': 'Shanghai', 'sales_weight': 0.15}, {'abbr': 'BJ', 'full': 'Beijing', 'sales_weight': 0.1},
            {'abbr': 'GD', 'full': 'Guangdong', 'sales_weight': 0.18},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'Japan': {'country_code': 'JP', 'sales_weight': 0.05, 'states': [
            {'abbr': 'TO', 'full': 'Tokyo', 'sales_weight': 0.2},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'Australia': {'country_code': 'AU', 'sales_weight': 0.03, 'states': [
            {'abbr': 'NSW', 'full': 'New South Wales', 'sales_weight': 0.4},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        # ... (rest of the APAC countries with relative weights)
    }
}

# --------------------------
# Helper: Generates the dimension tables
# --------------------------
def generate_dim_product():
    products_data = [
        {'Model_ID': 1, 'Model_Name': 'Model S', 'Standard_Price_USD': 74990, 'Launch_Date': '2012-06-01', 'Description': 'Luxury electric sedan'},
        {'Model_ID': 2, 'Model_Name': 'Model 3', 'Standard_Price_USD': 38990, 'Launch_Date': '2017-07-28', 'Description': 'Affordable electric sedan'},
        {'Model_ID': 3, 'Model_Name': 'Model X', 'Standard_Price_USD': 79990, 'Launch_Date': '2015-09-29', 'Description': 'Luxury electric SUV'},
        {'Model_ID': 4, 'Model_Name': 'Model Y', 'Standard_Price_USD': 43990, 'Launch_Date': '2020-03-13', 'Description': 'Compact electric SUV'},
        {'Model_ID': 5, 'Model_Name': 'Cybertruck', 'Standard_Price_USD': 60990, 'Launch_Date': '2023-11-30', 'Description': 'Futuristic electric pickup truck'}
    ]
    dim_product_df = pd.DataFrame(products_data)
    
    # ADDED: Add the required 'Model_Base_Price_USD' column
    dim_product_df['Model_Base_Price_USD'] = dim_product_df['Standard_Price_USD']
    # ADDED: Rename 'Launch_Date' to 'Model_Launch_Date'
    dim_product_df.rename(columns={'Launch_Date': 'Model_Launch_Date'}, inplace=True)

    # ADDED: Categorical assignment
    model_cat = np.empty(len(dim_product_df), dtype=object)
    model_cat[dim_product_df['Model_Name'].str.contains('Model S|Model 3').values] = 'Sedan'
    model_cat[dim_product_df['Model_Name'].str.contains('Model X|Model Y').values] = 'SUV'
    model_cat[dim_product_df['Model_Name'].str.contains('Cybertruck').values] = 'Truck'
    dim_product_df['Model_Category'] = model_cat

    return dim_product_df

def generate_dim_time():
    start_date = datetime.date(2023, 1, 1)
    end_date = datetime.date(2025, 12, 31)
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    dim_time_df = pd.DataFrame({'Date': date_range})

    dim_time_df['Time_ID'] = dim_time_df['Date'].dt.strftime('%Y%m%d').astype('int32')
    dim_time_df['Full_Date'] = dim_time_df['Date'].dt.strftime('%Y-%m-%d')
    dim_time_df['Year'] = dim_time_df['Date'].dt.year
    dim_time_df['Quarter'] = dim_time_df['Date'].dt.quarter
    dim_time_df['Month'] = dim_time_df['Date'].dt.month
    dim_time_df['Day'] = dim_time_df['Date'].dt.day
    dim_time_df['Day_of_Month'] = dim_time_df['Date'].dt.day
    dim_time_df['Day_of_Week'] = dim_time_df['Date'].dt.dayofweek + 1
    dim_time_df['Week_of_Year'] = dim_time_df['Date'].dt.isocalendar().week.astype('int32')
    dim_time_df['Day_Name'] = dim_time_df['Date'].dt.day_name()
    dim_time_df['Month_Name'] = dim_time_df['Date'].dt.month_name()
    dim_time_df['Quarter_Name'] = 'Q' + dim_time_df['Quarter'].astype(str)
    
    return dim_time_df

def generate_dim_customer():
    num_customers = 200000
    first_names = ['Liam', 'Olivia', 'Noah', 'Emma', 'Oliver', 'Charlotte', 'Elijah', 'Amelia', 'James', 'Ava', 'William', 'Sophia']
    last_names = ['Smith', 'Jones', 'Williams', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson']

    customer_ids = np.arange(1, num_customers + 1)
    
    # Vectorized generation of names and loyalty status
    first_name_cp = np.random.choice(first_names, size=num_customers)
    last_name_cp = np.random.choice(last_names, size=num_customers)
    
    # ADDED: Add the required 'Gender' column
    gender_list = ['Male', 'Female', 'Other']
    gender_cp = np.random.choice(gender_list, size=num_customers, p=[0.49, 0.49, 0.02])
    
    # ADDED: Add the required 'Age_Group' column
    age_groups = ['<25', '25-34', '35-44', '45-54', '55+']
    age_group_cp = np.random.choice(age_groups, size=num_customers, p=[0.2, 0.3, 0.25, 0.15, 0.1])
    
    # ADDED: Add the required 'Income_Level' column
    income_levels = ['Low', 'Medium', 'High']
    income_level_cp = np.random.choice(income_levels, size=num_customers, p=[0.2, 0.5, 0.3])


    dim_customer_df = pd.DataFrame({
        'Customer_ID': customer_ids,
        'Customer_Name': first_name_cp + ' ' + last_name_cp,
        'Gender': gender_cp,
        'Age_Group': age_group_cp,
        'Income_Level': income_level_cp
    })
    return dim_customer_df


def generate_sales_and_geography(dim_product_df, dim_time_df, dim_customer_df):
    """Generates Fact_Sales and Dim_Geography tables with weighted distribution."""
    
    # ADDED: A flat list of all locations to build the Dim_Geography table, including Zip_Code
    all_locations_list = []
    geo_id_counter = 1
    for region, countries in tesla_countries.items():
        for country, data in countries.items():
            for state in data['states']:
                # ADDED: Generate a plausible zip code for each unique location
                zip_code = data['zip_generator'](country, state['abbr'])
                all_locations_list.append({
                    'Geo_ID': geo_id_counter,
                    'Continent': region,
                    'Country': country,
                    'State_Province_Full': state['full'],
                    'State_Province_Abbr': state['abbr'],
                    'Country_Code': data['country_code'],
                    'Zip_Code': zip_code, # ADDED: Add Zip_Code
                    'sales_weight': data['sales_weight'] * state.get('sales_weight', 1.0)
                })
                geo_id_counter += 1

    geo_locations_df = pd.DataFrame(all_locations_list)
    total_weight = geo_locations_df['sales_weight'].sum()
    geo_locations_df['sales_prob'] = geo_locations_df['sales_weight'] / total_weight

    # Highly efficient vectorized weighted random choice using numpy
    num_sales = 5000000
    sales_indices = np.random.choice(geo_locations_df.index, size=num_sales, p=geo_locations_df['sales_prob'].values)
    
    # FIXED: Ensured that 'Geo_ID' is included from the start, preventing the KeyError.
    fact_sales_df = geo_locations_df.loc[sales_indices, ['Geo_ID', 'Country', 'State_Province_Full', 'State_Province_Abbr']].copy().reset_index(drop=True)
    
    # Generate other facts in a vectorized manner
    date_range_days = (dim_time_df['Date'].max() - dim_time_df['Date'].min()).days
    fact_sales_df['Sale_Date'] = dim_time_df['Date'].min() + pd.to_timedelta(np.random.randint(0, date_range_days, size=num_sales), unit='D')
    
    fact_sales_df['Model_ID'] = np.random.randint(1, len(dim_product_df) + 1, size=num_sales).astype('int32')
    fact_sales_df['Customer_ID'] = np.random.randint(1, len(dim_customer_df) + 1, size=num_sales).astype('int32')
    
    # Quantity distribution (mostly 1, with some 2)
    quantity_dist = np.array([1, 1, 1, 1, 2], dtype='int8')
    fact_sales_df['Sales_Units'] = np.random.choice(quantity_dist, size=num_sales) # RENAMED: Quantity to Sales_Units
    
    # ADDED: Generate Is_Discounted_Sale flag (e.g., 20% of sales are discounted)
    fact_sales_df['Is_Discounted_Sale'] = np.random.choice([True, False], size=num_sales, p=[0.2, 0.8])
    
    # Add Time_ID to fact_sales_df
    fact_sales_df['Time_ID'] = fact_sales_df['Sale_Date'].dt.strftime('%Y%m%d').astype('int32')

    # Generate Dim_Geography table with required columns
    # RENAMED: 'State_Province_Full' to 'State_Province'
    dim_geography_df = geo_locations_df[['Geo_ID', 'Continent', 'Country', 'Country_Code', 'State_Province_Full', 'State_Province_Abbr', 'Zip_Code']].drop_duplicates().reset_index(drop=True)
    dim_geography_df.rename(columns={'State_Province_Full': 'State_Province'}, inplace=True)

    return fact_sales_df, dim_geography_df


# 主函数，执行所有生成任务并保存文件
def main():
    print("正在生成数据表...")
    start_time = datetime.datetime.now()

    dim_product_df = generate_dim_product()
    dim_time_df = generate_dim_time()
    dim_customer_df = generate_dim_customer()

    fact_sales_df, dim_geography_df = generate_sales_and_geography(dim_product_df, dim_time_df, dim_customer_df)

    # 计算销售价格和收入
    # ADDED: Use discounted price logic
    dim_product_map = dim_product_df.set_index('Model_ID')['Model_Base_Price_USD'].to_dict()
    
    fact_sales_df['Base_Price'] = fact_sales_df['Model_ID'].map(dim_product_map)
    
    # Calculate revenue based on discount flag
    discount_factor = 0.8 # 20% discount
    fact_sales_df['Revenue_USD'] = np.where(
        fact_sales_df['Is_Discounted_Sale'],
        fact_sales_df['Base_Price'] * fact_sales_df['Sales_Units'] * discount_factor,
        fact_sales_df['Base_Price'] * fact_sales_df['Sales_Units']
    )
    
    # 选择 Fact_Sales 的最终列并严格排序
    fact_sales_cols = ['Time_ID', 'Geo_ID', 'Model_ID', 'Customer_ID', 'Sales_Units', 'Is_Discounted_Sale', 'Revenue_USD']
    fact_sales_df = fact_sales_df[fact_sales_cols]

    # Dim_Time
    dim_time_cols = ['Time_ID', 'Full_Date', 'Year', 'Quarter', 'Month', 'Day', 'Week_of_Year', 'Day_of_Week', 'Day_Name']
    dim_time_df = dim_time_df[dim_time_cols]
    
    # Dim_Product
    dim_product_cols = ['Model_ID', 'Model_Name', 'Model_Category', 'Model_Base_Price_USD', 'Model_Launch_Date']
    dim_product_df = dim_product_df[dim_product_cols]

    # Dim_Customer
    dim_customer_cols = ['Customer_ID', 'Customer_Name', 'Gender', 'Age_Group', 'Income_Level']
    dim_customer_df = dim_customer_df[dim_customer_cols]

    # Dim_Geography
    dim_geography_cols = ['Geo_ID', 'Continent', 'Country', 'Country_Code', 'State_Province', 'State_Province_Abbr', 'Zip_Code']
    dim_geography_df = dim_geography_df[dim_geography_cols]


    # 保存到CSV
    print("正在保存文件...")
    dim_product_df.to_csv('Dim_Product.csv', index=False, encoding='utf-8')
    dim_time_df.to_csv('Dim_Time.csv', index=False, encoding='utf-8')
    dim_geography_df.to_csv('Dim_Geography.csv', index=False, encoding='utf-8')
    dim_customer_df.to_csv('Dim_Customer.csv', index=False, encoding='utf-8')
    fact_sales_df.to_csv('Fact_Sales.csv', index=False, encoding='utf-8')

    end_time = datetime.datetime.now()
    duration = end_time - start_time
    print(f"数据生成和保存完成，用时 {duration.total_seconds():.2f} 秒。")
    print("已保存文件: Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, Fact_Sales.csv")


if __name__ == "__main__":
    main()

正在生成数据表...
正在保存文件...
数据生成和保存完成，用时 160.02 秒。
已保存文件: Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, Fact_Sales.csv


**CPU优化代码**

In [8]:
# -*- coding: utf-8 -*-
"""Tesla Simulated Sales Data Generator

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qSg43jfMBtM4DZY_bMze_D93QjNTqAZo
"""

# tesla_sales_data_generator_star_schema.py
# 完整的星型模式数据生成脚本
# 目的：为每个国家的所有州/省/直辖市/特别行政区生成 Tesla 销售记录
# - 确保每个州至少有一条记录（避免地图空白）
# - 优化新西兰邮编生成，尽量使用现实可用的四位格式
# - 生成完整的星型模式：Fact_Sales, Dim_Product, Dim_Time, Dim_Geography, Dim_Prices, Dim_Customer
# 使用：python tesla_sales_data_generator_star_schema.py

import pandas as pd
import numpy as np
import random
import datetime
import math
import os
import copy # 新增：用于深拷贝字典

# 固定随机种子以便可复现（需要不同随机结果可注释掉）
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# --------------------------
# Helper: 生成合理格式的邮编/邮政编码（尽量贴近各国常见格式）
# --------------------------
def generate_plausible_zip(country, state_province_abbr):
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'

    # 逐国家处理（覆盖大多数脚本中使用到的国家）
    if country == 'United States':
        # 5-digit zip (做一些基于州缩写简单偏好)
        if state_province_abbr.startswith(('C', 'I')):
            return f"9{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith(('T', 'L')):
            return f"7{random.randint(5,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('F'):
            return f"3{random.randint(2,4)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('N'):
            return f"1{random.randint(0,4)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('W'):
            return f"98{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('A'):
            return f"85{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('G'):
            return f"30{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('P'):
            return f"15{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        return f"{random.randint(10000, 99999)}"

    elif country == 'Canada':
        # Canadian format A1A 1A1 (模拟)
        province_codes = {
            'ON': ['K','L','M','N','P'], 'QC': ['G','H','J'], 'BC': ['V'], 'AB': ['T'],
            'SK': ['S'], 'MB': ['R'], 'NB': ['E'], 'NS': ['B'], 'NL': ['A'], 'PE': ['C'],
            'YT': ['Y'], 'NT': ['X'], 'NU': ['X']
        }
        first_letter = random.choice(province_codes.get(state_province_abbr, ['A']))
        return f"{first_letter}{random.choice(digits)}{random.choice(letters)} {random.choice(digits)}{random.choice(letters)}{random.choice(digits)}"

    elif country == 'Mexico':
        return f"{random.randint(1000, 99999):05d}"

    elif country == 'United Kingdom':
        # 简化模拟英国邮编
        outward = f"{random.choice(letters)}{random.choice(letters)}{random.randint(1,9)}"
        inward = f"{random.choice(digits)}{random.choice(letters)}{random.choice(letters)}"
        return f"{outward} {inward}"

    elif country in ['China', 'Taiwan']:
        if country == 'China':
            # 6位邮编
            if state_province_abbr.startswith('B'):
                return f"10{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            if state_province_abbr.startswith('S'):
                return f"20{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            if state_province_abbr.startswith('G'):
                return f"51{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            return f"{random.randint(10000, 99999)}"
        else:
            # 台湾较常见的3位或3位+2的格式
            return f"{random.randint(100, 999)}"

    elif country == 'Germany':
        return f"{random.randint(10000, 99999)}"

    elif country == 'Japan':
        return f"{random.randint(100, 999)}-{random.randint(1000, 9999)}"

    elif country == 'Australia':
        return f"{random.randint(1000, 9999)}"

    elif country == 'New Zealand':
        # 更贴近新西兰的 4 位邮编格式 — 使用按州/区的近似区间（便于 Shape Map 匹配）
        # 说明：这些区间为常见/广义区间，足以覆盖可视化配对。可按需替换成精准映射表。
        nz_ranges = {
            'AKL': (600, 2699),   # Auckland (广义区间)
            'NTL': (100, 1099),   # Northland
            'WKO': (3200, 3799),  # Waikato
            'BOP': (3000, 3199),  # Bay of Plenty
            'GIS': (4010, 4199),  # Gisborne / East
            'HKB': (4100, 4299),  # Hawke's Bay
            'MWT': (4400, 4699),  # Manawatū-Whanganui
            'MBH': (7200, 7299),  # Marlborough
            'NSN': (7010, 7099),  # Nelson
            'OTA': (9000, 9799),  # Otago
            'STL': (9800, 9899),  # Southland
            'TKI': (4300, 4399),  # Taranaki
            'TAS': (7100, 7199),  # Tasman
            'WKO': (3200, 3799),  # Waikato (重复键安全)
            'WLG': (5010, 5799),  # Wellington region (广义)
            'WTC': (7800, 7999),  # West Coast
            'CAN': (7000, 7999),  # Canterbury (包含 Christchurch)
        }
        rng = nz_ranges.get(state_province_abbr)
        if rng:
            low, high = rng
            # 填充为 4 位字符串
            val = random.randint(low, high)
            return f"{val:04d}"
        else:
            return f"{random.randint(1000, 99999):05d}"

    elif country in ['France', 'Italy']:
        return f"{random.randint(10000, 99999)}"
    elif country == 'Spain':
        return f"{random.randint(10000, 52999)}"
    elif country == 'South Korea':
        return f"{random.randint(10000, 99999)}"
    elif country == 'Thailand':
        return f"{random.randint(10000, 99999)}"
    else:
        # 默认 5 位模拟
        return f"{random.randint(10000, 99999)}"


# --------------------------
# 定义国家/省/州 字典 (新增 'sales_weight' 字段用于分配销售额)
# --------------------------
tesla_countries = {
    'North America': {
        'United States': {'country_code': 'US', 'states': [
            {'abbr': 'CA', 'full': 'California', 'sales_weight': 10},
            {'abbr': 'TX', 'full': 'Texas', 'sales_weight': 9},
            {'abbr': 'FL', 'full': 'Florida', 'sales_weight': 8},
            {'abbr': 'NY', 'full': 'New York', 'sales_weight': 7},
            {'abbr': 'WA', 'full': 'Washington', 'sales_weight': 6},
            {'abbr': 'IL', 'full': 'Illinois', 'sales_weight': 5},
            {'abbr': 'MA', 'full': 'Massachusetts', 'sales_weight': 5},
            {'abbr': 'NJ', 'full': 'New Jersey', 'sales_weight': 5},
            {'abbr': 'NC', 'full': 'North Carolina', 'sales_weight': 4},
            {'abbr': 'GA', 'full': 'Georgia', 'sales_weight': 4},
            {'abbr': 'PA', 'full': 'Pennsylvania', 'sales_weight': 4},
            {'abbr': 'CO', 'full': 'Colorado', 'sales_weight': 4},
            {'abbr': 'AZ', 'full': 'Arizona', 'sales_weight': 4},
            {'abbr': 'OH', 'full': 'Ohio', 'sales_weight': 3},
            {'abbr': 'MI', 'full': 'Michigan', 'sales_weight': 3},
            {'abbr': 'VA', 'full': 'Virginia', 'sales_weight': 3},
            {'abbr': 'MD', 'full': 'Maryland', 'sales_weight': 3},
            {'abbr': 'OR', 'full': 'Oregon', 'sales_weight': 3},
            {'abbr': 'NV', 'full': 'Nevada', 'sales_weight': 2},
            {'abbr': 'MN', 'full': 'Minnesota', 'sales_weight': 2},
            {'abbr': 'UT', 'full': 'Utah', 'sales_weight': 2},
            {'abbr': 'DC', 'full': 'District of Columbia', 'sales_weight': 2},
            {'abbr': 'AL', 'full': 'Alabama', 'sales_weight': 1},
            {'abbr': 'AK', 'full': 'Alaska', 'sales_weight': 1},
            {'abbr': 'AR', 'full': 'Arkansas', 'sales_weight': 1},
            {'abbr': 'CT', 'full': 'Connecticut', 'sales_weight': 1},
            {'abbr': 'DE', 'full': 'Delaware', 'sales_weight': 1},
            {'abbr': 'HI', 'full': 'Hawaii', 'sales_weight': 1},
            {'abbr': 'ID', 'full': 'Idaho', 'sales_weight': 1},
            {'abbr': 'IN', 'full': 'Indiana', 'sales_weight': 1},
            {'abbr': 'IA', 'full': 'Iowa', 'sales_weight': 1},
            {'abbr': 'KS', 'full': 'Kansas', 'sales_weight': 1},
            {'abbr': 'KY', 'full': 'Kentucky', 'sales_weight': 1},
            {'abbr': 'LA', 'full': 'Louisiana', 'sales_weight': 1},
            {'abbr': 'ME', 'full': 'Maine', 'sales_weight': 1},
            {'abbr': 'MS', 'full': 'Mississippi', 'sales_weight': 1},
            {'abbr': 'MO', 'full': 'Missouri', 'sales_weight': 1},
            {'abbr': 'MT', 'full': 'Montana', 'sales_weight': 1},
            {'abbr': 'NE', 'full': 'Nebraska', 'sales_weight': 1},
            {'abbr': 'NH', 'full': 'New Hampshire', 'sales_weight': 1},
            {'abbr': 'NM', 'full': 'New Mexico', 'sales_weight': 1},
            {'abbr': 'ND', 'full': 'North Dakota', 'sales_weight': 1},
            {'abbr': 'OK', 'full': 'Oklahoma', 'sales_weight': 1},
            {'abbr': 'RI', 'full': 'Rhode Island', 'sales_weight': 1},
            {'abbr': 'SC', 'full': 'South Carolina', 'sales_weight': 1},
            {'abbr': 'SD', 'full': 'South Dakota', 'sales_weight': 1},
            {'abbr': 'TN', 'full': 'Tennessee', 'sales_weight': 1},
            {'abbr': 'VT', 'full': 'Vermont', 'sales_weight': 1},
            {'abbr': 'WV', 'full': 'West Virginia', 'sales_weight': 1},
            {'abbr': 'WI', 'full': 'Wisconsin', 'sales_weight': 1},
            {'abbr': 'WY', 'full': 'Wyoming', 'sales_weight': 1},
        ], 'zip_generator': generate_plausible_zip},
        'Canada': {'country_code': 'CA', 'states': [
            {'abbr': 'ON', 'full': 'Ontario', 'sales_weight': 8},
            {'abbr': 'QC', 'full': 'Quebec', 'sales_weight': 6},
            {'abbr': 'BC', 'full': 'British Columbia', 'sales_weight': 5},
            {'abbr': 'AB', 'full': 'Alberta', 'sales_weight': 4},
            {'abbr': 'MB', 'full': 'Manitoba', 'sales_weight': 2},
            {'abbr': 'SK', 'full': 'Saskatchewan', 'sales_weight': 1},
            {'abbr': 'NB', 'full': 'New Brunswick', 'sales_weight': 1},
            {'abbr': 'NL', 'full': 'Newfoundland and Labrador', 'sales_weight': 1},
            {'abbr': 'NS', 'full': 'Nova Scotia', 'sales_weight': 1},
            {'abbr': 'PE', 'full': 'Prince Edward Island', 'sales_weight': 1},
            {'abbr': 'NT', 'full': 'Northwest Territories', 'sales_weight': 1},
            {'abbr': 'NU', 'full': 'Nunavut', 'sales_weight': 1},
            {'abbr': 'YT', 'full': 'Yukon', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Mexico': {'country_code': 'MX', 'states': [
            {'abbr': 'CDMX', 'full': 'Mexico City', 'sales_weight': 10},
            {'abbr': 'MEX', 'full': 'Mexico State', 'sales_weight': 7},
            {'abbr': 'JAL', 'full': 'Jalisco', 'sales_weight': 6},
            {'abbr': 'NLE', 'full': 'Nuevo León', 'sales_weight': 5},
            {'abbr': 'PUE', 'full': 'Puebla', 'sales_weight': 4},
            {'abbr': 'BC', 'full': 'Baja California', 'sales_weight': 3},
            {'abbr': 'VER', 'full': 'Veracruz', 'sales_weight': 3},
            {'abbr': 'MICH', 'full': 'Michoacán', 'sales_weight': 2},
            {'abbr': 'CHIS', 'full': 'Chiapas', 'sales_weight': 2},
            {'abbr': 'QR', 'full': 'Quintana Roo', 'sales_weight': 2},
            {'abbr': 'GTO', 'full': 'Guanajuato', 'sales_weight': 1},
            {'abbr': 'GRO', 'full': 'Guerrero', 'sales_weight': 1},
            {'abbr': 'BCS', 'full': 'Baja California Sur', 'sales_weight': 1},
            {'abbr': 'CHIH', 'full': 'Chihuahua', 'sales_weight': 1},
            {'abbr': 'SIN', 'full': 'Sinaloa', 'sales_weight': 1},
            {'abbr': 'SON', 'full': 'Sonora', 'sales_weight': 1},
            {'abbr': 'YUC', 'full': 'Yucatán', 'sales_weight': 1},
            {'abbr': 'AGS', 'full': 'Aguascalientes', 'sales_weight': 1},
            {'abbr': 'CAMP', 'full': 'Campeche', 'sales_weight': 1},
            {'abbr': 'COAH', 'full': 'Coahuila', 'sales_weight': 1},
            {'abbr': 'COL', 'full': 'Colima', 'sales_weight': 1},
            {'abbr': 'DUR', 'full': 'Durango', 'sales_weight': 1},
            {'abbr': 'HGO', 'full': 'Hidalgo', 'sales_weight': 1},
            {'abbr': 'MOR', 'full': 'Morelos', 'sales_weight': 1},
            {'abbr': 'NAY', 'full': 'Nayarit', 'sales_weight': 1},
            {'abbr': 'OAX', 'full': 'Oaxaca', 'sales_weight': 1},
            {'abbr': 'QRO', 'full': 'Querétaro', 'sales_weight': 1},
            {'abbr': 'SLP', 'full': 'San Luis Potosí', 'sales_weight': 1},
            {'abbr': 'TAB', 'full': 'Tabasco', 'sales_weight': 1},
            {'abbr': 'TAM', 'full': 'Tamaulipas', 'sales_weight': 1},
            {'abbr': 'TLAX', 'full': 'Tlaxcala', 'sales_weight': 1},
            {'abbr': 'ZAC', 'full': 'Zacatecas', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip}
    },
    'Europe': {
        'Germany': {'country_code': 'DE', 'states': [
            {'abbr': 'NW', 'full': 'North Rhine-Westphalia', 'sales_weight': 8},
            {'abbr': 'BY', 'full': 'Bavaria', 'sales_weight': 7},
            {'abbr': 'BW', 'full': 'Baden-Württemberg', 'sales_weight': 6},
            {'abbr': 'HE', 'full': 'Hesse', 'sales_weight': 5},
            {'abbr': 'NI', 'full': 'Lower Saxony', 'sales_weight': 4},
            {'abbr': 'BE', 'full': 'Berlin', 'sales_weight': 3},
            {'abbr': 'HH', 'full': 'Hamburg', 'sales_weight': 3},
            {'abbr': 'SL', 'full': 'Saarland', 'sales_weight': 2},
            {'abbr': 'HB', 'full': 'Bremen', 'sales_weight': 2},
            {'abbr': 'RP', 'full': 'Rhineland-Palatinate', 'sales_weight': 2},
            {'abbr': 'SH', 'full': 'Schleswig-Holstein', 'sales_weight': 2},
            {'abbr': 'SN', 'full': 'Saxony', 'sales_weight': 1},
            {'abbr': 'TH', 'full': 'Thuringia', 'sales_weight': 1},
            {'abbr': 'BB', 'full': 'Brandenburg', 'sales_weight': 1},
            {'abbr': 'MV', 'full': 'Mecklenburg-Vorpommern', 'sales_weight': 1},
            {'abbr': 'ST', 'full': 'Saxony-Anhalt', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'United Kingdom': {'country_code': 'GB', 'states': [
            {'abbr': 'ENG', 'full': 'England', 'sales_weight': 10},
            {'abbr': 'SCT', 'full': 'Scotland', 'sales_weight': 4},
            {'abbr': 'WLS', 'full': 'Wales', 'sales_weight': 2},
            {'abbr': 'NIR', 'full': 'Northern Ireland', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Norway': {'country_code': 'NO', 'states': [
            {'abbr': 'VL', 'full': 'Viken', 'sales_weight': 5},
            {'abbr': 'OS', 'full': 'Oslo', 'sales_weight': 4},
            {'abbr': 'TR', 'full': 'Trøndelag', 'sales_weight': 3},
            {'abbr': 'VEST', 'full': 'Vestland', 'sales_weight': 3},
            {'abbr': 'RO', 'full': 'Rogaland', 'sales_weight': 2},
            {'abbr': 'MR', 'full': 'Møre og Romsdal', 'sales_weight': 2},
            {'abbr': 'INN', 'full': 'Innlandet', 'sales_weight': 2},
            {'abbr': 'TROM', 'full': 'Troms og Finnmark', 'sales_weight': 1},
            {'abbr': 'VESTF', 'full': 'Vestfold og Telemark', 'sales_weight': 1},
            {'abbr': 'AGD', 'full': 'Agder', 'sales_weight': 1},
            {'abbr': 'NORDL', 'full': 'Nordland', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'France': {'country_code': 'FR', 'states': [
            {'abbr': 'IDF', 'full': 'Île-de-France', 'sales_weight': 10},
            {'abbr': 'NAQ', 'full': 'Nouvelle-Aquitaine', 'sales_weight': 5},
            {'abbr': 'ARA', 'full': 'Auvergne-Rhône-Alpes', 'sales_weight': 4},
            {'abbr': 'PACA', 'full': 'Provence-Alpes-Côte d\'Azur', 'sales_weight': 4},
            {'abbr': 'OCC', 'full': 'Occitanie', 'sales_weight': 3},
            {'abbr': 'HDF', 'full': 'Hauts-de-France', 'sales_weight': 3},
            {'abbr': 'GES', 'full': 'Grand Est', 'sales_weight': 2},
            {'abbr': 'NOR', 'full': 'Normandy', 'sales_weight': 2},
            {'abbr': 'BRE', 'full': 'Brittany', 'sales_weight': 2},
            {'abbr': 'PDL', 'full': 'Pays de la Loire', 'sales_weight': 2},
            {'abbr': 'BFC', 'full': 'Bourgogne-Franche-Comté', 'sales_weight': 1},
            {'abbr': 'CVL', 'full': 'Centre-Val de Loire', 'sales_weight': 1},
            {'abbr': 'COR', 'full': 'Corsica', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Netherlands': {'country_code': 'NL', 'states': [
            {'abbr': 'NH', 'full': 'North Holland', 'sales_weight': 7},
            {'abbr': 'ZH', 'full': 'South Holland', 'sales_weight': 6},
            {'abbr': 'UT', 'full': 'Utrecht', 'sales_weight': 4},
            {'abbr': 'GE', 'full': 'Gelderland', 'sales_weight': 3},
            {'abbr': 'NB', 'full': 'North Brabant', 'sales_weight': 3},
            {'abbr': 'GR', 'full': 'Groningen', 'sales_weight': 2},
            {'abbr': 'OV', 'full': 'Overijssel', 'sales_weight': 2},
            {'abbr': 'FR', 'full': 'Friesland', 'sales_weight': 1},
            {'abbr': 'FL', 'full': 'Flevoland', 'sales_weight': 1},
            {'abbr': 'LB', 'full': 'Limburg', 'sales_weight': 1},
            {'abbr': 'DR', 'full': 'Drenthe', 'sales_weight': 1},
            {'abbr': 'ZE', 'full': 'Zeeland', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Sweden': {'country_code': 'SE', 'states': [
            {'abbr': 'AB', 'full': 'Stockholm County', 'sales_weight': 10},
            {'abbr': 'O', 'full': 'Västra Götaland County', 'sales_weight': 5},
            {'abbr': 'M', 'full': 'Skåne County', 'sales_weight': 4},
            {'abbr': 'D', 'full': 'Södermanland County', 'sales_weight': 2},
            {'abbr': 'E', 'full': 'Östergötland County', 'sales_weight': 2},
            {'abbr': 'U', 'full': 'Västmanland County', 'sales_weight': 2},
            {'abbr': 'I', 'full': 'Gotland County', 'sales_weight': 1},
            {'abbr': 'AC', 'full': 'Västerbotten County', 'sales_weight': 1},
            {'abbr': 'BD', 'full': 'Norrbotten County', 'sales_weight': 1},
            {'abbr': 'C', 'full': 'Uppsala County', 'sales_weight': 1},
            {'abbr': 'F', 'full': 'Jönköping County', 'sales_weight': 1},
            {'abbr': 'G', 'full': 'Kronoberg County', 'sales_weight': 1},
            {'abbr': 'H', 'full': 'Kalmar County', 'sales_weight': 1},
            {'abbr': 'K', 'full': 'Blekinge County', 'sales_weight': 1},
            {'abbr': 'N', 'full': 'Halland County', 'sales_weight': 1},
            {'abbr': 'S', 'full': 'Värmland County', 'sales_weight': 1},
            {'abbr': 'T', 'full': 'Örebro County', 'sales_weight': 1},
            {'abbr': 'W', 'full': 'Dalarna County', 'sales_weight': 1},
            {'abbr': 'X', 'full': 'Gävleborg County', 'sales_weight': 1},
            {'abbr': 'Y', 'full': 'Västernorrland County', 'sales_weight': 1},
            {'abbr': 'Z', 'full': 'Jämtland County', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Switzerland': {'country_code': 'CH', 'states': [
            {'abbr': 'ZH', 'full': 'Zurich', 'sales_weight': 6},
            {'abbr': 'GE', 'full': 'Geneva', 'sales_weight': 4},
            {'abbr': 'VD', 'full': 'Vaud', 'sales_weight': 3},
            {'abbr': 'BE', 'full': 'Bern', 'sales_weight': 3},
            {'abbr': 'BS', 'full': 'Basel-Stadt', 'sales_weight': 2},
            {'abbr': 'LU', 'full': 'Lucerne', 'sales_weight': 2},
            {'abbr': 'AG', 'full': 'Aargau', 'sales_weight': 2},
            {'abbr': 'VS', 'full': 'Valais', 'sales_weight': 2},
            {'abbr': 'ZG', 'full': 'Zug', 'sales_weight': 1},
            {'abbr': 'TI', 'full': 'Ticino', 'sales_weight': 1},
            {'abbr': 'SG', 'full': 'St. Gallen', 'sales_weight': 1},
            {'abbr': 'BL', 'full': 'Basel-Landschaft', 'sales_weight': 1},
            {'abbr': 'TG', 'full': 'Thurgau', 'sales_weight': 1},
            {'abbr': 'SO', 'full': 'Solothurn', 'sales_weight': 1},
            {'abbr': 'SZ', 'full': 'Schwyz', 'sales_weight': 1},
            {'abbr': 'AR', 'full': 'Appenzell Ausserrhoden', 'sales_weight': 1},
            {'abbr': 'AI', 'full': 'Appenzell Innerrhoden', 'sales_weight': 1},
            {'abbr': 'GL', 'full': 'Glarus', 'sales_weight': 1},
            {'abbr': 'JU', 'full': 'Jura', 'sales_weight': 1},
            {'abbr': 'NE', 'full': 'Neuchâtel', 'sales_weight': 1},
            {'abbr': 'OW', 'full': 'Obwalden', 'sales_weight': 1},
            {'abbr': 'NW', 'full': 'Nidwalden', 'sales_weight': 1},
            {'abbr': 'SH', 'full': 'Schaffhausen', 'sales_weight': 1},
            {'abbr': 'GR', 'full': 'Graubünden', 'sales_weight': 1},
            {'abbr': 'UR', 'full': 'Uri', 'sales_weight': 1},
            {'abbr': 'FR', 'full': 'Fribourg', 'sales_weight': 1},
        ], 'zip_generator': generate_plausible_zip},
        'Italy': {'country_code': 'IT', 'states': [
            {'abbr': 'LOM', 'full': 'Lombardy', 'sales_weight': 10},
            {'abbr': 'VEN', 'full': 'Veneto', 'sales_weight': 5},
            {'abbr': 'LAZ', 'full': 'Lazio', 'sales_weight': 4},
            {'abbr': 'PIE', 'full': 'Piedmont', 'sales_weight': 4},
            {'abbr': 'TOS', 'full': 'Tuscany', 'sales_weight': 3},
            {'abbr': 'EMR', 'full': 'Emilia-Romagna', 'sales_weight': 3},
            {'abbr': 'CAM', 'full': 'Campania', 'sales_weight': 2},
            {'abbr': 'SIC', 'full': 'Sicily', 'sales_weight': 2},
            {'abbr': 'PUG', 'full': 'Apulia', 'sales_weight': 2},
            {'abbr': 'FVG', 'full': 'Friuli-Venezia Giulia', 'sales_weight': 2},
            {'abbr': 'SAR', 'full': 'Sardinia', 'sales_weight': 1},
            {'abbr': 'ABR', 'full': 'Abruzzo', 'sales_weight': 1},
            {'abbr': 'BAS', 'full': 'Basilicata', 'sales_weight': 1},
            {'abbr': 'CAL', 'full': 'Calabria', 'sales_weight': 1},
            {'abbr': 'LIG', 'full': 'Liguria', 'sales_weight': 1},
            {'abbr': 'MAR', 'full': 'Marche', 'sales_weight': 1},
            {'abbr': 'MOL', 'full': 'Molise', 'sales_weight': 1},
            {'abbr': 'TAA', 'full': 'Trentino-South Tyrol', 'sales_weight': 1},
            {'abbr': 'UMB', 'full': 'Umbria', 'sales_weight': 1},
            {'abbr': 'VAO', 'full': 'Aosta Valley', 'sales_weight': 1},
        ], 'zip_generator': generate_plausible_zip},
        'Spain': {'country_code': 'ES', 'states': [
            {'abbr': 'MD', 'full': 'Madrid', 'sales_weight': 10},
            {'abbr': 'CT', 'full': 'Catalonia', 'sales_weight': 8},
            {'abbr': 'AN', 'full': 'Andalusia', 'sales_weight': 6},
            {'abbr': 'VC', 'full': 'Valencian Community', 'sales_weight': 5},
            {'abbr': 'PV', 'full': 'Basque Country', 'sales_weight': 4},
            {'abbr': 'CL', 'full': 'Castile and León', 'sales_weight': 3},
            {'abbr': 'GA', 'full': 'Galicia', 'sales_weight': 3},
            {'abbr': 'AR', 'full': 'Aragon', 'sales_weight': 2},
            {'abbr': 'IB', 'full': 'Balearic Islands', 'sales_weight': 2},
            {'abbr': 'CN', 'full': 'Canary Islands', 'sales_weight': 2},
            {'abbr': 'AS', 'full': 'Asturias', 'sales_weight': 1},
            {'abbr': 'CB', 'full': 'Cantabria', 'sales_weight': 1},
            {'abbr': 'CM', 'full': 'Castile-La Mancha', 'sales_weight': 1},
            {'abbr': 'EX', 'full': 'Extremadura', 'sales_weight': 1},
            {'abbr': 'RI', 'full': 'La Rioja', 'sales_weight': 1},
            {'abbr': 'MC', 'full': 'Murcia', 'sales_weight': 1},
            {'abbr': 'NC', 'full': 'Navarre', 'sales_weight': 1},
            {'abbr': 'CE', 'full': 'Ceuta', 'sales_weight': 1},
            {'abbr': 'ML', 'full': 'Melilla', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Belgium': {'country_code': 'BE', 'states': [
            {'abbr': 'VLG', 'full': 'Flanders', 'sales_weight': 7},
            {'abbr': 'WAL', 'full': 'Wallonia', 'sales_weight': 4},
            {'abbr': 'BRU', 'full': 'Brussels-Capital Region', 'sales_weight': 2}
        ], 'zip_generator': generate_plausible_zip},
        'Austria': {'country_code': 'AT', 'states': [
            {'abbr': 'WIE', 'full': 'Vienna', 'sales_weight': 5},
            {'abbr': 'OÖ', 'full': 'Upper Austria', 'sales_weight': 3},
            {'abbr': 'NOE', 'full': 'Lower Austria', 'sales_weight': 2},
            {'abbr': 'TIR', 'full': 'Tyrol', 'sales_weight': 2},
            {'abbr': 'STE', 'full': 'Styria', 'sales_weight': 2},
            {'abbr': 'SAL', 'full': 'Salzburg', 'sales_weight': 1},
            {'abbr': 'KNT', 'full': 'Carinthia', 'sales_weight': 1},
            {'abbr': 'BGL', 'full': 'Burgenland', 'sales_weight': 1},
            {'abbr': 'VOE', 'full': 'Vorarlberg', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Denmark': {'country_code': 'DK', 'states': [
            {'abbr': 'H', 'full': 'Capital Region of Denmark', 'sales_weight': 6},
            {'abbr': 'M', 'full': 'Central Denmark Region', 'sales_weight': 3},
            {'abbr': 'S', 'full': 'Region of Southern Denmark', 'sales_weight': 2},
            {'abbr': 'SJ', 'full': 'Zealand Region', 'sales_weight': 2},
            {'abbr': 'ND', 'full': 'North Denmark Region', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Finland': {'country_code': 'FI', 'states': [
            {'abbr': 'US', 'full': 'Uusimaa', 'sales_weight': 6},
            {'abbr': 'PS', 'full': 'Northern Savonia', 'sales_weight': 2},
            {'abbr': 'LA', 'full': 'Lapland', 'sales_weight': 2},
            {'abbr': 'OL', 'full': 'North Ostrobothnia', 'sales_weight': 2},
            {'abbr': 'HA', 'full': 'Central Ostrobothnia', 'sales_weight': 1},
            {'abbr': 'KA', 'full': 'Kainuu', 'sales_weight': 1},
            {'abbr': 'KE', 'full': 'Central Finland', 'sales_weight': 1},
            {'abbr': 'KM', 'full': 'Kymenlaakso', 'sales_weight': 1},
            {'abbr': 'KU', 'full': 'Pirkanmaa', 'sales_weight': 1},
            {'abbr': 'PH', 'full': 'North Karelia', 'sales_weight': 1},
            {'abbr': 'PV', 'full': 'South Karelia', 'sales_weight': 1},
            {'abbr': 'SA', 'full': 'Satakunta', 'sales_weight': 1},
            {'abbr': 'SS', 'full': 'Southern Savonia', 'sales_weight': 1},
            {'abbr': 'TA', 'full': 'Tavastia Proper', 'sales_weight': 1},
            {'abbr': 'ES', 'full': 'Southern Ostrobothnia', 'sales_weight': 1},
            {'abbr': 'VA', 'full': 'Vaasa', 'sales_weight': 1},
            {'abbr': 'AS', 'full': 'Åland Islands', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Greece': {'country_code': 'GR', 'states': [
            {'abbr': 'AT', 'full': 'Attica', 'sales_weight': 7},
            {'abbr': 'MK', 'full': 'Central Macedonia', 'sales_weight': 3},
            {'abbr': 'CR', 'full': 'Crete', 'sales_weight': 2},
            {'abbr': 'EM', 'full': 'East Macedonia and Thrace', 'sales_weight': 1},
            {'abbr': 'EP', 'full': 'Epirus', 'sales_weight': 1},
            {'abbr': 'IO', 'full': 'Ionian Islands', 'sales_weight': 1},
            {'abbr': 'NA', 'full': 'North Aegean', 'sales_weight': 1},
            {'abbr': 'PC', 'full': 'Peloponnese', 'sales_weight': 1},
            {'abbr': 'SM', 'full': 'South Aegean', 'sales_weight': 1},
            {'abbr': 'TH', 'full': 'Thessaly', 'sales_weight': 1},
            {'abbr': 'WM', 'full': 'West Macedonia', 'sales_weight': 1},
            {'abbr': 'WG', 'full': 'West Greece', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Iceland': {'country_code': 'IS', 'states': [
            {'abbr': 'HO', 'full': 'Capital Region', 'sales_weight': 10},
            {'abbr': 'NV', 'full': 'South Region', 'sales_weight': 2},
            {'abbr': 'SU', 'full': 'Westfjords', 'sales_weight': 1},
            {'abbr': 'V', 'full': 'Western Region', 'sales_weight': 1},
            {'abbr': 'VE', 'full': 'Southern Peninsula', 'sales_weight': 1},
            {'abbr': 'AU', 'full': 'East Region', 'sales_weight': 1},
            {'abbr': 'NO', 'full': 'Northeastern Region', 'sales_weight': 1},
            {'abbr': 'W', 'full': 'Northwestern Region', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Ireland': {'country_code': 'IE', 'states': [
            {'abbr': 'D', 'full': 'Dublin', 'sales_weight': 8},
            {'abbr': 'CO', 'full': 'Cork', 'sales_weight': 4},
            {'abbr': 'G', 'full': 'Galway', 'sales_weight': 3},
            {'abbr': 'L', 'full': 'Limerick', 'sales_weight': 2},
            {'abbr': 'W', 'full': 'Waterford', 'sales_weight': 2},
            {'abbr': 'DL', 'full': 'Donegal', 'sales_weight': 1},
            {'abbr': 'KY', 'full': 'Kerry', 'sales_weight': 1},
            {'abbr': 'KE', 'full': 'Kildare', 'sales_weight': 1},
            {'abbr': 'KK', 'full': 'Kilkenny', 'sales_weight': 1},
            {'abbr': 'LS', 'full': 'Laois', 'sales_weight': 1},
            {'abbr': 'LD', 'full': 'Longford', 'sales_weight': 1},
            {'abbr': 'LH', 'full': 'Louth', 'sales_weight': 1},
            {'abbr': 'MO', 'full': 'Mayo', 'sales_weight': 1},
            {'abbr': 'MH', 'full': 'Meath', 'sales_weight': 1},
            {'abbr': 'MN', 'full': 'Monaghan', 'sales_weight': 1},
            {'abbr': 'OY', 'full': 'Offaly', 'sales_weight': 1},
            {'abbr': 'RN', 'full': 'Roscommon', 'sales_weight': 1},
            {'abbr': 'SO', 'full': 'Sligo', 'sales_weight': 1},
            {'abbr': 'TA', 'full': 'Tipperary', 'sales_weight': 1},
            {'abbr': 'WM', 'full': 'Westmeath', 'sales_weight': 1},
            {'abbr': 'WX', 'full': 'Wexford', 'sales_weight': 1},
            {'abbr': 'WW', 'full': 'Wicklow', 'sales_weight': 1},
            {'abbr': 'CW', 'full': 'Carlow', 'sales_weight': 1},
            {'abbr': 'CN', 'full': 'Cavan', 'sales_weight': 1},
            {'abbr': 'CE', 'full': 'Clare', 'sales_weight': 1},
            {'abbr': 'LM', 'full': 'Leitrim', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Israel': {'country_code': 'IL', 'states': [
            {'abbr': 'TA', 'full': 'Tel Aviv District', 'sales_weight': 6},
            {'abbr': 'CE', 'full': 'Central District', 'sales_weight': 5},
            {'abbr': 'HA', 'full': 'Haifa District', 'sales_weight': 3},
            {'abbr': 'JM', 'full': 'Jerusalem District', 'sales_weight': 3},
            {'abbr': 'NO', 'full': 'Northern District', 'sales_weight': 2},
            {'abbr': 'SO', 'full': 'Southern District', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Luxembourg': {'country_code': 'LU', 'states': [
            {'abbr': 'LUX', 'full': 'Luxembourg District', 'sales_weight': 10},
            {'abbr': 'GR', 'full': 'Grevenmacher District', 'sales_weight': 2},
            {'abbr': 'DS', 'full': 'Diekirch District', 'sales_weight': 2}
        ], 'zip_generator': generate_plausible_zip},
        'Monaco': {'country_code': 'MC', 'states': [{'abbr': 'MCO', 'full': 'Monaco', 'sales_weight': 1}], 'zip_generator': generate_plausible_zip},
        'Poland': {'country_code': 'PL', 'states': [
            {'abbr': 'MZ', 'full': 'Masovian Voivodeship', 'sales_weight': 7},
            {'abbr': 'WP', 'full': 'Greater Poland Voivodeship', 'sales_weight': 4},
            {'abbr': 'LD', 'full': 'Łódź Voivodeship', 'sales_weight': 3},
            {'abbr': 'MA', 'full': 'Lesser Poland Voivodeship', 'sales_weight': 3},
            {'abbr': 'DS', 'full': 'Lower Silesian Voivodeship', 'sales_weight': 3},
            {'abbr': 'SL', 'full': 'Silesian Voivodeship', 'sales_weight': 2},
            {'abbr': 'PM', 'full': 'Pomeranian Voivodeship', 'sales_weight': 2},
            {'abbr': 'KP', 'full': 'Kuyavian-Pomeranian Voivodeship', 'sales_weight': 1},
            {'abbr': 'LU', 'full': 'Lublin Voivodeship', 'sales_weight': 1},
            {'abbr': 'LB', 'full': 'Lubusz Voivodeship', 'sales_weight': 1},
            {'abbr': 'OP', 'full': 'Opole Voivodeship', 'sales_weight': 1},
            {'abbr': 'PK', 'full': 'Subcarpathian Voivodeship', 'sales_weight': 1},
            {'abbr': 'PD', 'full': 'Podlaskie Voivodeship', 'sales_weight': 1},
            {'abbr': 'SW', 'full': 'Świętokrzyskie Voivodeship', 'sales_weight': 1},
            {'abbr': 'WN', 'full': 'Warmian-Masurian Voivodeship', 'sales_weight': 1},
            {'abbr': 'ZP', 'full': 'West Pomeranian Voivodeship', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Portugal': {'country_code': 'PT', 'states': [
            {'abbr': 'LI', 'full': 'Lisbon', 'sales_weight': 8},
            {'abbr': 'PT', 'full': 'Porto', 'sales_weight': 5},
            {'abbr': 'SE', 'full': 'Setúbal', 'sales_weight': 3},
            {'abbr': 'AV', 'full': 'Aveiro', 'sales_weight': 2},
            {'abbr': 'CO', 'full': 'Coimbra', 'sales_weight': 2},
            {'abbr': 'LE', 'full': 'Leiria', 'sales_weight': 2},
            {'abbr': 'FA', 'full': 'Faro', 'sales_weight': 2},
            {'abbr': 'BRG', 'full': 'Braga', 'sales_weight': 2},
            {'abbr': 'AZO', 'full': 'Azores', 'sales_weight': 1},
            {'abbr': 'MADE', 'full': 'Madeira', 'sales_weight': 1},
            {'abbr': 'BA', 'full': 'Beja', 'sales_weight': 1},
            {'abbr': 'BRC', 'full': 'Bragança', 'sales_weight': 1},
            {'abbr': 'CB', 'full': 'Castelo Branco', 'sales_weight': 1},
            {'abbr': 'EV', 'full': 'Évora', 'sales_weight': 1},
            {'abbr': 'GU', 'full': 'Guarda', 'sales_weight': 1},
            {'abbr': 'PO', 'full': 'Portalegre', 'sales_weight': 1},
            {'abbr': 'SA', 'full': 'Santarém', 'sales_weight': 1},
            {'abbr': 'VC', 'full': 'Viana do Castelo', 'sales_weight': 1},
            {'abbr': 'VL', 'full': 'Vila Real', 'sales_weight': 1},
            {'abbr': 'VI', 'full': 'Viseu', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Slovenia': {'country_code': 'SI', 'states': [
            {'abbr': 'LJ', 'full': 'Ljubljana', 'sales_weight': 8},
            {'abbr': 'MB', 'full': 'Maribor', 'sales_weight': 4},
            {'abbr': 'KR', 'full': 'Kranj', 'sales_weight': 2},
            {'abbr': 'CE', 'full': 'Celje', 'sales_weight': 1},
            {'abbr': 'KP', 'full': 'Koper', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Turkey': {'country_code': 'TR', 'states': [
            {'abbr': 'IST', 'full': 'Istanbul', 'sales_weight': 10},
            {'abbr': 'ANK', 'full': 'Ankara', 'sales_weight': 5},
            {'abbr': 'IZM', 'full': 'Izmir', 'sales_weight': 4},
            {'abbr': 'BUR', 'full': 'Bursa', 'sales_weight': 3},
            {'abbr': 'AD', 'full': 'Adana', 'sales_weight': 2},
            {'abbr': 'GAZ', 'full': 'Gaziantep', 'sales_weight': 2},
            {'abbr': 'KOC', 'full': 'Kocaeli', 'sales_weight': 2},
            {'abbr': 'MER', 'full': 'Mersin', 'sales_weight': 2},
            {'abbr': 'SA', 'full': 'Sakarya', 'sales_weight': 1},
            {'abbr': 'SAM', 'full': 'Samsun', 'sales_weight': 1},
            {'abbr': 'KAY', 'full': 'Kayseri', 'sales_weight': 1},
            {'abbr': 'HAT', 'full': 'Hatay', 'sales_weight': 1},
            {'abbr': 'DEN', 'full': 'Denizli', 'sales_weight': 1},
            {'abbr': 'KHO', 'full': 'Konya', 'sales_weight': 1},
            {'abbr': 'ES', 'full': 'Eskişehir', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip}
    },
    'Asia': {
        'China': {'country_code': 'CN', 'states': [
            {'abbr': 'GD', 'full': 'Guangdong', 'sales_weight': 10},
            {'abbr': 'SH', 'full': 'Shanghai', 'sales_weight': 9},
            {'abbr': 'BJ', 'full': 'Beijing', 'sales_weight': 8},
            {'abbr': 'ZJ', 'full': 'Zhejiang', 'sales_weight': 7},
            {'abbr': 'JS', 'full': 'Jiangsu', 'sales_weight': 6},
            {'abbr': 'SC', 'full': 'Sichuan', 'sales_weight': 5},
            {'abbr': 'SD', 'full': 'Shandong', 'sales_weight': 4},
            {'abbr': 'HUB', 'full': 'Hubei', 'sales_weight': 4},
            {'abbr': 'HE', 'full': 'Henan', 'sales_weight': 3},
            {'abbr': 'CQ', 'full': 'Chongqing', 'sales_weight': 3},
            {'abbr': 'FJ', 'full': 'Fujian', 'sales_weight': 3},
            {'abbr': 'AN', 'full': 'Anhui', 'sales_weight': 2},
            {'abbr': 'HUN', 'full': 'Hunan', 'sales_weight': 2},
            {'abbr': 'LN', 'full': 'Liaoning', 'sales_weight': 2},
            {'abbr': 'GZ', 'full': 'Guizhou', 'sales_weight': 1},
            {'abbr': 'GX', 'full': 'Guangxi', 'sales_weight': 1},
            {'abbr': 'HEB', 'full': 'Hebei', 'sales_weight': 1},
            {'abbr': 'HLJ', 'full': 'Heilongjiang', 'sales_weight': 1},
            {'abbr': 'JL', 'full': 'Jilin', 'sales_weight': 1},
            {'abbr': 'GS', 'full': 'Gansu', 'sales_weight': 1},
            {'abbr': 'HAIN', 'full': 'Hainan', 'sales_weight': 1},
            {'abbr': 'JX', 'full': 'Jiangxi', 'sales_weight': 1},
            {'abbr': 'NM', 'full': 'Inner Mongolia', 'sales_weight': 1},
            {'abbr': 'NX', 'full': 'Ningxia', 'sales_weight': 1},
            {'abbr': 'QH', 'full': 'Qinghai', 'sales_weight': 1},
            {'abbr': 'SNX', 'full': 'Shaanxi', 'sales_weight': 1},
            {'abbr': 'SX', 'full': 'Shanxi', 'sales_weight': 1},
            {'abbr': 'TJ', 'full': 'Tianjin', 'sales_weight': 1},
            {'abbr': 'XJ', 'full': 'Xinjiang', 'sales_weight': 1},
            {'abbr': 'XZ', 'full': 'Tibet', 'sales_weight': 1},
            {'abbr': 'YN', 'full': 'Yunnan', 'sales_weight': 1},
            {'abbr': 'HK', 'full': 'Hong Kong', 'sales_weight': 1},
            {'abbr': 'MO', 'full': 'Macau', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Hong Kong': {'country_code': 'HK', 'states': [{'abbr': 'HK', 'full': 'Hong Kong', 'sales_weight': 1}], 'zip_generator': generate_plausible_zip},
        'Macau': {'country_code': 'MO', 'states': [{'abbr': 'MO', 'full': 'Macau', 'sales_weight': 1}], 'zip_generator': generate_plausible_zip},
        'Japan': {'country_code': 'JP', 'states': [
            {'abbr': 'TYO', 'full': 'Tokyo', 'sales_weight': 10},
            {'abbr': 'OSK', 'full': 'Osaka', 'sales_weight': 7},
            {'abbr': 'KAN', 'full': 'Kanagawa', 'sales_weight': 6},
            {'abbr': 'Aichi', 'full': 'Aichi', 'sales_weight': 5},
            {'abbr': 'SIT', 'full': 'Saitama', 'sales_weight': 4},
            {'abbr': 'CHI', 'full': 'Chiba', 'sales_weight': 4},
            {'abbr': 'FUK', 'full': 'Fukuoka', 'sales_weight': 3},
            {'abbr': 'HYO', 'full': 'Hyogo', 'sales_weight': 3},
            {'abbr': 'KYO', 'full': 'Kyoto', 'sales_weight': 2},
            {'abbr': 'HKD', 'full': 'Hokkaido', 'sales_weight': 2},
            {'abbr': 'MYG', 'full': 'Miyagi', 'sales_weight': 1},
            {'abbr': 'FKS', 'full': 'Fukushima', 'sales_weight': 1},
            {'abbr': 'IBR', 'full': 'Ibaraki', 'sales_weight': 1},
            {'abbr': 'TOC', 'full': 'Tochigi', 'sales_weight': 1},
            {'abbr': 'GUN', 'full': 'Gunma', 'sales_weight': 1},
            {'abbr': 'NII', 'full': 'Niigata', 'sales_weight': 1},
            {'abbr': 'TYA', 'full': 'Toyama', 'sales_weight': 1},
            {'abbr': 'ISK', 'full': 'Ishikawa', 'sales_weight': 1},
            {'abbr': 'FUI', 'full': 'Fukui', 'sales_weight': 1},
            {'abbr': 'YMN', 'full': 'Yamanashi', 'sales_weight': 1},
            {'abbr': 'NGN', 'full': 'Nagano', 'sales_weight': 1},
            {'abbr': 'GIF', 'full': 'Gifu', 'sales_weight': 1},
            {'abbr': 'SZO', 'full': 'Shizuoka', 'sales_weight': 1},
            {'abbr': 'MIE', 'full': 'Mie', 'sales_weight': 1},
            {'abbr': 'SGA', 'full': 'Shiga', 'sales_weight': 1},
            {'abbr': 'NAR', 'full': 'Nara', 'sales_weight': 1},
            {'abbr': 'WAK', 'full': 'Wakayama', 'sales_weight': 1},
            {'abbr': 'TTI', 'full': 'Tottori', 'sales_weight': 1},
            {'abbr': 'SMN', 'full': 'Shimane', 'sales_weight': 1},
            {'abbr': 'OKY', 'full': 'Okayama', 'sales_weight': 1},
            {'abbr': 'HIR', 'full': 'Hiroshima', 'sales_weight': 1},
            {'abbr': 'YMG', 'full': 'Yamaguchi', 'sales_weight': 1},
            {'abbr': 'TOK', 'full': 'Tokushima', 'sales_weight': 1},
            {'abbr': 'KGA', 'full': 'Kagawa', 'sales_weight': 1},
            {'abbr': 'EHI', 'full': 'Ehime', 'sales_weight': 1},
            {'abbr': 'KOC', 'full': 'Kochi', 'sales_weight': 1},
            {'abbr': 'SGA', 'full': 'Saga', 'sales_weight': 1},
            {'abbr': 'NGS', 'full': 'Nagasaki', 'sales_weight': 1},
            {'abbr': 'KUM', 'full': 'Kumamoto', 'sales_weight': 1},
            {'abbr': 'OIT', 'full': 'Oita', 'sales_weight': 1},
            {'abbr': 'MYZ', 'full': 'Miyazaki', 'sales_weight': 1},
            {'abbr': 'KAG', 'full': 'Kagoshima', 'sales_weight': 1},
            {'abbr': 'OKI', 'full': 'Okinawa', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'South Korea': {'country_code': 'KR', 'states': [
            {'abbr': 'SEO', 'full': 'Seoul', 'sales_weight': 10},
            {'abbr': 'GG', 'full': 'Gyeonggi Province', 'sales_weight': 8},
            {'abbr': 'BUS', 'full': 'Busan', 'sales_weight': 5},
            {'abbr': 'ICN', 'full': 'Incheon', 'sales_weight': 4},
            {'abbr': 'DAE', 'full': 'Daegu', 'sales_weight': 3},
            {'abbr': 'GWA', 'full': 'Gwangju', 'sales_weight': 2},
            {'abbr': 'ULS', 'full': 'Ulsan', 'sales_weight': 2},
            {'abbr': 'SEJ', 'full': 'Sejong City', 'sales_weight': 1},
            {'abbr': 'GW', 'full': 'Gangwon Province', 'sales_weight': 1},
            {'abbr': 'CB', 'full': 'North Chungcheong Province', 'sales_weight': 1},
            {'abbr': 'CN', 'full': 'South Chungcheong Province', 'sales_weight': 1},
            {'abbr': 'JB', 'full': 'North Jeolla Province', 'sales_weight': 1},
            {'abbr': 'JN', 'full': 'South Jeolla Province', 'sales_weight': 1},
            {'abbr': 'GB', 'full': 'North Gyeongsang Province', 'sales_weight': 1},
            {'abbr': 'GN', 'full': 'South Gyeongsang Province', 'sales_weight': 1},
            {'abbr': 'JJ', 'full': 'Jeju Province', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Malaysia': {'country_code': 'MY', 'states': [
            {'abbr': 'KUL', 'full': 'Kuala Lumpur', 'sales_weight': 8},
            {'abbr': 'SEL', 'full': 'Selangor', 'sales_weight': 7},
            {'abbr': 'JHR', 'full': 'Johor', 'sales_weight': 4},
            {'abbr': 'PNG', 'full': 'Penang', 'sales_weight': 3},
            {'abbr': 'PHG', 'full': 'Pahang', 'sales_weight': 2},
            {'abbr': 'PRK', 'full': 'Perak', 'sales_weight': 2},
            {'abbr': 'SAB', 'full': 'Sabah', 'sales_weight': 2},
            {'abbr': 'SAR', 'full': 'Sarawak', 'sales_weight': 2},
            {'abbr': 'KDH', 'full': 'Kedah', 'sales_weight': 1},
            {'abbr': 'KTN', 'full': 'Kelantan', 'sales_weight': 1},
            {'abbr': 'MLK', 'full': 'Malacca', 'sales_weight': 1},
            {'abbr': 'NSN', 'full': 'Negeri Sembilan', 'sales_weight': 1},
            {'abbr': 'PER', 'full': 'Perlis', 'sales_weight': 1},
            {'abbr': 'TRG', 'full': 'Terengganu', 'sales_weight': 1},
            {'abbr': 'LBN', 'full': 'Labuan', 'sales_weight': 1},
            {'abbr': 'PJY', 'full': 'Putrajaya', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Philippines': {'country_code': 'PH', 'states': [
            {'abbr': 'NCR', 'full': 'Metro Manila', 'sales_weight': 10},
            {'abbr': 'IV-A', 'full': 'Calabarzon', 'sales_weight': 6},
            {'abbr': 'III', 'full': 'Central Luzon', 'sales_weight': 5},
            {'abbr': 'VI', 'full': 'Western Visayas', 'sales_weight': 3},
            {'abbr': 'VII', 'full': 'Central Visayas', 'sales_weight': 3},
            {'abbr': 'XI', 'full': 'Davao Region', 'sales_weight': 2},
            {'abbr': 'X', 'full': 'Northern Mindanao', 'sales_weight': 2},
            {'abbr': 'V', 'full': 'Bicol Region', 'sales_weight': 1},
            {'abbr': 'CAR', 'full': 'Cordillera Administrative Region', 'sales_weight': 1},
            {'abbr': 'I', 'full': 'Ilocos Region', 'sales_weight': 1},
            {'abbr': 'II', 'full': 'Cagayan Valley', 'sales_weight': 1},
            {'abbr': 'IV-B', 'full': 'Mimaropa', 'sales_weight': 1},
            {'abbr': 'VIII', 'full': 'Eastern Visayas', 'sales_weight': 1},
            {'abbr': 'IX', 'full': 'Zamboanga Peninsula', 'sales_weight': 1},
            {'abbr': 'XII', 'full': 'Soccsksargen', 'sales_weight': 1},
            {'abbr': 'XIII', 'full': 'Caraga', 'sales_weight': 1},
            {'abbr': 'BARMM', 'full': 'Bangsamoro Autonomous Region in Muslim Mindanao', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Singapore': {'country_code': 'SG', 'states': [{'abbr': 'SG', 'full': 'Singapore', 'sales_weight': 1}], 'zip_generator': generate_plausible_zip},
        'Thailand': {'country_code': 'TH', 'states': [
            {'abbr': 'BKK', 'full': 'Bangkok', 'sales_weight': 10},
            {'abbr': 'CM', 'full': 'Chiang Mai', 'sales_weight': 4},
            {'abbr': 'HKT', 'full': 'Phuket', 'sales_weight': 3},
            {'abbr': 'CHB', 'full': 'Chonburi', 'sales_weight': 3},
            {'abbr': 'PTY', 'full': 'Pattaya', 'sales_weight': 2},
            {'abbr': 'KHB', 'full': 'Khon Kaen', 'sales_weight': 2},
            {'abbr': 'NKH', 'full': 'Nakhon Ratchasima', 'sales_weight': 1},
            {'abbr': 'UBR', 'full': 'Ubon Ratchathani', 'sales_weight': 1},
            {'abbr': 'CHN', 'full': 'Chiang Rai', 'sales_weight': 1},
            {'abbr': 'SRN', 'full': 'Surat Thani', 'sales_weight': 1},
            {'abbr': 'UTR', 'full': 'Udon Thani', 'sales_weight': 1},
            {'abbr': 'RAY', 'full': 'Rayong', 'sales_weight': 1},
            {'abbr': 'SAM', 'full': 'Samut Prakan', 'sales_weight': 1},
            {'abbr': 'PATH', 'full': 'Pathum Thani', 'sales_weight': 1},
            {'abbr': 'AYT', 'full': 'Phra Nakhon Si Ayutthaya', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Taiwan': {'country_code': 'TW', 'states': [
            {'abbr': 'NTP', 'full': 'New Taipei City', 'sales_weight': 10},
            {'abbr': 'TPE', 'full': 'Taipei City', 'sales_weight': 8},
            {'abbr': 'TXG', 'full': 'Taichung City', 'sales_weight': 7},
            {'abbr': 'KHH', 'full': 'Kaohsiung City', 'sales_weight': 6},
            {'abbr': 'TYC', 'full': 'Taoyuan City', 'sales_weight': 6},
            {'abbr': 'TNA', 'full': 'Tainan City', 'sales_weight': 5},
            {'abbr': 'CHA', 'full': 'Changhua County', 'sales_weight': 3},
            {'abbr': 'HSC', 'full': 'Hsinchu County', 'sales_weight': 2},
            {'abbr': 'YUN', 'full': 'Yunlin County', 'sales_weight': 2},
            {'abbr': 'ILA', 'full': 'Yilan County', 'sales_weight': 1},
            {'abbr': 'MIA', 'full': 'Miaoli County', 'sales_weight': 1},
            {'abbr': 'NAN', 'full': 'Nantou County', 'sales_weight': 1},
            {'abbr': 'CYQ', 'full': 'Chiayi County', 'sales_weight': 1},
            {'abbr': 'PING', 'full': 'Pingtung County', 'sales_weight': 1},
            {'abbr': 'TTC', 'full': 'Taitung County', 'sales_weight': 1},
            {'abbr': 'HUA', 'full': 'Hualien County', 'sales_weight': 1},
            {'abbr': 'PEN', 'full': 'Penghu County', 'sales_weight': 1},
            {'abbr': 'KEE', 'full': 'Keelung City', 'sales_weight': 1},
            {'abbr': 'HSZ', 'full': 'Hsinchu City', 'sales_weight': 1},
            {'abbr': 'CYI', 'full': 'Chiayi City', 'sales_weight': 1},
            {'abbr': 'LIEN', 'full': 'Lienchiang County', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip}
    },
    'Oceania': {
        'Australia': {'country_code': 'AU', 'states': [
            {'abbr': 'NSW', 'full': 'New South Wales', 'sales_weight': 8},
            {'abbr': 'VIC', 'full': 'Victoria', 'sales_weight': 7},
            {'abbr': 'QLD', 'full': 'Queensland', 'sales_weight': 5},
            {'abbr': 'WA', 'full': 'Western Australia', 'sales_weight': 3},
            {'abbr': 'SA', 'full': 'South Australia', 'sales_weight': 2},
            {'abbr': 'TAS', 'full': 'Tasmania', 'sales_weight': 1},
            {'abbr': 'ACT', 'full': 'Australian Capital Territory', 'sales_weight': 1},
            {'abbr': 'NT', 'full': 'Northern Territory', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'New Zealand': {'country_code': 'NZ', 'states': [
            {'abbr': 'AKL', 'full': 'Auckland Region', 'sales_weight': 8},
            {'abbr': 'WLG', 'full': 'Wellington Region', 'sales_weight': 4},
            {'abbr': 'CAN', 'full': 'Canterbury Region', 'sales_weight': 4},
            {'abbr': 'WKO', 'full': 'Waikato Region', 'sales_weight': 3},
            {'abbr': 'BOP', 'full': 'Bay of Plenty Region', 'sales_weight': 2},
            {'abbr': 'OTA', 'full': 'Otago Region', 'sales_weight': 2},
            {'abbr': 'NTL', 'full': 'Northland Region', 'sales_weight': 1},
            {'abbr': 'GIS', 'full': 'Gisborne Region', 'sales_weight': 1},
            {'abbr': 'HKB', 'full': 'Hawke\'s Bay Region', 'sales_weight': 1},
            {'abbr': 'MWT', 'full': 'Manawatū-Whanganui Region', 'sales_weight': 1},
            {'abbr': 'MBH', 'full': 'Marlborough Region', 'sales_weight': 1},
            {'abbr': 'NSN', 'full': 'Nelson Region', 'sales_weight': 1},
            {'abbr': 'STL', 'full': 'Southland Region', 'sales_weight': 1},
            {'abbr': 'TKI', 'full': 'Taranaki Region', 'sales_weight': 1},
            {'abbr': 'TAS', 'full': 'Tasman Region', 'sales_weight': 1},
            {'abbr': 'WTC', 'full': 'West Coast Region', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip}
    },
    'South America': {
        'Chile': {'country_code': 'CL', 'states': [
            {'abbr': 'RM', 'full': 'Santiago Metropolitan Region', 'sales_weight': 8},
            {'abbr': 'VS', 'full': 'Valparaíso Region', 'sales_weight': 4},
            {'abbr': 'LL', 'full': 'Los Lagos Region', 'sales_weight': 2},
            {'abbr': 'BI', 'full': 'Bío Bío Region', 'sales_weight': 2},
            {'abbr': 'AR', 'full': 'Araucanía Region', 'sales_weight': 1},
            {'abbr': 'MA', 'full': 'Magallanes Region', 'sales_weight': 1},
            {'abbr': 'AP', 'full': 'Arica y Parinacota Region', 'sales_weight': 1},
            {'abbr': 'TA', 'full': 'Tarapacá Region', 'sales_weight': 1},
            {'abbr': 'AN', 'full': 'Antofagasta Region', 'sales_weight': 1},
            {'abbr': 'AT', 'full': 'Atacama Region', 'sales_weight': 1},
            {'abbr': 'CO', 'full': 'Coquimbo Region', 'sales_weight': 1},
            {'abbr': 'OH', 'full': 'O\'Higgins Region', 'sales_weight': 1},
            {'abbr': 'MA', 'full': 'Maule Region', 'sales_weight': 1},
            {'abbr': 'ÑB', 'full': 'Ñuble Region', 'sales_weight': 1},
            {'abbr': 'LR', 'full': 'Los Ríos Region', 'sales_weight': 1},
            {'abbr': 'AI', 'full': 'Aysén Region', 'sales_weight': 1}
        ], 'zip_generator': generate_plausible_zip},
        'Colombia': {'country_code': 'CO', 'states': [
            {'abbr': 'DC', 'full': 'Bogotá D.C.', 'sales_weight': 8},
            {'abbr': 'ANT', 'full': 'Antioquia', 'sales_weight': 6},
            {'abbr': 'VAL', 'full': 'Valle del Cauca', 'sales_weight': 4},
            {'abbr': 'CUN', 'full': 'Cundinamarca', 'sales_weight': 3},
            {'abbr': 'ATL', 'full': 'Atlántico', 'sales_weight': 2},
            {'abbr': 'BOL', 'full': 'Bolívar', 'sales_weight': 2},
            {'abbr': 'SAN', 'full': 'Santander', 'sales_weight': 2},
            {'abbr': 'MET', 'full': 'Meta', 'sales_weight': 1},
            {'abbr': 'NAR', 'full': 'Nariño', 'sales_weight': 1},
            {'abbr': 'HUI', 'full': 'Huila', 'sales_weight': 1},
            {'abbr': 'CAU', 'full': 'Cauca', 'sales_weight': 1},
            {'abbr': 'CES', 'full': 'Cesar', 'sales_weight': 1},
            {'abbr': 'COR', 'full': 'Córdoba', 'sales_weight': 1},
            {'abbr': 'MAG', 'full': 'Magdalena', 'sales_weight': 1},
            {'abbr': 'SUC', 'full': 'Sucre', 'sales_weight': 1},
            {'abbr': 'TOL', 'full': 'Tolima', 'sales_weight': 1},
            {'abbr': 'QUI', 'full': 'Quindío', 'sales_weight': 1},
            {'abbr': 'RIS', 'full': 'Risaralda', 'sales_weight': 1},
            {'abbr': 'NSA', 'full': 'Norte de Santander', 'sales_weight': 1},
            {'abbr': 'BOY', 'full': 'Boyacá', 'sales_weight': 1},
            {'abbr': 'CAL', 'full': 'Caldas', 'sales_weight': 1},
            {'abbr': 'LGU', 'full': 'La Guajira', 'sales_weight': 1},
            {'abbr': 'CHO', 'full': 'Chocó', 'sales_weight': 1},
            {'abbr': 'PUT', 'full': 'Putumayo', 'sales_weight': 1},
            {'abbr': 'CAS', 'full': 'Casanare', 'sales_weight': 1},
            {'abbr': 'GUV', 'full': 'Guaviare', 'sales_weight': 1},
            {'abbr': 'CAQ', 'full': 'Caquetá', 'sales_weight': 1},
            {'abbr': 'AMZ', 'full': 'Amazonas', 'sales_weight': 1},
            {'abbr': 'GUA', 'full': 'Guainía', 'sales_weight': 1},
            {'abbr': 'VAU', 'full': 'Vaupés', 'sales_weight': 1},
            {'abbr': 'VID', 'full': 'Vichada', 'sales_weight': 1},
            {'abbr': 'ARA', 'full': 'Arauca', 'sales_weight': 1},
            {'abbr': 'SAP', 'full': 'San Andrés y Providencia', 'sales_weight': 1},
        ], 'zip_generator': generate_plausible_zip}
    }
}

# --------------------------
# 定义 Tesla 的车型基准价和份额（来自你原始脚本）
# --------------------------
tesla_models_base_prices_2020 = {
    'Model 3': {'min_price': 38000, 'max_price': 55000, 'global_sales_share': 0.40, 'category': 'Sedan', 'launch_date': '2017-07-28'},
    'Model Y': {'min_price': 45000, 'max_price': 65000, 'global_sales_share': 0.50, 'category': 'SUV', 'launch_date': '2020-03-13'},
    'Model S': {'min_price': 70000, 'max_price': 95000, 'global_sales_share': 0.05, 'category': 'Sedan', 'launch_date': '2012-06-22'},
    'Model X': {'min_price': 80000, 'max_price': 110000, 'global_sales_share': 0.04, 'category': 'SUV', 'launch_date': '2015-09-29'},
    'Cybertruck': {'min_price': 60000, 'max_price': 80000, 'global_sales_share': 0.01, 'category': 'Truck', 'launch_date': '2023-11-30'}
}

# 年度价格调整系数（示例）
annual_price_adjustments = {
    2017: 1.25,
    2018: 1.20,
    2019: 1.15,
    2020: 1.0,
    2021: 0.98,
    2022: 0.95,
    2023: 0.92,
    2024: 0.90,
    2025: 0.88
}

# --------------------------
# 1. 生成 Dim_Product: 车型维表
# --------------------------
def generate_dim_product():
    dim_product_data = []
    model_id_counter = 1
    for model_name, info in tesla_models_base_prices_2020.items():
        dim_product_data.append({
            'Model_ID': model_id_counter,
            'Model_Name': model_name,
            'Model_Category': info.get('category', 'Unknown'),
            'Model_Base_Price_USD': round(np.mean([info['min_price'], info['max_price']]), 2),
            'Model_Launch_Date': info.get('launch_date', None)
        })
        model_id_counter += 1
    return pd.DataFrame(dim_product_data)

# --------------------------
# 2. 生成 Dim_Time: 按日生成时间维表，时间范围为2017年1月1日至2025年6月30日
# --------------------------
def generate_dim_time():
    dim_time_data = []
    start_date = pd.to_datetime('2017-01-01')
    end_date = pd.to_datetime('2025-06-30')
    dates = pd.date_range(start=start_date, end=end_date, freq='D')

    for date in dates:
        dim_time_data.append({
            'Time_ID': int(date.strftime('%Y%m%d')),
            'Full_Date': date.strftime('%Y-%m-%d'),
            'Year': date.year,
            'Quarter': f"Q{date.quarter}",
            'Month': date.month,
            'Day': date.day,
            'Week_of_Year': date.isocalendar()[1],
            'Day_of_Week': date.isocalendar()[2],
            'Day_Name': date.day_name()
        })
    return pd.DataFrame(dim_time_data)

# --------------------------
# 3. 生成 Dim_Customer: 虚构客户维度表
# --------------------------
def generate_dim_customer(num_customers=50000):
    customer_data = []
    genders = ['Male', 'Female', 'Other']
    age_groups = ['<25', '25-34', '35-44', '45-54', '55-64', '65+']
    income_levels = ['Low', 'Medium', 'High']
    first_names = ['John', 'Jane', 'Michael', 'Emily', 'Chris', 'Jessica', 'David', 'Sarah', 'Daniel', 'Laura']
    last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Rodriguez', 'Martinez']

    for i in range(1, num_customers + 1):
        customer_data.append({
            'Customer_ID': i,
            'Customer_Name': f"{random.choice(first_names)} {random.choice(last_names)}",
            'Gender': random.choice(genders),
            'Age_Group': random.choice(age_groups),
            'Income_Level': random.choice(income_levels)
        })
    return pd.DataFrame(customer_data)


# --------------------------
# 4. 生成 Fact_Sales 和 Dim_Geography
# --------------------------
def generate_sales_and_geography(dim_product_df, dim_time_df, dim_customer_df):
    all_sales_records = []
    dim_geography_data = []
    geo_id_counter = 1
    unique_geos = {}  # key: (continent, country, state_full, zip) -> Geo_ID
    state_default_zip = {}  # key: (country_name, state_full) -> zip
    quarterly_model_prices = {}

    # 收集所有国家的信息到一个列表中，方便后续迭代
    all_countries_info = []
    for continent_name, countries in tesla_countries.items():
        for country_name, country_info in countries.items():
            all_countries_info.append({
                'continent': continent_name,
                'country_name': country_name,
                'info': country_info
            })

    # 根据用户提供的超充桩数据，并补充完整
    charger_data = {
        'United States': 2297, 'Canada': 208, 'Mexico': 24,
        'China': 1902, 'Japan': 69, 'South Korea': 134, 'Taiwan': 77, 'Hong Kong': 10, 'Macau': 2,
        'Germany': 188, 'United Kingdom': 166, 'Norway': 98, 'France': 159, 'Netherlands': 55, 'Sweden': 77, 'Switzerland': 55, 'Italy': 110, 'Spain': 51, 'Belgium': 27, 'Austria': 18, 'Denmark': 35, 'Finland': 21, 'Greece': 10, 'Iceland': 6, 'Ireland': 13, 'Israel': 15, 'Luxembourg': 5, 'Monaco': 1, 'Poland': 12, 'Portugal': 25, 'Slovenia': 5, 'Turkey': 15,
        'Australia': 50, 'New Zealand': 12,
        'Chile': 5, 'Colombia': 3, 'Malaysia': 10, 'Philippines': 5, 'Singapore': 1, 'Thailand': 15
    }
    total_chargers = sum(charger_data.values())

    # 计算每个国家在全球总销售量中的比例（按超充桩数量）
    country_sales_proportion = {country: count / total_chargers for country, count in charger_data.items()}

    # 用户提供的年度/季度总销量数据 (来自截图)
    yearly_quarterly_sales_targets = {
        2017: {4: 103100}, # Model 3 launched late July 2017, so sales are concentrated in Q4.
        2018: {1: 50000, 2: 60000, 3: 65000, 4: 70240},
        2019: {1: 76250, 2: 82500, 3: 90000, 4: 118750},
        2020: {1: 88400, 2: 90650, 3: 139300, 4: 181200},
        2021: {1: 184800, 2: 201300, 3: 241300, 4: 308770},
        2022: {1: 310000, 2: 298000, 3: 343850, 4: 362000},
        2023: {1: 422870, 2: 466140, 3: 435000, 4: 484570},
        2024: {1: 484570, 2: 515430, 3: 620000, 4: 680000}, # 2024年数据为估算
        2025: {1: 412000, 2: 466000}, # 2025年数据来自用户明确指定
    }

    # Helper: 获取或创建 Geo_ID
    def get_or_create_geo_id(continent, country, country_code, state_full, state_abbr, zip_code):
        nonlocal geo_id_counter
        key = (continent, country, state_full, zip_code)
        if key in unique_geos:
            return unique_geos[key]
        else:
            gid = geo_id_counter
            unique_geos[key] = gid
            dim_geography_data.append({
                'Geo_ID': gid,
                'Continent': continent,
                'Country': country,
                'Country_Code': country_code,
                'State_Province': state_full,
                'State_Province_Abbr': state_abbr,
                'Zip_Code': zip_code
            })
            geo_id_counter += 1
            return gid

    # 生成 Dim_Geography 表，确保每个州至少有一个 Geo_ID
    for country_data in all_countries_info:
        continent_name = country_data['continent']
        country_name = country_data['country_name']
        country_info = country_data['info']
        country_code = country_info['country_code']
        states_list = country_info.get('states', [])

        for state_entry in states_list:
            state_abbr = state_entry['abbr']
            state_full = state_entry['full']
            key_state = (country_name, state_full)
            zip_code = country_info['zip_generator'](country_name, state_abbr)
            state_default_zip[key_state] = zip_code
            get_or_create_geo_id(continent_name, country_name, country_code, state_full, state_abbr, zip_code)

    # 根据年度/季度总销量和超充桩比例生成 Fact_Sales 数据
    for year, quarterly_sales in yearly_quarterly_sales_targets.items():
        for q_num, total_sales_for_q in quarterly_sales.items():
            quarter_str = f"Q{q_num}"
            print(f"Generating data for {year}-{quarter_str}...")

            # 获取该季度的时间范围
            q_start_date = pd.to_datetime(f"{year}-{q_num*3-2:02d}-01")
            q_end_date = pd.to_datetime(f"{year}-{q_num*3:02d}-01") - pd.Timedelta(days=1)
            q_dates = pd.date_range(start=q_start_date, end=q_end_date, freq='D')

            # 复制一份完整的模型信息字典
            model_shares = copy.deepcopy(tesla_models_base_prices_2020)

            # 根据年份动态调整销量份额
            if year < 2020:
                # Model Y在2020年3月上市，此前销量为0
                model_shares['Model Y']['global_sales_share'] = 0
            if year < 2023:
                # Cybertruck在2023年11月上市，此前销量为0
                model_shares['Cybertruck']['global_sales_share'] = 0

            # 重新计算权重，确保和为1
            total_model_share = sum(info['global_sales_share'] for info in model_shares.values())
            normalized_model_shares = {model: info['global_sales_share'] / total_model_share for model, info in model_shares.items()}
            model_list = list(normalized_model_shares.keys())
            model_weights = list(normalized_model_shares.values())

            # 按照国家销售比例，计算每个国家的销售记录数量
            sales_per_country = {country_name: round(total_sales_for_q * proportion)
                                 for country_name, proportion in country_sales_proportion.items()}

            # 确保总数与目标一致
            total_generated = sum(sales_per_country.values())
            if total_generated != total_sales_for_q:
                diff = total_sales_for_q - total_generated
                # 将差异分配给销量最高的国家
                highest_sales_country = max(sales_per_country, key=sales_per_country.get)
                sales_per_country[highest_sales_country] += diff

            # 遍历每个国家，生成其对应的销售记录
            for country_data in all_countries_info:
                country_name = country_data['country_name']
                country_info = country_data['info']
                sales_units_for_country_q = sales_per_country.get(country_name, 0)

                if sales_units_for_country_q == 0:
                    continue

                states_list = country_info.get('states', [])
                if not states_list:
                    continue

                # 根据州/省的销售权重重新分配销量
                state_weights = [s.get('sales_weight', 1) for s in states_list]
                total_state_weight = sum(state_weights)

                if total_state_weight == 0:
                    continue

                state_proportions = [w / total_state_weight for w in state_weights]

                # 批量生成地理位置数据 (根据州/省权重进行分配)
                chosen_states_for_batch = random.choices(states_list, weights=state_proportions, k=sales_units_for_country_q)

                batch_records = []
                # 批量生成其他维度数据
                num_records = len(chosen_states_for_batch)

                # 生成时间ID和客户ID
                time_ids = [int(d.strftime('%Y%m%d')) for d in random.choices(q_dates, k=num_records)]
                customer_ids = np.random.randint(1, dim_customer_df['Customer_ID'].max() + 1, size=num_records)

                # 生成车型ID和价格
                chosen_models = random.choices(model_list, weights=model_weights, k=num_records)
                model_ids = [int(dim_product_df[dim_product_df['Model_Name'] == model]['Model_ID'].iloc[0]) for model in chosen_models]

                # 批量生成价格
                batch_prices = []
                for model in chosen_models:
                    quarter_price_key = (year, q_num, model)
                    if quarter_price_key not in quarterly_model_prices:
                        model_base_info = tesla_models_base_prices_2020[model]
                        annual_adj = annual_price_adjustments.get(year, 1.0)
                        annual_price = np.random.uniform(model_base_info['min_price'], model_base_info['max_price']) * annual_adj
                        standard_price_usd = round(annual_price * np.random.uniform(0.98, 1.02), 2)
                        discounted_price_usd = round(standard_price_usd * 0.95, 2)
                        quarterly_model_prices[quarter_price_key] = {
                            'standard': standard_price_usd,
                            'discounted': discounted_price_usd
                        }
                    batch_prices.append(quarterly_model_prices[quarter_price_key])

                is_discounted_flags = np.random.rand(num_records) < 0.20
                revenues = [p['discounted'] if d else p['standard'] for p, d in zip(batch_prices, is_discounted_flags)]

                for i in range(num_records):
                    state_entry = chosen_states_for_batch[i]
                    state_full = state_entry['full']
                    geo_key = (country_data['continent'], country_name, state_full, state_default_zip.get((country_name, state_full)))
                    geo_id = unique_geos.get(geo_key)

                    batch_records.append({
                        'Time_ID': time_ids[i],
                        'Geo_ID': geo_id,
                        'Model_ID': model_ids[i],
                        'Customer_ID': customer_ids[i],
                        'Sales_Units': 1,
                        'Is_Discounted_Sale': bool(is_discounted_flags[i]),
                        'Revenue_USD': round(revenues[i], 2)
                    })

                all_sales_records.extend(batch_records)

    dim_geography_df = pd.DataFrame(dim_geography_data)
    fact_sales_df = pd.DataFrame(all_sales_records)

    # 5. 生成 Dim_Prices
    dim_prices_data = []
    for (year, quarter, model_name), prices in quarterly_model_prices.items():
        model_id = int(dim_product_df[dim_product_df['Model_Name'] == model_name]['Model_ID'].iloc[0])
        quarter_start_date = pd.to_datetime(f"{year}-{quarter*3-2:02d}-01").strftime('%Y-%m-%d')
        dim_prices_data.append({
            'Model_ID': model_id,
            'Quarter_Start_Date': quarter_start_date,
            'Standard_Price_USD': prices['standard'],
            'Discounted_Price_USD': prices['discounted']
        })
    dim_prices_df = pd.DataFrame(dim_prices_data)

    return fact_sales_df, dim_geography_df, dim_prices_df

# 主函数，执行所有生成任务并保存文件
def main():
    print("正在生成数据表...")

    dim_product_df = generate_dim_product()
    dim_time_df = generate_dim_time()
    dim_customer_df = generate_dim_customer()

    fact_sales_df, dim_geography_df, dim_prices_df = generate_sales_and_geography(dim_product_df, dim_time_df, dim_customer_df)

    # 保存到CSV
    dim_product_df.to_csv('Dim_Product.csv', index=False, encoding='utf-8')
    dim_time_df.to_csv('Dim_Time.csv', index=False, encoding='utf-8')
    dim_geography_df.to_csv('Dim_Geography.csv', index=False, encoding='utf-8')
    dim_prices_df.to_csv('Dim_Prices.csv', index=False, encoding='utf-8')
    dim_customer_df.to_csv('Dim_Customer.csv', index=False, encoding='utf-8')
    fact_sales_df.to_csv('Fact_Sales.csv', index=False, encoding='utf-8')

    print("所有数据表 (Dim_Product.csv, Dim_Time.csv, Dim_Geography.csv, Fact_Sales.csv, Dim_Prices.csv, Dim_Customer.csv) 已成功生成。")
    print(f"输出文件路径：{os.path.abspath('.')}")

if __name__ == "__main__":
    main()

正在生成数据表...
Generating data for 2017-Q4...
Generating data for 2018-Q1...
Generating data for 2018-Q2...
Generating data for 2018-Q3...
Generating data for 2018-Q4...


KeyboardInterrupt: 

In [7]:
import time

start = time.time()  # 记录开始时间

# 这里放你的代码
total = 0
for i in range(1000000):
    total += i

end = time.time()    # 记录结束时间
print("执行时间: {:.6f} 秒".format(end - start))


执行时间: 0.045187 秒
