In [4]:
import torch
import cupy as cp
from numba import cuda

# PyTorch
print("PyTorch CUDA 可用:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU 名称:", torch.cuda.get_device_name(0))

# CuPy
x = cp.arange(10**6, dtype=cp.float32)
y = cp.arange(10**6, dtype=cp.float32)
print("CuPy GPU 运算测试:", (x + y)[:5])

# Numba
print("Numba GPU 数量:", len(cuda.gpus))

ModuleNotFoundError: No module named 'torch'

**GPU测试**

In [5]:
# -*- coding: utf-8 -*-
"""
一键修复 CuPy 并测试 GPU
"""

import os
import sys
import subprocess

def run_cmd(cmd):
    """运行命令行并打印输出"""
    print(f">>> {cmd}")
    result = subprocess.run(cmd, shell=True)
    return result.returncode

def check_conflicting_files():
    """检查当前目录是否有 cupy.py 或 cupy 文件夹"""
    cwd = os.getcwd()
    conflicts = []
    for name in os.listdir(cwd):
        if name.lower() == "cupy.py" or name.lower() == "cupy":
            conflicts.append(name)
    if conflicts:
        print("⚠️ 发现可能冲突的文件/文件夹:", conflicts)
        print("请删除或重命名它们后再运行此脚本")
        return False
    return True

def reinstall_cupy():
    """卸载并重新安装 CuPy"""
    print("卸载残留的 CuPy...")
    run_cmd("pip uninstall cupy cupy-cuda12x -y")
    print("重新安装 cupy-cuda12x...")
    ret = run_cmd("pip install cupy-cuda12x")
    return ret == 0

def test_cupy_gpu():
    """测试 CuPy 和 GPU 是否可用"""
    try:
        import cupy as cp
        x = cp.arange(10)
        print("✅ CuPy 导入成功，示例数组：", x)
        print("检测 GPU 设备数量:", cp.cuda.runtime.getDeviceCount())
        print("当前 GPU 名称:", cp.cuda.runtime.getDeviceProperties(0)['name'].decode())
        return True
    except Exception as e:
        print("❌ CuPy 测试失败:", e)
        return False

if __name__ == "__main__":
    print("=== 一键修复 CuPy 并测试 GPU ===")
    if not check_conflicting_files():
        sys.exit(1)

    if reinstall_cupy():
        print("CuPy 安装完成，开始测试 GPU...")
        if test_cupy_gpu():
            print("🎉 GPU 测试成功！可以开始使用 CuPy 进行加速计算。")
        else:
            print("❌ GPU 测试失败，请检查 CUDA 驱动和显卡环境。")
    else:
        print("❌ CuPy 安装失败，请检查网络或 Python 环境。")


=== 一键修复 CuPy 并测试 GPU ===
卸载残留的 CuPy...
>>> pip uninstall cupy cupy-cuda12x -y


[0m

Found existing installation: cupy-cuda12x 13.6.0
Uninstalling cupy-cuda12x-13.6.0:
  Successfully uninstalled cupy-cuda12x-13.6.0
重新安装 cupy-cuda12x...
>>> pip install cupy-cuda12x
Collecting cupy-cuda12x
  Using cached cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_x86_64.whl.metadata (2.4 kB)
Using cached cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_x86_64.whl (112.9 MB)
Installing collected packages: cupy-cuda12x
Successfully installed cupy-cuda12x-13.6.0
CuPy 安装完成，开始测试 GPU...
✅ CuPy 导入成功，示例数组： [0 1 2 3 4 5 6 7 8 9]
检测 GPU 设备数量: 1
当前 GPU 名称: NVIDIA GeForce RTX 4070 Laptop GPU
🎉 GPU 测试成功！可以开始使用 CuPy 进行加速计算。


In [6]:
import cupy as cp

x = cp.arange(10)
print(x)
print("GPU count:", cp.cuda.runtime.getDeviceCount())
print("GPU name:", cp.cuda.runtime.getDeviceProperties(0)['name'])


[0 1 2 3 4 5 6 7 8 9]
GPU count: 1
GPU name: b'NVIDIA GeForce RTX 4070 Laptop GPU'


**GPU加速生成100万行数据**

In [7]:
# -*- coding: utf-8 -*-
"""Tesla Simulated Sales Data Generator

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qSg43jfMBtM4DZY_bMze_D93QjNTqAZo
"""

# tesla_sales_data_generator_star_schema.py
# 完整的星型模式数据生成脚本
# 目的：为每个国家的所有州/省/直辖市/特别行政区生成 Tesla 销售记录
# - 确保每个州至少有一条记录（避免地图空白）
# - 优化新西兰邮编生成，尽量使用现实可用的四位格式
# - 生成完整的星型模式：Fact_Sales, Dim_Product, Dim_Time, Dim_Geography, Dim_Prices, Dim_Customer
# 使用：python tesla_sales_data_generator_star_schema.py

import pandas as pd
import numpy as np
import random
import datetime
import math
import os

# 固定随机种子以便可复现（需要不同随机结果可注释掉）
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# --------------------------
# Helper: 生成合理格式的邮编/邮政编码（尽量贴近各国常见格式）
# --------------------------
def generate_plausible_zip(country, state_province_abbr):
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'

    # 逐国家处理（覆盖大多数脚本中使用到的国家）
    if country == 'United States':
        # 5-digit zip (做一些基于州缩写简单偏好)
        if state_province_abbr.startswith(('C', 'I')):
            return f"9{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith(('T', 'L')):
            return f"7{random.randint(5,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('F'):
            return f"3{random.randint(2,4)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('N'):
            return f"1{random.randint(0,4)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('W'):
            return f"98{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('A'):
            return f"85{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('G'):
            return f"30{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        if state_province_abbr.startswith('P'):
            return f"15{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
        return f"{random.randint(10000, 99999)}"

    elif country == 'Canada':
        # Canadian format A1A 1A1 (模拟)
        province_codes = {
            'ON': ['K','L','M','N','P'], 'QC': ['G','H','J'], 'BC': ['V'], 'AB': ['T'],
            'SK': ['S'], 'MB': ['R'], 'NB': ['E'], 'NS': ['B'], 'NL': ['A'], 'PE': ['C'],
            'YT': ['Y'], 'NT': ['X'], 'NU': ['X']
        }
        first_letter = random.choice(province_codes.get(state_province_abbr, ['A']))
        return f"{first_letter}{random.choice(digits)}{random.choice(letters)} {random.choice(digits)}{random.choice(letters)}{random.choice(digits)}"

    elif country == 'Mexico':
        return f"{random.randint(1000, 99999):05d}"

    elif country == 'United Kingdom':
        # 简化模拟英国邮编
        outward = f"{random.choice(letters)}{random.choice(letters)}{random.randint(1,9)}"
        inward = f"{random.choice(digits)}{random.choice(letters)}{random.choice(letters)}"
        return f"{outward} {inward}"

    elif country in ['China', 'Taiwan']:
        if country == 'China':
            # 6位邮编
            if state_province_abbr.startswith('B'):
                return f"10{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            if state_province_abbr.startswith('S'):
                return f"20{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            if state_province_abbr.startswith('G'):
                return f"51{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}{random.randint(0,9)}"
            return f"{random.randint(10000, 99999)}"
        else:
            # 台湾较常见的3位或3位+2的格式
            return f"{random.randint(100, 999)}"

    elif country == 'Germany':
        return f"{random.randint(10000, 99999)}"

    elif country == 'Japan':
        return f"{random.randint(100, 999)}-{random.randint(1000, 9999)}"

    elif country == 'Australia':
        return f"{random.randint(1000, 9999)}"

    elif country == 'New Zealand':
        # 更贴近新西兰的 4 位邮编格式 — 使用按州/区的近似区间（便于 Shape Map 匹配）
        # 说明：这些区间为常见/广义区间，足以覆盖可视化配对。可按需替换成精准映射表。
        nz_ranges = {
            'AUK': (600, 2699),   # Auckland (广义区间)
            'NTL': (100, 1099),   # Northland
            'WKO': (3200, 3799),  # Waikato
            'BOP': (3000, 3199),  # Bay of Plenty
            'GIS': (4010, 4199),  # Gisborne / East
            'HKB': (4100, 4299),  # Hawke's Bay
            'MWT': (4400, 4699),  # Manawatū-Whanganui
            'MBH': (7200, 7299),  # Marlborough
            'NSN': (7010, 7099),  # Nelson
            'OTA': (9000, 9799),  # Otago
            'STL': (9800, 9899),  # Southland
            'TKI': (4300, 4399),  # Taranaki
            'TAS': (7100, 7199),  # Tasman
            'WAI': (3200, 3799),  # Waikato (重复键安全)
            'WLG': (5010, 5799),  # Wellington region (广义)
            'WTC': (7800, 7999),  # West Coast
            'CAN': (7000, 7999),  # Canterbury (包含 Christchurch)
        }
        rng = nz_ranges.get(state_province_abbr)
        if rng:
            low, high = rng
            # 填充为 4 位字符串
            val = random.randint(low, high)
            return f"{val:04d}"
        else:
            return f"{random.randint(1000, 99999):05d}"

    elif country in ['France', 'Italy']:
        return f"{random.randint(10000, 99999)}"
    elif country == 'Spain':
        return f"{random.randint(10000, 52999)}"
    elif country == 'South Korea':
        return f"{random.randint(10000, 99999)}"
    elif country == 'Thailand':
        return f"{random.randint(10000, 99999)}"
    else:
        # 默认 5 位模拟
        return f"{random.randint(10000, 99999)}"


# --------------------------
# 定义国家/省/州 字典 (使用你原始脚本的详尽列表并确保使用英文全称和缩写)
# --------------------------

tesla_countries = {
    'North America': {
        'United States': {'country_code': 'US', 'states': [
            {'abbr': 'AL', 'full': 'Alabama'}, {'abbr': 'AK', 'full': 'Alaska'}, {'abbr': 'AZ', 'full': 'Arizona'},
            {'abbr': 'AR', 'full': 'Arkansas'}, {'abbr': 'CA', 'full': 'California'}, {'abbr': 'CO', 'full': 'Colorado'},
            {'abbr': 'CT', 'full': 'Connecticut'}, {'abbr': 'DE', 'full': 'Delaware'}, {'abbr': 'FL', 'full': 'Florida'},
            {'abbr': 'GA', 'full': 'Georgia'}, {'abbr': 'HI', 'full': 'Hawaii'}, {'abbr': 'ID', 'full': 'Idaho'},
            {'abbr': 'IL', 'full': 'Illinois'}, {'abbr': 'IN', 'full': 'Indiana'}, {'abbr': 'IA', 'full': 'Iowa'},
            {'abbr': 'KS', 'full': 'Kansas'}, {'abbr': 'KY', 'full': 'Kentucky'}, {'abbr': 'LA', 'full': 'Louisiana'},
            {'abbr': 'ME', 'full': 'Maine'}, {'abbr': 'MD', 'full': 'Maryland'}, {'abbr': 'MA', 'full': 'Massachusetts'},
            {'abbr': 'MI', 'full': 'Michigan'}, {'abbr': 'MN', 'full': 'Minnesota'}, {'abbr': 'MS', 'full': 'Mississippi'},
            {'abbr': 'MO', 'full': 'Missouri'}, {'abbr': 'MT', 'full': 'Montana'}, {'abbr': 'NE', 'full': 'Nebraska'},
            {'abbr': 'NV', 'full': 'Nevada'}, {'abbr': 'NH', 'full': 'New Hampshire'}, {'abbr': 'NJ', 'full': 'New Jersey'},
            {'abbr': 'NM', 'full': 'New Mexico'}, {'abbr': 'NY', 'full': 'New York'}, {'abbr': 'NC', 'full': 'North Carolina'},
            {'abbr': 'ND', 'full': 'North Dakota'}, {'abbr': 'OH', 'full': 'Ohio'}, {'abbr': 'OK', 'full': 'Oklahoma'},
            {'abbr': 'OR', 'full': 'Oregon'}, {'abbr': 'PA', 'full': 'Pennsylvania'}, {'abbr': 'RI', 'full': 'Rhode Island'},
            {'abbr': 'SC', 'full': 'South Carolina'}, {'abbr': 'SD', 'full': 'South Dakota'}, {'abbr': 'TN', 'full': 'Tennessee'},
            {'abbr': 'TX', 'full': 'Texas'}, {'abbr': 'UT', 'full': 'Utah'}, {'abbr': 'VT', 'full': 'Vermont'},
            {'abbr': 'VA', 'full': 'Virginia'}, {'abbr': 'WA', 'full': 'Washington'}, {'abbr': 'WV', 'full': 'West Virginia'},
            {'abbr': 'WI', 'full': 'Wisconsin'}, {'abbr': 'WY', 'full': 'Wyoming'}, {'abbr': 'DC', 'full': 'District of Columbia'}
        ], 'zip_generator': generate_plausible_zip},
        'Canada': {'country_code': 'CA', 'states': [
            {'abbr': 'AB', 'full': 'Alberta'}, {'abbr': 'BC', 'full': 'British Columbia'}, {'abbr': 'MB', 'full': 'Manitoba'},
            {'abbr': 'NB', 'full': 'New Brunswick'}, {'abbr': 'NL', 'full': 'Newfoundland and Labrador'}, {'abbr': 'NS', 'full': 'Nova Scotia'},
            {'abbr': 'ON', 'full': 'Ontario'}, {'abbr': 'PE', 'full': 'Prince Edward Island'}, {'abbr': 'QC', 'full': 'Quebec'},
            {'abbr': 'SK', 'full': 'Saskatchewan'}, {'abbr': 'NT', 'full': 'Northwest Territories'}, {'abbr': 'NU', 'full': 'Nunavut'},
            {'abbr': 'YT', 'full': 'Yukon'}
        ], 'zip_generator': generate_plausible_zip},
        'Mexico': {'country_code': 'MX', 'states': [
            {'abbr': 'AGS', 'full': 'Aguascalientes'}, {'abbr': 'BC', 'full': 'Baja California'}, {'abbr': 'BCS', 'full': 'Baja California Sur'},
            {'abbr': 'CAMP', 'full': 'Campeche'}, {'abbr': 'CHIS', 'full': 'Chiapas'}, {'abbr': 'CHIH', 'full': 'Chihuahua'},
            {'abbr': 'COAH', 'full': 'Coahuila'}, {'abbr': 'COL', 'full': 'Colima'}, {'abbr': 'DUR', 'full': 'Durango'},
            {'abbr': 'GTO', 'full': 'Guanajuato'}, {'abbr': 'GRO', 'full': 'Guerrero'}, {'abbr': 'HGO', 'full': 'Hidalgo'},
            {'abbr': 'JAL', 'full': 'Jalisco'}, {'abbr': 'MEX', 'full': 'Mexico State'}, {'abbr': 'MICH', 'full': 'Michoacán'},
            {'abbr': 'MOR', 'full': 'Morelos'}, {'abbr': 'NAY', 'full': 'Nayarit'}, {'abbr': 'NLE', 'full': 'Nuevo León'},
            {'abbr': 'OAX', 'full': 'Oaxaca'}, {'abbr': 'PUE', 'full': 'Puebla'}, {'abbr': 'QRO', 'full': 'Querétaro'},
            {'abbr': 'QR', 'full': 'Quintana Roo'}, {'abbr': 'SLP', 'full': 'San Luis Potosí'}, {'abbr': 'SIN', 'full': 'Sinaloa'},
            {'abbr': 'SON', 'full': 'Sonora'}, {'abbr': 'TAB', 'full': 'Tabasco'}, {'abbr': 'TAM', 'full': 'Tamaulipas'},
            {'abbr': 'TLAX', 'full': 'Tlaxcala'}, {'abbr': 'VER', 'full': 'Veracruz'}, {'abbr': 'YUC', 'full': 'Yucatán'},
            {'abbr': 'ZAC', 'full': 'Zacatecas'}, {'abbr': 'CDMX', 'full': 'Mexico City'}
        ], 'zip_generator': generate_plausible_zip}
    },
    'Europe': {
        'Germany': {'country_code': 'DE', 'states': [
            {'abbr': 'BW', 'full': 'Baden-Württemberg'}, {'abbr': 'BY', 'full': 'Bavaria'}, {'abbr': 'BE', 'full': 'Berlin'},
            {'abbr': 'BB', 'full': 'Brandenburg'}, {'abbr': 'HB', 'full': 'Bremen'}, {'abbr': 'HH', 'full': 'Hamburg'},
            {'abbr': 'HE', 'full': 'Hesse'}, {'abbr': 'MV', 'full': 'Mecklenburg-Vorpommern'}, {'abbr': 'NI', 'full': 'Lower Saxony'},
            {'abbr': 'NW', 'full': 'North Rhine-Westphalia'}, {'abbr': 'RP', 'full': 'Rhineland-Palatinate'},
            {'abbr': 'SL', 'full': 'Saarland'}, {'abbr': 'SN', 'full': 'Saxony'}, {'abbr': 'ST', 'full': 'Saxony-Anhalt'},
            {'abbr': 'SH', 'full': 'Schleswig-Holstein'}, {'abbr': 'TH', 'full': 'Thuringia'}
        ], 'zip_generator': generate_plausible_zip},
        'United Kingdom': {'country_code': 'GB', 'states': [
            {'abbr': 'ENG', 'full': 'England'}, {'abbr': 'SCT', 'full': 'Scotland'}, {'abbr': 'WLS', 'full': 'Wales'},
            {'abbr': 'NIR', 'full': 'Northern Ireland'}
        ], 'zip_generator': generate_plausible_zip},
        'Norway': {'country_code': 'NO', 'states': [
            {'abbr': 'OS', 'full': 'Oslo'}, {'abbr': 'VL', 'full': 'Viken'}, {'abbr': 'TR', 'full': 'Trøndelag'}, {'abbr': 'RO', 'full': 'Rogaland'},
            {'abbr': 'MR', 'full': 'Møre og Romsdal'}, {'abbr': 'INN', 'full': 'Innlandet'}, {'abbr': 'TROM', 'full': 'Troms og Finnmark'},
            {'abbr': 'VEST', 'full': 'Vestland'}, {'abbr': 'VESTF', 'full': 'Vestfold og Telemark'}, {'abbr': 'AGD', 'full': 'Agder'},
            {'abbr': 'NORDL', 'full': 'Nordland'}
        ], 'zip_generator': generate_plausible_zip},
        'France': {'country_code': 'FR', 'states': [
            {'abbr': 'ARA', 'full': 'Auvergne-Rhône-Alpes'}, {'abbr': 'BFC', 'full': 'Bourgogne-Franche-Comté'},
            {'abbr': 'BRE', 'full': 'Brittany'}, {'abbr': 'CVL', 'full': 'Centre-Val de Loire'}, {'abbr': 'COR', 'full': 'Corsica'},
            {'abbr': 'GES', 'full': 'Grand Est'}, {'abbr': 'HDF', 'full': 'Hauts-de-France'}, {'abbr': 'IDF', 'full': 'Île-de-France'},
            {'abbr': 'NOR', 'full': 'Normandy'}, {'abbr': 'NAQ', 'full': 'Nouvelle-Aquitaine'}, {'abbr': 'OCC', 'full': 'Occitanie'},
            {'abbr': 'PDL', 'full': 'Pays de la Loire'}, {'abbr': 'PAC', 'full': 'Provence-Alpes-Côte d\'Azur'}
        ], 'zip_generator': generate_plausible_zip},
        'Netherlands': {'country_code': 'NL', 'states': [
            {'abbr': 'DR', 'full': 'Drenthe'}, {'abbr': 'FL', 'full': 'Flevoland'}, {'abbr': 'FR', 'full': 'Friesland'},
            {'abbr': 'GE', 'full': 'Gelderland'}, {'abbr': 'GR', 'full': 'Groningen'}, {'abbr': 'LB', 'full': 'Limburg'},
            {'abbr': 'NB', 'full': 'North Brabant'}, {'abbr': 'NH', 'full': 'North Holland'}, {'abbr': 'OV', 'full': 'Overijssel'},
            {'abbr': 'UT', 'full': 'Utrecht'}, {'abbr': 'ZE', 'full': 'Zeeland'}, {'abbr': 'ZH', 'full': 'South Holland'}
        ], 'zip_generator': generate_plausible_zip},
        'Sweden': {'country_code': 'SE', 'states': [
            {'abbr': 'AB', 'full': 'Stockholm County'}, {'abbr': 'AC', 'full': 'Västerbotten County'},
            {'abbr': 'BD', 'full': 'Norrbotten County'}, {'abbr': 'C', 'full': 'Uppsala County'},
            {'abbr': 'D', 'full': 'Södermanland County'}, {'abbr': 'E', 'full': 'Östergötland County'},
            {'abbr': 'F', 'full': 'Jönköping County'}, {'abbr': 'G', 'full': 'Kronoberg County'},
            {'abbr': 'H', 'full': 'Kalmar County'}, {'abbr': 'I', 'full': 'Gotland County'},
            {'abbr': 'K', 'full': 'Blekinge County'}, {'abbr': 'M', 'full': 'Skåne County'},
            {'abbr': 'N', 'full': 'Halland County'}, {'abbr': 'O', 'full': 'Västra Götaland County'},
            {'abbr': 'S', 'full': 'Värmland County'}, {'abbr': 'T', 'full': 'Örebro County'},
            {'abbr': 'U', 'full': 'Västmanland County'}, {'abbr': 'W', 'full': 'Dalarna County'},
            {'abbr': 'X', 'full': 'Gävleborg County'}, {'abbr': 'Y', 'full': 'Västernorrland County'},
            {'abbr': 'Z', 'full': 'Jämtland County'}
        ], 'zip_generator': generate_plausible_zip},
        'Switzerland': {'country_code': 'CH', 'states': [
            {'abbr': 'ZH', 'full': 'Zurich'}, {'abbr': 'BE', 'full': 'Bern'}, {'abbr': 'LU', 'full': 'Lucerne'},
            {'abbr': 'UR', 'full': 'Uri'}, {'abbr': 'SZ', 'full': 'Schwyz'}, {'abbr': 'OW', 'full': 'Obwalden'},
            {'abbr': 'NW', 'full': 'Nidwalden'}, {'abbr': 'GL', 'full': 'Glarus'}, {'abbr': 'ZG', 'full': 'Zug'},
            {'abbr': 'FR', 'full': 'Fribourg'}, {'abbr': 'SO', 'full': 'Solothurn'}, {'abbr': 'BS', 'full': 'Basel-Stadt'},
            {'abbr': 'BL', 'full': 'Basel-Landschaft'}, {'abbr': 'SH', 'full': 'Schaffhausen'}, {'abbr': 'AR', 'full': 'Appenzell Ausserrhoden'},
            {'abbr': 'AI', 'full': 'Appenzell Innerrhoden'}, {'abbr': 'SG', 'full': 'St. Gallen'}, {'abbr': 'GR', 'full': 'Graubünden'},
            {'abbr': 'AG', 'full': 'Aargau'}, {'abbr': 'TG', 'full': 'Thurgau'}, {'abbr': 'TI', 'full': 'Ticino'},
            {'abbr': 'VD', 'full': 'Vaud'}, {'abbr': 'VS', 'full': 'Valais'}, {'abbr': 'NE', 'full': 'Neuchâtel'},
            {'abbr': 'GE', 'full': 'Geneva'}, {'abbr': 'JU', 'full': 'Jura'}
        ], 'zip_generator': generate_plausible_zip},
        'Italy': {'country_code': 'IT', 'states': [
            {'abbr': 'ABR', 'full': 'Abruzzo'}, {'abbr': 'BAS', 'full': 'Basilicata'}, {'abbr': 'CAL', 'full': 'Calabria'},
            {'abbr': 'CAM', 'full': 'Campania'}, {'abbr': 'EMR', 'full': 'Emilia-Romagna'}, {'abbr': 'FVG', 'full': 'Friuli-Venezia Giulia'},
            {'abbr': 'LAZ', 'full': 'Lazio'}, {'abbr': 'LIG', 'full': 'Liguria'}, {'abbr': 'LOM', 'full': 'Lombardy'},
            {'abbr': 'MAR', 'full': 'Marche'}, {'abbr': 'MOL', 'full': 'Molise'}, {'abbr': 'PIE', 'full': 'Piedmont'},
            {'abbr': 'PUG', 'full': 'Apulia'}, {'abbr': 'SAR', 'full': 'Sardinia'}, {'abbr': 'SIC', 'full': 'Sicily'},
            {'abbr': 'TOS', 'full': 'Tuscany'}, {'abbr': 'TAA', 'full': 'Trentino-South Tyrol'}, {'abbr': 'UMB', 'full': 'Umbria'},
            {'abbr': 'VAO', 'full': 'Aosta Valley'}, {'abbr': 'VEN', 'full': 'Veneto'}
        ], 'zip_generator': generate_plausible_zip},
        'Spain': {'country_code': 'ES', 'states': [
            {'abbr': 'AN', 'full': 'Andalusia'}, {'abbr': 'AR', 'full': 'Aragon'}, {'abbr': 'AS', 'full': 'Asturias'},
            {'abbr': 'CB', 'full': 'Cantabria'}, {'abbr': 'CM', 'full': 'Castile-La Mancha'}, {'abbr': 'CL', 'full': 'Castile and León'},
            {'abbr': 'CT', 'full': 'Catalonia'}, {'abbr': 'EX', 'full': 'Extremadura'}, {'abbr': 'GA', 'full': 'Galicia'},
            {'abbr': 'IB', 'full': 'Balearic Islands'}, {'abbr': 'RI', 'full': 'La Rioja'}, {'abbr': 'MD', 'full': 'Madrid'},
            {'abbr': 'MC', 'full': 'Murcia'}, {'abbr': 'NC', 'full': 'Navarre'}, {'abbr': 'PV', 'full': 'Basque Country'},
            {'abbr': 'VC', 'full': 'Valencian Community'}, {'abbr': 'CE', 'full': 'Ceuta'}, {'abbr': 'ML', 'full': 'Melilla'},
            {'abbr': 'CN', 'full': 'Canary Islands'}
        ], 'zip_generator': generate_plausible_zip},
        'Belgium': {'country_code': 'BE', 'states': [
            {'abbr': 'BRU', 'full': 'Brussels-Capital Region'}, {'abbr': 'WAL', 'full': 'Wallonia'}, {'abbr': 'VLG', 'full': 'Flanders'}
        ], 'zip_generator': generate_plausible_zip},
        'Austria': {'country_code': 'AT', 'states': [
            {'abbr': 'BGL', 'full': 'Burgenland'}, {'abbr': 'KNT', 'full': 'Carinthia'}, {'abbr': 'NOE', 'full': 'Lower Austria'},
            {'abbr': 'OÖ', 'full': 'Upper Austria'}, {'abbr': 'SAL', 'full': 'Salzburg'}, {'abbr': 'STE', 'full': 'Styria'},
            {'abbr': 'TIR', 'full': 'Tyrol'}, {'abbr': 'VOE', 'full': 'Vorarlberg'}, {'abbr': 'WIE', 'full': 'Vienna'}
        ], 'zip_generator': generate_plausible_zip},
        'Denmark': {'country_code': 'DK', 'states': [
            {'abbr': 'H', 'full': 'Capital Region of Denmark'}, {'abbr': 'M', 'full': 'Central Denmark Region'},
            {'abbr': 'S', 'full': 'Region of Southern Denmark'}, {'abbr': 'ND', 'full': 'North Denmark Region'},
            {'abbr': 'SJ', 'full': 'Zealand Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Finland': {'country_code': 'FI', 'states': [
            {'abbr': 'ES', 'full': 'Southern Ostrobothnia'}, {'abbr': 'LS', 'full': 'South Karelia'}, {'abbr': 'OL', 'full': 'North Ostrobothnia'},
            {'abbr': 'VA', 'full': 'Vaasa'}, {'abbr': 'AS', 'full': 'Åland Islands'}, {'abbr': 'HA', 'full': 'Central Ostrobothnia'},
            {'abbr': 'KA', 'full': 'Kainuu'}, {'abbr': 'KE', 'full': 'Central Finland'}, {'abbr': 'KM', 'full': 'Kymenlaakso'},
            {'abbr': 'KU', 'full': 'Pirkanmaa'}, {'abbr': 'LA', 'full': 'Lapland'}, {'abbr': 'PH', 'full': 'North Karelia'},
            {'abbr': 'PS', 'full': 'Northern Savonia'}, {'abbr': 'PV', 'full': 'South Karelia'}, {'abbr': 'SA', 'full': 'Satakunta'},
            {'abbr': 'SS', 'full': 'Southern Savonia'}, {'abbr': 'TA', 'full': 'Tavastia Proper'}, {'abbr': 'US', 'full': 'Uusimaa'}
        ], 'zip_generator': generate_plausible_zip},
        'Greece': {'country_code': 'GR', 'states': [
            {'abbr': 'AT', 'full': 'Attica'}, {'abbr': 'MK', 'full': 'Central Macedonia'}, {'abbr': 'CR', 'full': 'Crete'},
            {'abbr': 'EM', 'full': 'East Macedonia and Thrace'}, {'abbr': 'EP', 'full': 'Epirus'},
            {'abbr': 'IO', 'full': 'Ionian Islands'}, {'abbr': 'NA', 'full': 'North Aegean'},
            {'abbr': 'PC', 'full': 'Peloponnese'}, {'abbr': 'SM', 'full': 'South Aegean'},
            {'abbr': 'TH', 'full': 'Thessaly'}, {'abbr': 'WM', 'full': 'West Macedonia'},
            {'abbr': 'WG', 'full': 'West Greece'}
        ], 'zip_generator': generate_plausible_zip},
        'Iceland': {'country_code': 'IS', 'states': [
            {'abbr': 'HO', 'full': 'Capital Region'}, {'abbr': 'NV', 'full': 'South Region'}, {'abbr': 'SU', 'full': 'Westfjords'},
            {'abbr': 'V', 'full': 'Western Region'}, {'abbr': 'VE', 'full': 'Southern Peninsula'},
            {'abbr': 'AU', 'full': 'East Region'}, {'abbr': 'NO', 'full': 'Northeastern Region'},
            {'abbr': 'W', 'full': 'Northwestern Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Ireland': {'country_code': 'IE', 'states': [
            {'abbr': 'CW', 'full': 'Carlow'}, {'abbr': 'CN', 'full': 'Cavan'}, {'abbr': 'CE', 'full': 'Clare'},
            {'abbr': 'CO', 'full': 'Cork'}, {'abbr': 'DL', 'full': 'Donegal'}, {'abbr': 'D', 'full': 'Dublin'},
            {'abbr': 'G', 'full': 'Galway'}, {'abbr': 'KY', 'full': 'Kerry'}, {'abbr': 'KE', 'full': 'Kildare'},
            {'abbr': 'KK', 'full': 'Kilkenny'}, {'abbr': 'LS', 'full': 'Laois'}, {'abbr': 'LM', 'full': 'Leitrim'},
            {'abbr': 'L', 'full': 'Limerick'}, {'abbr': 'LD', 'full': 'Longford'}, {'abbr': 'LH', 'full': 'Louth'},
            {'abbr': 'MO', 'full': 'Mayo'}, {'abbr': 'MH', 'full': 'Meath'}, {'abbr': 'MN', 'full': 'Monaghan'},
            {'abbr': 'OY', 'full': 'Offaly'}, {'abbr': 'RN', 'full': 'Roscommon'}, {'abbr': 'SO', 'full': 'Sligo'},
            {'abbr': 'TA', 'full': 'Tipperary'}, {'abbr': 'W', 'full': 'Waterford'}, {'abbr': 'WM', 'full': 'Westmeath'},
            {'abbr': 'WX', 'full': 'Wexford'}, {'abbr': 'WW', 'full': 'Wicklow'}
        ], 'zip_generator': generate_plausible_zip},
        'Israel': {'country_code': 'IL', 'states': [
            {'abbr': 'HA', 'full': 'Haifa District'}, {'abbr': 'JM', 'full': 'Jerusalem District'},
            {'abbr': 'NO', 'full': 'Northern District'}, {'abbr': 'CE', 'full': 'Central District'},
            {'abbr': 'TA', 'full': 'Tel Aviv District'}, {'abbr': 'SO', 'full': 'Southern District'}
        ], 'zip_generator': generate_plausible_zip},
        'Luxembourg': {'country_code': 'LU', 'states': [
            {'abbr': 'DS', 'full': 'Diekirch District'}, {'abbr': 'LUX', 'full': 'Luxembourg District'},
            {'abbr': 'GR', 'full': 'Grevenmacher District'}
        ], 'zip_generator': generate_plausible_zip},
        'Monaco': {'country_code': 'MC', 'states': [{'abbr': 'MCO', 'full': 'Monaco'}], 'zip_generator': generate_plausible_zip},
        'Poland': {'country_code': 'PL', 'states': [
            {'abbr': 'DS', 'full': 'Lower Silesian Voivodeship'}, {'abbr': 'KP', 'full': 'Kuyavian-Pomeranian Voivodeship'},
            {'abbr': 'LD', 'full': 'Łódź Voivodeship'}, {'abbr': 'LU', 'full': 'Lublin Voivodeship'},
            {'abbr': 'LB', 'full': 'Lubusz Voivodeship'}, {'abbr': 'MA', 'full': 'Lesser Poland Voivodeship'},
            {'abbr': 'MZ', 'full': 'Masovian Voivodeship'}, {'abbr': 'OP', 'full': 'Opole Voivodeship'},
            {'abbr': 'PK', 'full': 'Subcarpathian Voivodeship'}, {'abbr': 'PD', 'full': 'Podlaskie Voivodeship'},
            {'abbr': 'PM', 'full': 'Pomeranian Voivodeship'}, {'abbr': 'SL', 'full': 'Silesian Voivodeship'},
            {'abbr': 'SW', 'full': 'Świętokrzyskie Voivodeship'}, {'abbr': 'WN', 'full': 'Warmian-Masurian Voivodeship'},
            {'abbr': 'WP', 'full': 'Greater Poland Voivodeship'}, {'abbr': 'ZP', 'full': 'West Pomeranian Voivodeship'}
        ], 'zip_generator': generate_plausible_zip},
        'Portugal': {'country_code': 'PT', 'states': [
            {'abbr': 'AV', 'full': 'Aveiro'}, {'abbr': 'BA', 'full': 'Beja'}, {'abbr': 'BRG', 'full': 'Braga'},
            {'abbr': 'BRC', 'full': 'Bragança'}, {'abbr': 'CB', 'full': 'Castelo Branco'}, {'abbr': 'CO', 'full': 'Coimbra'},
            {'abbr': 'EV', 'full': 'Évora'}, {'abbr': 'FA', 'full': 'Faro'}, {'abbr': 'GU', 'full': 'Guarda'},
            {'abbr': 'LE', 'full': 'Leiria'}, {'abbr': 'LI', 'full': 'Lisbon'}, {'abbr': 'PO', 'full': 'Portalegre'},
            {'abbr': 'PT', 'full': 'Porto'}, {'abbr': 'SA', 'full': 'Santarém'}, {'abbr': 'ST', 'full': 'Setúbal'},
            {'abbr': 'VC', 'full': 'Viana do Castelo'}, {'abbr': 'VL', 'full': 'Vila Real'}, {'abbr': 'VI', 'full': 'Viseu'},
            {'abbr': 'AZO', 'full': 'Azores'}, {'abbr': 'MADE', 'full': 'Madeira'}
        ], 'zip_generator': generate_plausible_zip},
        'Slovenia': {'country_code': 'SI', 'states': [
            {'abbr': 'LJ', 'full': 'Ljubljana'}, {'abbr': 'MB', 'full': 'Maribor'}, {'abbr': 'KR', 'full': 'Kranj'},
            {'abbr': 'CE', 'full': 'Celje'}, {'abbr': 'KPR', 'full': 'Koper'}, {'abbr': 'NGR', 'full': 'Nova Gorica'},
            {'abbr': 'MS', 'full': 'Murska Sobota'}, {'abbr': 'NM', 'full': 'Novo mesto'}, {'abbr': 'PO', 'full': 'Postojna'}
        ], 'zip_generator': generate_plausible_zip},
        'Slovakia': {'country_code': 'SK', 'states': [
            {'abbr': 'BA', 'full': 'Bratislava Region'}, {'abbr': 'TT', 'full': 'Trnava Region'},
            {'abbr': 'NR', 'full': 'Nitra Region'}, {'abbr': 'TN', 'full': 'Trenčín Region'},
            {'abbr': 'ZA', 'full': 'Žilina Region'}, {'abbr': 'BC', 'full': 'Banská Bystrica Region'},
            {'abbr': 'PO', 'full': 'Prešov Region'}, {'abbr': 'KE', 'full': 'Košice Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Czech Republic': {'country_code': 'CZ', 'states': [
            {'abbr': 'PR', 'full': 'Prague'}, {'abbr': 'ST', 'full': 'Central Bohemian Region'},
            {'abbr': 'JC', 'full': 'South Bohemian Region'}, {'abbr': 'PL', 'full': 'Plzeň Region'},
            {'abbr': 'KA', 'full': 'Karlovy Vary Region'}, {'abbr': 'US', 'full': 'Ústí nad Labem Region'},
            {'abbr': 'LI', 'full': 'Liberec Region'}, {'abbr': 'KR', 'full': 'Hradec Králové Region'},
            {'abbr': 'PA', 'full': 'Pardubice Region'}, {'abbr': 'OL', 'full': 'Olomouc Region'},
            {'abbr': 'ZL', 'full': 'Zlín Region'}, {'abbr': 'JM', 'full': 'South Moravian Region'},
            {'abbr': 'MO', 'full': 'Moravian-Silesian Region'}, {'abbr': 'VY', 'full': 'Vysočina Region'}
        ], 'zip_generator': generate_plausible_zip},
        'Hungary': {'country_code': 'HU', 'states': [
            {'abbr': 'BU', 'full': 'Budapest'}, {'abbr': 'BA', 'full': 'Baranya County'}, {'abbr': 'BCS', 'full': 'Bács-Kiskun County'},
            {'abbr': 'BE', 'full': 'Békés County'}, {'abbr': 'BZ', 'full': 'Borsod-Abaúj-Zemplén County'},
            {'abbr': 'CS', 'full': 'Csongrád-Csanád County'}, {'abbr': 'FE', 'full': 'Fejér County'},
            {'abbr': 'GS', 'full': 'Győr-Moson-Sopron County'}, {'abbr': 'HB', 'full': 'Hajdú-Bihar County'},
            {'abbr': 'HE', 'full': 'Heves County'}, {'abbr': 'JN', 'full': 'Jász-Nagykun-Szolnok County'},
            {'abbr': 'KO', 'full': 'Komárom-Esztergom County'}, {'abbr': 'NO', 'full': 'Nógrád County'},
            {'abbr': 'PE', 'full': 'Pest County'}, {'abbr': 'SO', 'full': 'Somogy County'}, {'abbr': 'SZ', 'full': 'Szabolcs-Szatmár-Bereg County'},
            {'abbr': 'TO', 'full': 'Tolna County'}, {'abbr': 'VA', 'full': 'Vas County'}, {'abbr': 'VE', 'full': 'Veszprém County'},
            {'abbr': 'ZA', 'full': 'Zala County'}
        ], 'zip_generator': generate_plausible_zip},
    },
    'Asia Pacific': {
        'China': {'country_code': 'CN', 'states': [
            {'abbr': 'AH', 'full': 'Anhui'}, {'abbr': 'BJ', 'full': 'Beijing'}, {'abbr': 'CQ', 'full': 'Chongqing'},
            {'abbr': 'FJ', 'full': 'Fujian'}, {'abbr': 'GS', 'full': 'Gansu'}, {'abbr': 'GD', 'full': 'Guangdong'},
            {'abbr': 'GX', 'full': 'Guangxi'}, {'abbr': 'GZ', 'full': 'Guizhou'}, {'abbr': 'HA', 'full': 'Hainan'},
            {'abbr': 'HEB', 'full': 'Hebei'}, {'abbr': 'HLJ', 'full': 'Heilongjiang'}, {'abbr': 'HEN', 'full': 'Henan'},
            {'abbr': 'HK', 'full': 'Hong Kong'}, {'abbr': 'HUB', 'full': 'Hubei'}, {'abbr': 'HUN', 'full': 'Hunan'},
            {'abbr': 'NM', 'full': 'Inner Mongolia'}, {'abbr': 'JS', 'full': 'Jiangsu'}, {'abbr': 'JX', 'full': 'Jiangxi'},
            {'abbr': 'JL', 'full': 'Jilin'}, {'abbr': 'LN', 'full': 'Liaoning'}, {'abbr': 'MC', 'full': 'Macao'},
            {'abbr': 'NX', 'full': 'Ningxia'}, {'abbr': 'QH', 'full': 'Qinghai'}, {'abbr': 'SN', 'full': 'Shaanxi'},
            {'abbr': 'SD', 'full': 'Shandong'}, {'abbr': 'SH', 'full': 'Shanghai'}, {'abbr': 'SX', 'full': 'Shanxi'},
            {'abbr': 'SC', 'full': 'Sichuan'}, {'abbr': 'TJ', 'full': 'Tianjin'}, {'abbr': 'XJ', 'full': 'Xinjiang'},
            {'abbr': 'XZ', 'full': 'Tibet'}, {'abbr': 'YN', 'full': 'Yunnan'}, {'abbr': 'ZJ', 'full': 'Zhejiang'}
        ], 'zip_generator': generate_plausible_zip},
        'Taiwan': {'country_code': 'TW', 'states': [
            {'abbr': 'TP', 'full': 'Taipei City'}, {'abbr': 'KS', 'full': 'Kaohsiung City'},
            {'abbr': 'TC', 'full': 'Taichung City'}, {'abbr': 'TN', 'full': 'Tainan City'},
            {'abbr': 'TY', 'full': 'Taoyuan City'}, {'abbr': 'HSZ', 'full': 'Hsinchu City'},
            {'abbr': 'CY', 'full': 'Chiayi City'}, {'abbr': 'KE', 'full': 'Keelung City'},
            {'abbr': 'TWP', 'full': 'Taiwan Province'}, {'abbr': 'FUK', 'full': 'Fukien Province (Kinmen & Lienchiang)'}
        ], 'zip_generator': generate_plausible_zip},
        'South Korea': {'country_code': 'KR', 'states': [
            {'abbr': 'SO', 'full': 'Seoul'}, {'abbr': 'BU', 'full': 'Busan'}, {'abbr': 'DAE', 'full': 'Daegu'},
            {'abbr': 'IN', 'full': 'Incheon'}, {'abbr': 'GJ', 'full': 'Gwangju'}, {'abbr': 'DJ', 'full': 'Daejeon'},
            {'abbr': 'UL', 'full': 'Ulsan'}, {'abbr': 'SEJ', 'full': 'Sejong City'}, {'abbr': 'GG', 'full': 'Gyeonggi Province'},
            {'abbr': 'GW', 'full': 'Gangwon Province'}, {'abbr': 'CB', 'full': 'North Chungcheong Province'},
            {'abbr': 'CN', 'full': 'South Chungcheong Province'}, {'abbr': 'JB', 'full': 'North Jeolla Province'},
            {'abbr': 'JN', 'full': 'South Jeolla Province'}, {'abbr': 'GB', 'full': 'North Gyeongsang Province'},
            {'abbr': 'GN', 'full': 'South Gyeongsang Province'}, {'abbr': 'JJ', 'full': 'Jeju Province'}
        ], 'zip_generator': generate_plausible_zip},
        'Japan': {'country_code': 'JP', 'states': [
            {'abbr': 'HK', 'full': 'Hokkaido'}, {'abbr': 'AO', 'full': 'Aomori'}, {'abbr': 'IW', 'full': 'Iwate'},
            {'abbr': 'MI', 'full': 'Miyagi'}, {'abbr': 'AK', 'full': 'Akita'}, {'abbr': 'YA', 'full': 'Yamagata'},
            {'abbr': 'FU', 'full': 'Fukushima'}, {'abbr': 'IB', 'full': 'Ibaraki'}, {'abbr': 'TC', 'full': 'Tochigi'},
            {'abbr': 'GU', 'full': 'Gunma'}, {'abbr': 'SA', 'full': 'Saitama'}, {'abbr': 'CH', 'full': 'Chiba'},
            {'abbr': 'TO', 'full': 'Tokyo'}, {'abbr': 'KA', 'full': 'Kanagawa'}, {'abbr': 'NI', 'full': 'Niigata'},
            {'abbr': 'TOY', 'full': 'Toyama'}, {'abbr': 'IS', 'full': 'Ishikawa'}, {'abbr': 'FU_2', 'full': 'Fukui'},
            {'abbr': 'YA_2', 'full': 'Yamanashi'}, {'abbr': 'NA', 'full': 'Nagano'}, {'abbr': 'GI', 'full': 'Gifu'},
            {'abbr': 'SH', 'full': 'Shizuoka'}, {'abbr': 'AI', 'full': 'Aichi'}, {'abbr': 'MI_2', 'full': 'Mie'},
            {'abbr': 'SH_2', 'full': 'Shiga'}, {'abbr': 'KY', 'full': 'Kyoto'}, {'abbr': 'OS', 'full': 'Osaka'},
            {'abbr': 'HY', 'full': 'Hyogo'}, {'abbr': 'NA_2', 'full': 'Nara'}, {'abbr': 'WA', 'full': 'Wakayama'},
            {'abbr': 'TO_2', 'full': 'Tottori'}, {'abbr': 'SH_3', 'full': 'Shimane'}, {'abbr': 'OK', 'full': 'Okayama'},
            {'abbr': 'HI', 'full': 'Hiroshima'}, {'abbr': 'YA_3', 'full': 'Yamaguchi'}, {'abbr': 'TO_3', 'full': 'Tokushima'},
            {'abbr': 'KA_2', 'full': 'Kagawa'}, {'abbr': 'EH', 'full': 'Ehime'}, {'abbr': 'KO', 'full': 'Kochi'},
            {'abbr': 'FU_3', 'full': 'Fukuoka'}, {'abbr': 'SA_2', 'full': 'Saga'}, {'abbr': 'NA_3', 'full': 'Nagasaki'},
            {'abbr': 'KU', 'full': 'Kumamoto'}, {'abbr': 'OI', 'full': 'Oita'}, {'abbr': 'MI_3', 'full': 'Miyazaki'},
            {'abbr': 'KA_3', 'full': 'Kagoshima'}, {'abbr': 'OKI', 'full': 'Okinawa'}
        ], 'zip_generator': generate_plausible_zip},
        'Australia': {'country_code': 'AU', 'states': [
            {'abbr': 'NSW', 'full': 'New South Wales'}, {'abbr': 'VIC', 'full': 'Victoria'},
            {'abbr': 'QLD', 'full': 'Queensland'}, {'abbr': 'SA', 'full': 'South Australia'},
            {'abbr': 'WA', 'full': 'Western Australia'}, {'abbr': 'TAS', 'full': 'Tasmania'},
            {'abbr': 'ACT', 'full': 'Australian Capital Territory'}, {'abbr': 'NT', 'full': 'Northern Territory'}
        ], 'zip_generator': generate_plausible_zip},
        'New Zealand': {'country_code': 'NZ', 'states': [
            {'abbr': 'AUK', 'full': 'Auckland'}, {'abbr': 'BOP', 'full': 'Bay of Plenty'}, {'abbr': 'CAN', 'full': 'Canterbury'},
            {'abbr': 'GIS', 'full': 'Gisborne'}, {'abbr': 'HKB', 'full': 'Hawke\'s Bay'}, {'abbr': 'MWT', 'full': 'Manawatū-Whanganui'},
            {'abbr': 'MBH', 'full': 'Marlborough'}, {'abbr': 'NSN', 'full': 'Nelson'}, {'abbr': 'NTL', 'full': 'Northland'},
            {'abbr': 'OTA', 'full': 'Otago'}, {'abbr': 'STL', 'full': 'Southland'}, {'abbr': 'TKI', 'full': 'Taranaki'},
            {'abbr': 'TAS', 'full': 'Tasman'}, {'abbr': 'WAI', 'full': 'Waikato'}, {'abbr': 'WLG', 'full': 'Wellington'},
            {'abbr': 'WTC', 'full': 'West Coast'}
        ], 'zip_generator': generate_plausible_zip},
        'Thailand': {'country_code': 'TH', 'states': [
            {'abbr': 'BKK', 'full': 'Bangkok'}, {'abbr': 'CM', 'full': 'Chiang Mai'}, {'abbr': 'CN', 'full': 'Chiang Rai'},
            {'abbr': 'TRT', 'full': 'Trat'}, {'abbr': 'PN', 'full': 'Pattani'}, {'abbr': 'PH', 'full': 'Phuket'},
            {'abbr': 'URT', 'full': 'Surat Thani'}, {'abbr': 'SN', 'full': 'Songkhla'}, {'abbr': 'UBN', 'full': 'Ubon Ratchathani'}
        ], 'zip_generator': generate_plausible_zip},
        'Singapore': {'country_code': 'SG', 'states': [{'abbr': 'SGP', 'full': 'Singapore'}], 'zip_generator': generate_plausible_zip},
        'Malaysia': {'country_code': 'MY', 'states': [
            {'abbr': 'JHR', 'full': 'Johor'}, {'abbr': 'KDH', 'full': 'Kedah'}, {'abbr': 'KLT', 'full': 'Kelantan'},
            {'abbr': 'MLK', 'full': 'Malacca'}, {'abbr': 'NSN', 'full': 'Negeri Sembilan'}, {'abbr': 'PHG', 'full': 'Pahang'},
            {'abbr': 'PRK', 'full': 'Perak'}, {'abbr': 'PLS', 'full': 'Perlis'}, {'abbr': 'PNG', 'full': 'Penang'},
            {'abbr': 'SBA', 'full': 'Sabah'}, {'abbr': 'SWK', 'full': 'Sarawak'}, {'abbr': 'SGR', 'full': 'Selangor'},
            {'abbr': 'TRG', 'full': 'Terengganu'}, {'abbr': 'KUL', 'full': 'Kuala Lumpur'}, {'abbr': 'LBN', 'full': 'Labuan'},
            {'abbr': 'PUT', 'full': 'Putrajaya'}
        ], 'zip_generator': generate_plausible_zip}
    }
}


# --------------------------
# Generates the dimension tables
# --------------------------

def generate_dim_product():
    """生成 Dim_Product 维度表"""
    # 原始车型数据
    products_data = [
        {'Model_ID': 1, 'Model_Name': 'Model S', 'Standard_Price_USD': 74990, 'Launch_Date': '2012-06-01', 'Description': 'Luxury electric sedan'},
        {'Model_ID': 2, 'Model_Name': 'Model 3', 'Standard_Price_USD': 38990, 'Launch_Date': '2017-07-28', 'Description': 'Affordable electric sedan'},
        {'Model_ID': 3, 'Model_Name': 'Model X', 'Standard_Price_USD': 79990, 'Launch_Date': '2015-09-29', 'Description': 'Luxury electric SUV with Falcon Wing doors'},
        {'Model_ID': 4, 'Model_Name': 'Model Y', 'Standard_Price_USD': 43990, 'Launch_Date': '2020-03-13', 'Description': 'Compact electric SUV'},
        {'Model_ID': 5, 'Model_Name': 'Cybertruck', 'Standard_Price_USD': 60990, 'Launch_Date': '2023-11-30', 'Description': 'Futuristic electric pickup truck'}
    ]
    dim_product_df = pd.DataFrame(products_data)

    # 1. ADDED: Add 'Model_Category' column based on 'Model_Name'
    def get_category(model_name):
        if 'Model S' in model_name or 'Model 3' in model_name:
            return 'Sedan'
        elif 'Model X' in model_name or 'Model Y' in model_name:
            return 'SUV'
        elif 'Cybertruck' in model_name:
            return 'Truck'
        else:
            return 'Other'

    dim_product_df['Model_Category'] = dim_product_df['Model_Name'].apply(get_category)
    
    # 确保列顺序，以便在 Power BI 中更容易管理
    dim_product_df = dim_product_df[['Model_ID', 'Model_Name', 'Model_Category', 'Standard_Price_USD', 'Launch_Date', 'Description']]

    return dim_product_df


def generate_dim_time(start_date, end_date):
    """
    生成 Dim_Time 维度表
    """
    # 将日期范围转换为日期序列
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    dim_time_df = pd.DataFrame({'Date': date_range})

    # 2. ADDED: Add 'Time_ID' column (as integer YYYYMMDD)
    dim_time_df['Time_ID'] = dim_time_df['Date'].dt.strftime('%Y%m%d').astype(int)

    # 添加其他时间属性列
    dim_time_df['Full_Date'] = dim_time_df['Date'].dt.strftime('%Y-%m-%d')
    dim_time_df['Year'] = dim_time_df['Date'].dt.year
    dim_time_df['Quarter'] = dim_time_df['Date'].dt.quarter
    dim_time_df['Month'] = dim_time_df['Date'].dt.month
    dim_time_df['Day_of_Month'] = dim_time_df['Date'].dt.day
    dim_time_df['Day_of_Week'] = dim_time_df['Date'].dt.dayofweek + 1 # Monday=1, Sunday=7
    dim_time_df['Week_of_Year'] = dim_time_df['Date'].dt.isocalendar().week.astype(int)
    dim_time_df['Day_Name'] = dim_time_df['Date'].dt.day_name()
    dim_time_df['Month_Name'] = dim_time_df['Date'].dt.month_name()
    dim_time_df['Quarter_Name'] = 'Q' + dim_time_df['Quarter'].astype(str)
    
    # 确保列顺序
    dim_time_df = dim_time_df[['Time_ID', 'Date', 'Full_Date', 'Year', 'Quarter', 'Month', 'Day_of_Month', 'Day_of_Week', 'Week_of_Year', 'Day_Name', 'Month_Name', 'Quarter_Name']]

    return dim_time_df


def generate_dim_customer(num_customers):
    """
    生成 Dim_Customer 维度表
    """
    first_names = ['Liam', 'Olivia', 'Noah', 'Emma', 'Oliver', 'Charlotte', 'Elijah', 'Amelia', 'James', 'Ava', 'William', 'Sophia']
    last_names = ['Smith', 'Jones', 'Williams', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson']

    customers = []
    for i in range(1, num_customers + 1):
        customer_id = i
        # 3. ADDED: Add 'Customer_Name'
        customer_name = f"{random.choice(first_names)} {random.choice(last_names)}"
        customers.append({
            'Customer_ID': customer_id,
            'Customer_Name': customer_name,
            'Loyalty_Status': random.choice(['Bronze', 'Silver', 'Gold', 'Platinum'])
        })
    dim_customer_df = pd.DataFrame(customers)
    return dim_customer_df


def generate_sales_and_geography(num_sales, start_date, end_date):
    """
    生成 Fact_Sales 和 Dim_Geography 表
    """
    sales_data = []
    
    # 获取所有地理位置的列表
    all_locations = []
    for region, countries in tesla_countries.items():
        for country, data in countries.items():
            for state in data['states']:
                all_locations.append({
                    'Country': country,
                    'Country_Code': data['country_code'],
                    'State_Province_Abbr': state['abbr'],
                    'State_Province_Full': state['full'],
                    'Zip_Code': data['zip_generator'](country, state['abbr'])
                })

    # 为确保每个州都有数据，先为每个州生成一条销售记录
    for i, loc in enumerate(all_locations):
        sale_date = start_date + datetime.timedelta(days=random.randint(0, (end_date - start_date).days))
        sales_data.append({
            'Sale_Date': sale_date,
            'Product_ID': random.randint(1, 5),
            'Customer_ID': random.randint(1, 200000),
            'Quantity': 1,
            'Sale_Price_USD': 0, # Placeholder, will be calculated later
            'Country': loc['Country'],
            'State_Province': loc['State_Province_Full'],
            'Zip_Code': loc['Zip_Code']
        })

    # 生成剩余的销售数据
    for _ in range(num_sales - len(all_locations)):
        loc = random.choice(all_locations)
        sale_date = start_date + datetime.timedelta(days=random.randint(0, (end_date - start_date).days))
        sales_data.append({
            'Sale_Date': sale_date,
            'Product_ID': random.randint(1, 5),
            'Customer_ID': random.randint(1, 200000),
            'Quantity': random.choice([1, 1, 1, 1, 2]),
            'Sale_Price_USD': 0,
            'Country': loc['Country'],
            'State_Province': loc['State_Province_Full'],
            'Zip_Code': loc['Zip_Code']
        })

    fact_sales_df = pd.DataFrame(sales_data)
    
    # ADDED: 将 'Sale_Date' 列转换为日期时间类型以启用 `.dt` 属性
    fact_sales_df['Sale_Date'] = pd.to_datetime(fact_sales_df['Sale_Date'])
    
    # 生成 Dim_Geography
    dim_geography_data = []
    
    # 4. ADDED: Create a unique 'Geo_ID' for each unique location
    unique_locations = fact_sales_df[['Country', 'State_Province', 'Zip_Code']].drop_duplicates().reset_index(drop=True)
    unique_locations['Geo_ID'] = unique_locations.index + 1  # 确保ID从1开始

    # 5. ADDED: Add 'Geo_ID' to Fact_Sales table
    fact_sales_df = pd.merge(fact_sales_df, unique_locations, on=['Country', 'State_Province', 'Zip_Code'], how='left')

    # 将唯一的地理位置信息保存到 Dim_Geography
    dim_geography_df = unique_locations.rename(columns={'State_Province': 'State_Province_Full'})
    
    # 确保 Dim_Geography 有 'State_Province_Abbr' 列以便在 Power BI 中使用
    state_map = {state['full']: state['abbr'] for region, countries in tesla_countries.items() for country, data in countries.items() for state in data['states']}
    dim_geography_df['State_Province_Abbr'] = dim_geography_df['State_Province_Full'].map(state_map).fillna('')
    
    # 确保列顺序
    dim_geography_df = dim_geography_df[['Geo_ID', 'Country', 'State_Province_Full', 'State_Province_Abbr', 'Zip_Code']]
    
    # 添加 'Time_ID' 到事实表
    fact_sales_df['Time_ID'] = fact_sales_df['Sale_Date'].dt.strftime('%Y%m%d').astype(int)
    
    return fact_sales_df, dim_geography_df


# --------------------------
# Main function
# --------------------------

def main():
    print("正在生成数据表...")

    # 设置日期范围和数据行数
    start_date = datetime.date(2023, 1, 1)
    end_date = datetime.date(2025, 12, 31)
    num_sales = 1000000 # 100万行数据

    # 生成维度表和事实表
    dim_product_df = generate_dim_product()
    dim_time_df = generate_dim_time(start_date, end_date)
    dim_customer_df = generate_dim_customer(num_customers=200000)

    # 生成销售事实表和地理维度表
    fact_sales_df, dim_geography_df = generate_sales_and_geography(num_sales, start_date, end_date)

    # 计算销售价格
    dim_product_map = dim_product_df.set_index('Model_ID')['Standard_Price_USD'].to_dict()
    
    # 确保 fact_sales_df['Product_ID'] 的数据类型是可哈希的，以便进行映射
    fact_sales_df['Sale_Price_USD'] = fact_sales_df['Product_ID'].map(dim_product_map)
    fact_sales_df['Sale_Price_USD'] = fact_sales_df['Sale_Price_USD'] * fact_sales_df['Quantity'] * (1 - np.random.rand(len(fact_sales_df)) * 0.1)
    fact_sales_df['Sale_Price_USD'] = fact_sales_df['Sale_Price_USD'].apply(lambda x: math.ceil(x / 100) * 100) # 模拟价格调整

    # 添加外键
    fact_sales_df = pd.merge(fact_sales_df, dim_customer_df[['Customer_ID', 'Customer_Name']], on='Customer_ID', how='left')

    # 选择 Fact_Sales 的最终列
    fact_sales_df = fact_sales_df[['Sale_Date', 'Time_ID', 'Product_ID', 'Customer_ID', 'Customer_Name', 'Geo_ID', 'Quantity', 'Sale_Price_USD']]


    # 保存到CSV
    dim_product_df.to_csv('Dim_Product.csv', index=False, encoding='utf-8')
    dim_time_df.to_csv('Dim_Time.csv', index=False, encoding='utf-8')
    dim_customer_df.to_csv('Dim_Customer.csv', index=False, encoding='utf-8')
    dim_geography_df.to_csv('Dim_Geography.csv', index=False, encoding='utf-8')
    fact_sales_df.to_csv('Fact_Sales.csv', index=False, encoding='utf-8')

    print("数据生成完成，文件已保存到 Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, and Fact_Sales.csv")


if __name__ == "__main__":
    main()

正在生成数据表...
数据生成完成，文件已保存到 Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, and Fact_Sales.csv


**第二版GPU加速代码**

In [6]:
# -*- coding: utf-8 -*-
"""Tesla Simulated Sales Data Generator

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qSg43jfMBtM4DZY_bMze_D93QjNTqAZo
"""

# tesla_sales_data_generator_star_schema.py
# 完整的星型模式数据生成脚本
# 目的：为每个国家的所有州/省/直辖市/特别行政区生成 Tesla 销售记录
# - 确保每个州至少有一条记录（避免地图空白）
# - 优化新西兰邮编生成，尽量使用现实可用的四位格式
# - 生成完整的星型模式：Fact_Sales, Dim_Product, Dim_Time, Dim_Geography, Dim_Prices, Dim_Customer
# 使用：python tesla_sales_data_generator_star_schema.py

import pandas as pd
import numpy as np
import random
import datetime
import math
import os
import copy # 新增：用于深拷贝字典

# 固定随机种子以便可复现（需要不同随机结果可注释掉）
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# --------------------------
# Helper: 生成合理格式的邮编/邮政编码（尽量贴近各国常见格式）
# --------------------------
def generate_plausible_zip(country, state_province_abbr):
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'

    # 逐国家处理（覆盖大多数脚本中使用到的国家）
    if country == 'United States':
        # 5-digit US ZIP code
        return ''.join(random.choices(digits, k=5))
    if country == 'Canada':
        # A1A 1A1 format
        return ''.join(random.choices(letters, k=1)) + ''.join(random.choices(digits, k=1)) + ''.join(random.choices(letters, k=1)) + ' ' + \
               ''.join(random.choices(digits, k=1)) + ''.join(random.choices(letters, k=1)) + ''.join(random.choices(digits, k=1))
    if country == 'Germany':
        # 5-digit German postal code
        return ''.join(random.choices(digits, k=5))
    if country == 'United Kingdom':
        # Variable length UK postal code (e.g., SW1A 0AA)
        return ''.join(random.choices(letters, k=1)) + ''.join(random.choices(letters + digits, k=1)) + ''.join(random.choices(digits, k=1)) + ' ' + \
               ''.join(random.choices(digits, k=1)) + ''.join(random.choices(letters, k=2))
    if country == 'Norway':
        # 4-digit Norwegian postal code
        return ''.join(random.choices(digits, k=4))
    if country == 'China':
        # 6-digit Chinese postal code
        return ''.join(random.choices(digits, k=6))
    if country == 'Japan':
        # 7-digit Japanese postal code (e.g., 100-0001)
        return ''.join(random.choices(digits, k=3)) + '-' + ''.join(random.choices(digits, k=4))
    if country == 'Australia':
        # 4-digit Australian postal code
        return ''.join(random.choices(digits, k=4))
    if country == 'Mexico':
        # 5-digit Mexican postal code
        return ''.join(random.choices(digits, k=5))
    if country == 'New Zealand':
        # 4-digit New Zealand postal code (e.g., 1010)
        return ''.join(random.choices(digits, k=4))
    
    # fallback to a simple format for other countries
    return ''.join(random.choices(digits, k=5))

# --------------------------
# 数据集配置
# --------------------------

# ADDED: Add 'sales_weight' to each country and state/province to guide sales distribution.
# Weights are relative and based on a mix of market size, economic level, and population.
tesla_countries = {
    'North America': {
        'United States': {'country_code': 'US', 'sales_weight': 0.45, 'states': [
            {'abbr': 'AL', 'full': 'Alabama', 'sales_weight': 0.005}, {'abbr': 'AK', 'full': 'Alaska', 'sales_weight': 0.001},
            {'abbr': 'AZ', 'full': 'Arizona', 'sales_weight': 0.02}, {'abbr': 'CA', 'full': 'California', 'sales_weight': 0.18}, # High weight
            {'abbr': 'TX', 'full': 'Texas', 'sales_weight': 0.12}, # High weight
            {'abbr': 'FL', 'full': 'Florida', 'sales_weight': 0.07}, # High weight
            {'abbr': 'NY', 'full': 'New York', 'sales_weight': 0.05}, {'abbr': 'IL', 'full': 'Illinois', 'sales_weight': 0.03},
            # ... (rest of the US states with relative weights)
        ], 'zip_generator': generate_plausible_zip},
        'Canada': {'country_code': 'CA', 'sales_weight': 0.10, 'states': [
            {'abbr': 'ON', 'full': 'Ontario', 'sales_weight': 0.4}, # High weight for Ontario
            {'abbr': 'QC', 'full': 'Quebec', 'sales_weight': 0.25},
            {'abbr': 'BC', 'full': 'British Columbia', 'sales_weight': 0.2},
            # ... (rest of the Canadian provinces)
        ], 'zip_generator': generate_plausible_zip},
        'Mexico': {'country_code': 'MX', 'sales_weight': 0.02, 'states': [
            {'abbr': 'CDMX', 'full': 'Mexico City', 'sales_weight': 0.2},
            # ... (rest of the Mexican states)
        ], 'zip_generator': generate_plausible_zip}
    },
    'Europe': {
        'Germany': {'country_code': 'DE', 'sales_weight': 0.12, 'states': [
            {'abbr': 'BY', 'full': 'Bavaria', 'sales_weight': 0.15}, {'abbr': 'BW', 'full': 'Baden-Württemberg', 'sales_weight': 0.12},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'United Kingdom': {'country_code': 'GB', 'sales_weight': 0.08, 'states': [
            {'abbr': 'ENG', 'full': 'England', 'sales_weight': 0.8},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'Norway': {'country_code': 'NO', 'sales_weight': 0.05, 'states': [
            {'abbr': 'OS', 'full': 'Oslo', 'sales_weight': 0.3},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        # ... (rest of the European countries with relative weights)
    },
    'Asia Pacific': {
        'China': {'country_code': 'CN', 'sales_weight': 0.20, 'states': [
            {'abbr': 'SH', 'full': 'Shanghai', 'sales_weight': 0.15}, {'abbr': 'BJ', 'full': 'Beijing', 'sales_weight': 0.1},
            {'abbr': 'GD', 'full': 'Guangdong', 'sales_weight': 0.18},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'Japan': {'country_code': 'JP', 'sales_weight': 0.05, 'states': [
            {'abbr': 'TO', 'full': 'Tokyo', 'sales_weight': 0.2},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        'Australia': {'country_code': 'AU', 'sales_weight': 0.03, 'states': [
            {'abbr': 'NSW', 'full': 'New South Wales', 'sales_weight': 0.4},
            # ...
        ], 'zip_generator': generate_plausible_zip},
        # ... (rest of the APAC countries with relative weights)
    }
}

# --------------------------
# Helper: Generates the dimension tables
# --------------------------
def generate_dim_product():
    products_data = [
        {'Model_ID': 1, 'Model_Name': 'Model S', 'Standard_Price_USD': 74990, 'Launch_Date': '2012-06-01', 'Description': 'Luxury electric sedan'},
        {'Model_ID': 2, 'Model_Name': 'Model 3', 'Standard_Price_USD': 38990, 'Launch_Date': '2017-07-28', 'Description': 'Affordable electric sedan'},
        {'Model_ID': 3, 'Model_Name': 'Model X', 'Standard_Price_USD': 79990, 'Launch_Date': '2015-09-29', 'Description': 'Luxury electric SUV'},
        {'Model_ID': 4, 'Model_Name': 'Model Y', 'Standard_Price_USD': 43990, 'Launch_Date': '2020-03-13', 'Description': 'Compact electric SUV'},
        {'Model_ID': 5, 'Model_Name': 'Cybertruck', 'Standard_Price_USD': 60990, 'Launch_Date': '2023-11-30', 'Description': 'Futuristic electric pickup truck'}
    ]
    dim_product_df = pd.DataFrame(products_data)
    
    # ADDED: Add the required 'Model_Base_Price_USD' column
    dim_product_df['Model_Base_Price_USD'] = dim_product_df['Standard_Price_USD']
    # ADDED: Rename 'Launch_Date' to 'Model_Launch_Date'
    dim_product_df.rename(columns={'Launch_Date': 'Model_Launch_Date'}, inplace=True)

    # ADDED: Categorical assignment
    model_cat = np.empty(len(dim_product_df), dtype=object)
    model_cat[dim_product_df['Model_Name'].str.contains('Model S|Model 3').values] = 'Sedan'
    model_cat[dim_product_df['Model_Name'].str.contains('Model X|Model Y').values] = 'SUV'
    model_cat[dim_product_df['Model_Name'].str.contains('Cybertruck').values] = 'Truck'
    dim_product_df['Model_Category'] = model_cat

    return dim_product_df

def generate_dim_time():
    start_date = datetime.date(2023, 1, 1)
    end_date = datetime.date(2025, 12, 31)
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    dim_time_df = pd.DataFrame({'Date': date_range})

    dim_time_df['Time_ID'] = dim_time_df['Date'].dt.strftime('%Y%m%d').astype('int32')
    dim_time_df['Full_Date'] = dim_time_df['Date'].dt.strftime('%Y-%m-%d')
    dim_time_df['Year'] = dim_time_df['Date'].dt.year
    dim_time_df['Quarter'] = dim_time_df['Date'].dt.quarter
    dim_time_df['Month'] = dim_time_df['Date'].dt.month
    dim_time_df['Day'] = dim_time_df['Date'].dt.day
    dim_time_df['Day_of_Month'] = dim_time_df['Date'].dt.day
    dim_time_df['Day_of_Week'] = dim_time_df['Date'].dt.dayofweek + 1
    dim_time_df['Week_of_Year'] = dim_time_df['Date'].dt.isocalendar().week.astype('int32')
    dim_time_df['Day_Name'] = dim_time_df['Date'].dt.day_name()
    dim_time_df['Month_Name'] = dim_time_df['Date'].dt.month_name()
    dim_time_df['Quarter_Name'] = 'Q' + dim_time_df['Quarter'].astype(str)
    
    return dim_time_df

def generate_dim_customer():
    num_customers = 200000
    first_names = ['Liam', 'Olivia', 'Noah', 'Emma', 'Oliver', 'Charlotte', 'Elijah', 'Amelia', 'James', 'Ava', 'William', 'Sophia']
    last_names = ['Smith', 'Jones', 'Williams', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson']

    customer_ids = np.arange(1, num_customers + 1)
    
    # Vectorized generation of names and loyalty status
    first_name_cp = np.random.choice(first_names, size=num_customers)
    last_name_cp = np.random.choice(last_names, size=num_customers)
    
    # ADDED: Add the required 'Gender' column
    gender_list = ['Male', 'Female', 'Other']
    gender_cp = np.random.choice(gender_list, size=num_customers, p=[0.49, 0.49, 0.02])
    
    # ADDED: Add the required 'Age_Group' column
    age_groups = ['<25', '25-34', '35-44', '45-54', '55+']
    age_group_cp = np.random.choice(age_groups, size=num_customers, p=[0.2, 0.3, 0.25, 0.15, 0.1])
    
    # ADDED: Add the required 'Income_Level' column
    income_levels = ['Low', 'Medium', 'High']
    income_level_cp = np.random.choice(income_levels, size=num_customers, p=[0.2, 0.5, 0.3])


    dim_customer_df = pd.DataFrame({
        'Customer_ID': customer_ids,
        'Customer_Name': first_name_cp + ' ' + last_name_cp,
        'Gender': gender_cp,
        'Age_Group': age_group_cp,
        'Income_Level': income_level_cp
    })
    return dim_customer_df


def generate_sales_and_geography(dim_product_df, dim_time_df, dim_customer_df):
    """Generates Fact_Sales and Dim_Geography tables with weighted distribution."""
    
    # ADDED: A flat list of all locations to build the Dim_Geography table, including Zip_Code
    all_locations_list = []
    geo_id_counter = 1
    for region, countries in tesla_countries.items():
        for country, data in countries.items():
            for state in data['states']:
                # ADDED: Generate a plausible zip code for each unique location
                zip_code = data['zip_generator'](country, state['abbr'])
                all_locations_list.append({
                    'Geo_ID': geo_id_counter,
                    'Continent': region,
                    'Country': country,
                    'State_Province_Full': state['full'],
                    'State_Province_Abbr': state['abbr'],
                    'Country_Code': data['country_code'],
                    'Zip_Code': zip_code, # ADDED: Add Zip_Code
                    'sales_weight': data['sales_weight'] * state.get('sales_weight', 1.0)
                })
                geo_id_counter += 1

    geo_locations_df = pd.DataFrame(all_locations_list)
    total_weight = geo_locations_df['sales_weight'].sum()
    geo_locations_df['sales_prob'] = geo_locations_df['sales_weight'] / total_weight

    # Highly efficient vectorized weighted random choice using numpy
    num_sales = 5000000
    sales_indices = np.random.choice(geo_locations_df.index, size=num_sales, p=geo_locations_df['sales_prob'].values)
    
    # FIXED: Ensured that 'Geo_ID' is included from the start, preventing the KeyError.
    fact_sales_df = geo_locations_df.loc[sales_indices, ['Geo_ID', 'Country', 'State_Province_Full', 'State_Province_Abbr']].copy().reset_index(drop=True)
    
    # Generate other facts in a vectorized manner
    date_range_days = (dim_time_df['Date'].max() - dim_time_df['Date'].min()).days
    fact_sales_df['Sale_Date'] = dim_time_df['Date'].min() + pd.to_timedelta(np.random.randint(0, date_range_days, size=num_sales), unit='D')
    
    fact_sales_df['Model_ID'] = np.random.randint(1, len(dim_product_df) + 1, size=num_sales).astype('int32')
    fact_sales_df['Customer_ID'] = np.random.randint(1, len(dim_customer_df) + 1, size=num_sales).astype('int32')
    
    # Quantity distribution (mostly 1, with some 2)
    quantity_dist = np.array([1, 1, 1, 1, 2], dtype='int8')
    fact_sales_df['Sales_Units'] = np.random.choice(quantity_dist, size=num_sales) # RENAMED: Quantity to Sales_Units
    
    # ADDED: Generate Is_Discounted_Sale flag (e.g., 20% of sales are discounted)
    fact_sales_df['Is_Discounted_Sale'] = np.random.choice([True, False], size=num_sales, p=[0.2, 0.8])
    
    # Add Time_ID to fact_sales_df
    fact_sales_df['Time_ID'] = fact_sales_df['Sale_Date'].dt.strftime('%Y%m%d').astype('int32')

    # Generate Dim_Geography table with required columns
    # RENAMED: 'State_Province_Full' to 'State_Province'
    dim_geography_df = geo_locations_df[['Geo_ID', 'Continent', 'Country', 'Country_Code', 'State_Province_Full', 'State_Province_Abbr', 'Zip_Code']].drop_duplicates().reset_index(drop=True)
    dim_geography_df.rename(columns={'State_Province_Full': 'State_Province'}, inplace=True)

    return fact_sales_df, dim_geography_df


# 主函数，执行所有生成任务并保存文件
def main():
    print("正在生成数据表...")
    start_time = datetime.datetime.now()

    dim_product_df = generate_dim_product()
    dim_time_df = generate_dim_time()
    dim_customer_df = generate_dim_customer()

    fact_sales_df, dim_geography_df = generate_sales_and_geography(dim_product_df, dim_time_df, dim_customer_df)

    # 计算销售价格和收入
    # ADDED: Use discounted price logic
    dim_product_map = dim_product_df.set_index('Model_ID')['Model_Base_Price_USD'].to_dict()
    
    fact_sales_df['Base_Price'] = fact_sales_df['Model_ID'].map(dim_product_map)
    
    # Calculate revenue based on discount flag
    discount_factor = 0.8 # 20% discount
    fact_sales_df['Revenue_USD'] = np.where(
        fact_sales_df['Is_Discounted_Sale'],
        fact_sales_df['Base_Price'] * fact_sales_df['Sales_Units'] * discount_factor,
        fact_sales_df['Base_Price'] * fact_sales_df['Sales_Units']
    )
    
    # 选择 Fact_Sales 的最终列并严格排序
    fact_sales_cols = ['Time_ID', 'Geo_ID', 'Model_ID', 'Customer_ID', 'Sales_Units', 'Is_Discounted_Sale', 'Revenue_USD']
    fact_sales_df = fact_sales_df[fact_sales_cols]

    # Dim_Time
    dim_time_cols = ['Time_ID', 'Full_Date', 'Year', 'Quarter', 'Month', 'Day', 'Week_of_Year', 'Day_of_Week', 'Day_Name']
    dim_time_df = dim_time_df[dim_time_cols]
    
    # Dim_Product
    dim_product_cols = ['Model_ID', 'Model_Name', 'Model_Category', 'Model_Base_Price_USD', 'Model_Launch_Date']
    dim_product_df = dim_product_df[dim_product_cols]

    # Dim_Customer
    dim_customer_cols = ['Customer_ID', 'Customer_Name', 'Gender', 'Age_Group', 'Income_Level']
    dim_customer_df = dim_customer_df[dim_customer_cols]

    # Dim_Geography
    dim_geography_cols = ['Geo_ID', 'Continent', 'Country', 'Country_Code', 'State_Province', 'State_Province_Abbr', 'Zip_Code']
    dim_geography_df = dim_geography_df[dim_geography_cols]


    # 保存到CSV
    print("正在保存文件...")
    dim_product_df.to_csv('Dim_Product.csv', index=False, encoding='utf-8')
    dim_time_df.to_csv('Dim_Time.csv', index=False, encoding='utf-8')
    dim_geography_df.to_csv('Dim_Geography.csv', index=False, encoding='utf-8')
    dim_customer_df.to_csv('Dim_Customer.csv', index=False, encoding='utf-8')
    fact_sales_df.to_csv('Fact_Sales.csv', index=False, encoding='utf-8')

    end_time = datetime.datetime.now()
    duration = end_time - start_time
    print(f"数据生成和保存完成，用时 {duration.total_seconds():.2f} 秒。")
    print("已保存文件: Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, Fact_Sales.csv")


if __name__ == "__main__":
    main()

正在生成数据表...
正在保存文件...
数据生成和保存完成，用时 160.02 秒。
已保存文件: Dim_Product.csv, Dim_Time.csv, Dim_Customer.csv, Dim_Geography.csv, Fact_Sales.csv


**CPU优化代码**

In [7]:
import time

start = time.time()  # 记录开始时间

# 这里放你的代码
total = 0
for i in range(1000000):
    total += i

end = time.time()    # 记录结束时间
print("执行时间: {:.6f} 秒".format(end - start))


执行时间: 0.045187 秒
