# 아마존 세일즈 데이터 EDA - 보고서 작성용

## Introduction

> 앞선 3주 동안 약 **1400개** 행의 소규모 데이터를 분석하며, 기본적인 통계기법과 클러스터링에 대해 파악함.   
> 이번 EDA의 목적은 2023년의 인도, 영국, 미국, 캐나다의 **대규모** 마켓 세일즈 데이터(국가별 약 140만~220만 행, 총 약 710만 행)를 분석함에 있음.

|#| Table of Contents | Finished |
|:--|:--:|:--:|
|1| Install Necessary Packages & Load Data | &check; |
|2| Acknowledge Characteristics of Data | &cross;|


&copy; 2024 Yoori Choi <it.glasschoi@gmail.com>
* * *

## 1. Install Necessary Packages & Load Data

In [41]:
# Check that the packages listed in requirements.txt are installed by running
# the tests in functions.check_requirements through unittest.
# NOTE(review): the captured run reports OK while several packages are printed
# as "NOT installed", so the check apparently only reports missing packages
# instead of failing -- confirm that this is intended.
import functions.check_requirements as chk
import unittest
suite = unittest.TestLoader().loadTestsFromModule(chk)
unittest.TextTestRunner().run(suite)

.

forex-python is NOT installed
pycountry is NOT installed
simplejson is NOT installed



----------------------------------------------------------------------
Ran 1 test in 0.086s

OK


<unittest.runner.TextTestResult run=1 errors=0 failures=0>

In [21]:
# Import Necessary Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import warnings
import json

# from tqdm import tqdm_notebook
warnings.simplefilter(action='ignore')
%matplotlib inline

In [17]:
%load_ext autoreload
%autoreload 2
import sys
import os

# Get the full path to the project root
project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))

# Add the project root to sys.path
sys.path.insert(0, project_root)

# Now try to import
from functions.amazon_analysis import AmazonAnalyzer, AmazonDataframe

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Echo the resolved project root to confirm the path.
project_root

'/Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning'

In [5]:
# Build the analyzer from config.json, passing the project root.
analyzer = AmazonAnalyzer.from_config('config.json', project_root)

Attempting to access: /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_india.csv
Loaded India from cache
Successfully added dataframe: India
Attempting to access: /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_usa.csv
Loaded US from cache
Successfully added dataframe: US
Attempting to access: /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_uk.csv
Loaded UK from cache
Successfully added dataframe: UK
Attempting to access: /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_canada.csv
Loaded Canada from cache
Successfully added dataframe: Canada
Attempting to access: /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon.csv
Loaded India_2022 from cache
Successfully added dataframe: India_2022


In [6]:
# Summarise what the analyzer holds: one line per dataframe with its shape,
# followed by the column-presence comparison.
if not analyzer:
    print("Failed to initialize analyzer from config.")
else:
    print("Loaded dataframes:")
    for name, df_obj in analyzer.dataframes.items():
        try:
            print(f"- {name}: {df_obj.df.shape}")
        except Exception as e:
            # Best effort: report the failing dataframe and keep listing the rest.
            print(f"- {name}: Error loading - {e}")

    result = analyzer.compare_columns_presence()
    print(result if result is not None else "Failed to compare columns presence")

Loaded dataframes:
Successfully loaded CSV from /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_india.csv
- India: (1497145, 14)
Successfully loaded CSV from /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_usa.csv
- US: (1393614, 14)
Successfully loaded CSV from /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_uk.csv
- UK: (2222724, 11)
Successfully loaded CSV from /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon_canada.csv
- Canada: (1988016, 14)
Successfully loaded CSV from /Users/yoorichoi/Documents/ds_study/Amazon_sales_machine_learning/data/amazon.csv
- India_2022: (1465, 16)
                      India     US     UK  Canada  India_2022
user_name             False  False  False   False        True
actual_price           True   True  False    True        True
boughtInLastMonth      True   True   True    True       False
discounted_price_KRW   True   True   True    T

In [7]:
# Look up each market's dataframe in the analyzer by its registered name.
india_df, us_df, uk_df, canada_df, india_2022_df = (
    analyzer.get_dataframe(market)
    for market in ("India", "US", "UK", "Canada", "India_2022")
)

In [8]:
# analyzer.clear_cache()

#### 나중에 prep_functions 파일이나 amazon_analysis로 옮길 수 있는 함수들

In [9]:
def format_korean_number(number):
    """
    Format an amount of won using Korean 10,000-based units.

    The value is split into groups of four digits labelled 만, 억, 조 and 경.
    The lowest group is written with a thousands separator and the result
    always ends in "원", e.g. 21277416 -> "2127만 7,416원" and 10000 -> "1만원".

    Parameters:
    number (int): Amount in won; may be negative

    Returns:
    str: The formatted amount, prefixed with "-" for negative input
    """
    if number == 0:
        return "0원"

    is_negative = number < 0
    remaining = abs(number)

    # Index 0 is the plain ones group; the "원" suffix is appended once at the
    # end so it is present even when the ones group is zero.
    units = ['', '만', '억', '조', '경']
    top = len(units) - 1
    parts = []

    for i, unit in enumerate(units):
        if i == top:
            # Fold everything that is left into the largest unit so very
            # large values are never silently truncated.
            group, remaining = remaining, 0
        else:
            group = remaining % 10000
            remaining //= 10000

        if group > 0:
            # Middle groups never exceed four digits and are written without
            # a separator; the ones group and the overflow group get commas.
            digits = f"{group:,}" if i in (0, top) else f"{group}"
            parts.append(f"{digits}{unit}")

        if remaining == 0:
            break

    if not parts:
        # No group compared greater than zero (e.g. NaN input): keep the
        # empty-string result instead of emitting a bare suffix.
        return ""

    # Groups were collected lowest-first; reverse them for display.
    final_result = ' '.join(reversed(parts)) + '원'

    if is_negative:
        final_result = f"-{final_result}"

    return final_result

In [10]:
# Pretty-print ranked results, one numbered line per entry.
def print_top_items(results):
    """
    Print a numbered list of results.

    Parameters:
    results: A pandas DataFrame whose first column holds labels and whose
        second column holds prices, or any object with an items() method
        yielding (item, count) pairs

    Returns:
    None
    """
    if not isinstance(results, pd.DataFrame):
        # Mapping-like input: print the values as they are.
        for rank, (item, count) in enumerate(results.items(), start=1):
            print(f"{rank}. {item}: {count}")
        return

    label_col = results.columns[0]
    price_col = results.columns[1]
    for rank, (name, price) in enumerate(zip(results[label_col], results[price_col]), start=1):
        # Prices are rendered with the Korean unit formatter defined earlier.
        print(f"{rank}. {name}: {format_korean_number(price)}")

* * *
## 2. Acknowledge Characteristics of Data

In [11]:
# Compare columns across all dataframes: the result has one row per column
# name and one boolean column per dataframe. Display it.
presence_df = analyzer.compare_columns_presence()
presence_df

Unnamed: 0,India,US,UK,Canada,India_2022
user_name,False,False,False,False,True
actual_price,True,True,False,True,True
boughtInLastMonth,True,True,True,True,False
discounted_price_KRW,True,True,True,True,False
discount_percentage,True,True,False,True,True
discounted_price,True,True,True,True,True
review_id,False,False,False,False,True
review_content,False,False,False,False,True
user_id,False,False,False,False,True
reviews,True,True,True,True,False


In [12]:
def get_common_columns_by_threshold(analyzer, min_true_count=4):
    """
    Select the rows of the column-presence table whose column is present
    (True) in at least min_true_count dataframes.

    Parameters:
    analyzer (AmazonAnalyzer): Object providing compare_columns_presence()
    min_true_count (int): Minimum number of True values a row needs (default=4)

    Returns:
    pandas.DataFrame: The rows of the presence table that meet the threshold,
        or an empty list when compare_columns_presence() returns None
    """
    presence = analyzer.compare_columns_presence()
    if presence is None:
        return []

    # Row-wise sum of booleans = number of dataframes that contain the column.
    meets_threshold = presence.sum(axis=1) >= min_true_count
    return presence.loc[meets_threshold]

In [13]:
# Columns present in at least five dataframes (all five loaded ones).
get_common_columns_by_threshold(analyzer, 5)

Unnamed: 0,India,US,UK,Canada,India_2022
discounted_price,True,True,True,True,True
img_link,True,True,True,True,True
category,True,True,True,True,True
rating,True,True,True,True,True
product_name,True,True,True,True,True
product_link,True,True,True,True,True
product_id,True,True,True,True,True


In [14]:
# Columns present in at least four of the loaded dataframes.
get_common_columns_by_threshold(analyzer, 4)

Unnamed: 0,India,US,UK,Canada,India_2022
actual_price,True,True,False,True,True
boughtInLastMonth,True,True,True,True,False
discounted_price_KRW,True,True,True,True,False
discount_percentage,True,True,False,True,True
discounted_price,True,True,True,True,True
reviews,True,True,True,True,False
img_link,True,True,True,True,True
category,True,True,True,True,True
rating,True,True,True,True,True
isBestSeller,True,True,True,True,False


In [22]:
# Reuse cached category-mapping suggestions when they exist; otherwise compute them.
# NOTE(review): nothing in this cell writes cache_path -- presumably the
# analyzer persists the file itself; confirm.
cache_path = os.path.join(analyzer.cache_dir, 'category_mapping_suggestions.json')
if os.path.exists(cache_path):
    # Read as UTF-8 explicitly: category names contain non-ASCII characters
    # (e.g. "Handmade Home Décor") and the platform default encoding is not
    # guaranteed to be UTF-8.
    with open(cache_path, 'r', encoding='utf-8') as f:
        mapping_results = json.load(f)
else:
    # Print results with customized filtering
    analyzer.print_mapping_suggestions(
        min_similarity=0.7,  # Only show matches with 70% or higher similarity
        max_suggestions=3    # Show maximum 3 suggestions per category
    )

    # Or get raw results for further processing
    mapping_results = analyzer.get_category_mapping_suggestions(batch_size=2000)


Mapping suggestions for India_vs_US:

Vacuum and Flor Care matches:
- Vacuum Cleaners & Floor Care (similarity: 0.79)

Makeup matches:
- Makeup (similarity: 1.00)

Action camera accessories matches:
- Mac Games & Accessories (similarity: 0.75)
- PC Games & Accessories (similarity: 0.72)

Laptop matches:
- Laptop Bags (similarity: 0.71)

Men's shirts matches:
- Men's Shoes (similarity: 0.78)
- Women's Shoes (similarity: 0.72)

Kitchen and dining matches:
- Kitchen & Dining (similarity: 0.88)

Networking equipment matches:
- Electrical Equipment (similarity: 0.70)

Men shoes matches:
- Men's Shoes (similarity: 0.90)
- Women's Shoes (similarity: 0.82)

Male watches matches:
- Men's Watches (similarity: 0.80)
- Women's Watches (similarity: 0.74)

Kids shoes matches:
- Girls' Shoes (similarity: 0.73)

Diet and nutrition matches:
- Diet & Sports Nutrition (similarity: 0.73)

Men's casual shoes matches:
- Men's Shoes (similarity: 0.76)
- Women's Shoes (similarity: 0.71)

Craft Materials matc

* * *
### 국가 별 데이터 파악 - (1) 미국

In [135]:
# Preview the first three rows of the US data.
us_df.head(3)

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,reviews,img_link,product_link,isBestSeller,boughtInLastMonth,discounted_price_KRW,actual_price_KRW
0,B014TMV5YE,"Sion Softside Expandable Roller Luggage, Black...",Suitcases,139.99,0.0,0,4.5,0,https://m.media-amazon.com/images/I/815dLQKYIY...,https://www.amazon.com/dp/B014TMV5YE,False,2000,190722,0
1,B07GDLCQXV,Luggage Sets Expandable PC+ABS Durable Suitcas...,Suitcases,169.99,209.99,19,4.5,0,https://m.media-amazon.com/images/I/81bQlm7vf6...,https://www.amazon.com/dp/B07GDLCQXV,False,1000,231594,286089
2,B07XSCCZYG,Platinum Elite Softside Expandable Checked Lug...,Suitcases,365.49,429.99,15,4.6,0,https://m.media-amazon.com/images/I/71EA35zvJB...,https://www.amazon.com/dp/B07XSCCZYG,False,300,497942,585817


In [136]:
# Category breakdown of the US data, limited to the top 20 categories.
category_analysis_us = analyzer.analyze_categories('US', top_n=20)


=== US 카테고리 분석 ===
총 카테고리 수: 248개
총 상품 수: 1,393,614개

상위 20개 카테고리:
 1. Girls' Clothing                          28,289개 (2.03%)
 2. Boys' Clothing                           24,165개 (1.73%)
 3. Toys & Games                             20,657개 (1.48%)
 4. Men's Shoes                              18,780개 (1.35%)
 5. Women's Handbags                         18,695개 (1.34%)
 6. Girls' Jewelry                           18,441개 (1.32%)
 7. Men's Clothing                           17,924개 (1.29%)
 8. Men's Accessories                        17,482개 (1.25%)
 9. Women's Clothing                         17,281개 (1.24%)
10. Women's Jewelry                          16,948개 (1.22%)
11. Travel Accessories                       16,099개 (1.16%)
12. Women's Shoes                            15,102개 (1.08%)
13. Home Storage & Organization              15,099개 (1.08%)
14. Women's Accessories                      13,845개 (0.99%)
15. Men's Watches                            12,427개 (0.89%)
16. Boys' Jewelry

In [137]:
analyzer.analyze_category_metrics('US', 'discounted_price_KRW')  # mean price per category


=== 카테고리별 평균 discounted_price_KRW 분석 (상위 20개) ===
 1. Computer Servers                         212만 7,416원 (상품수: 281.0개)
 2. Computers & Tablets                      77만 9,109원 (상품수: 7,962.0개)
 3. Smart Home: Home Entertainment           54만 1,835원 (상품수: 154.0개)
 4. Smart Home: Vacuums and Mops             46만 9,971원 (상품수: 50.0개)
 5. Smart Home - Heating & Cooling           35만 6,582원 (상품수: 235.0개)
 6. Luggage Sets                             32만 3,651원 (상품수: 144.0개)
 7. Computer Monitors                        31만 5,491원 (상품수: 3,402.0개)
 8. Smart Home Thermostats - Compatibility Checker 24만 8,662원 (상품수: 18.0개)
 9. Computer External Components             23만 6,096원 (상품수: 2,366.0개)
10. Video Projectors                         23만 4,727원 (상품수: 787.0개)
11. Smart Home: Smart Locks and Entry        22만 7,556원 (상품수: 270.0개)
12. Camera & Photo                           22만 4,331원 (상품수: 6,016.0개)
13. Data Storage                             21만 9,734원 (상품수: 5,851.0개)
14. Smart Home: Lawn and

In [138]:
analyzer.analyze_category_metrics('US', 'rating') # mean rating per category


=== 카테고리별 평균 rating 분석 (상위 20개) ===
 1. Gift Cards                               4.83 (상품수: 139.0개)
 2. Health & Household                       4.57 (상품수: 714.0개)
 3. Industrial & Scientific                  4.55 (상품수: 4,412.0개)
 4. Household Supplies                       4.54 (상품수: 1,908.0개)
 5. Kitchen & Dining                         4.54 (상품수: 4,864.0개)
 6. Electrical Equipment                     4.52 (상품수: 7,820.0개)
 7. Food Service Equipment & Supplies        4.52 (상품수: 5,896.0개)
 8. Sports & Outdoors                        4.50 (상품수: 2,638.0개)
 9. Power Tools & Hand Tools                 4.50 (상품수: 3,797.0개)
10. Electronic Components                    4.49 (상품수: 1,751.0개)
11. Automotive Replacement Parts             4.48 (상품수: 8,267.0개)
12. Home Appliances                          4.48 (상품수: 6,945.0개)
13. Kids' Home Store                         4.47 (상품수: 1,861.0개)
14. Tools & Home Improvement                 4.46 (상품수: 1,705.0개)
15. Industrial Power & Hand Tools          

In [139]:
# Show the category of each of the 10 US listings with the highest discounted_price_KRW.
# NOTE(review): judging by the output, rows are individual listings, so the same
# category can appear several times; this is not a per-category aggregate.
print(f"=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===")

top_bestseller_cat_us = analyzer.top_items_multi_filter(
    df_name='US',
    column='category',
    sort_by='discounted_price_KRW',
    n=10
)

print_top_items(top_bestseller_cat_us)

=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===
1. Data Storage: 2688만 2,574원
2. Fabric Decorating: 2643만 517원
3. Computer Servers: 2243만 6,920원
4. Computer Servers: 1720만 6,621원
5. Packaging & Shipping Supplies: 1705만 7,166원
6. Office Electronics: 1634만 6,049원
7. Data Storage: 1527만 8,092원
8. Computer Servers: 1479만 4,400원
9. Data Storage: 1378만 5,013원
10. PlayStation 3 Games, Consoles & Accessories: 1363만 7,602원


In [140]:
# Top 10 US products by discounted price, across all categories.
# NOTE(review): despite the `top_bestseller_` name, no isBestSeller filter is passed here.
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===")

top_bestseller_us = analyzer.top_items_multi_filter(
    df_name='US',                       # dataframe name
    column='product_name',              # column to display
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_us)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===
1. Overland Storage Neoxl80 Storagelibrary Lto8 SAS: 2688만 2,574원
2. replicas Marinas: 2643만 517원
3. CISCO Systems 1 Port ATM Enhanced Oc12/Stm4 Single Mode Intermediate Reach: 2243만 6,920원
4. Cisco Systems 7140 Router Dual 10/100 Fe Dual Ac IP Sw: 1720만 6,621원
5. PARTNERS BRAND Corrugated Trash Can Plain - 40 Gallon: 1705만 7,166원
6. Sony VPL-XW6000ES 4K HDR Laser Home Theater Projector with Native 4K SXRD Panel, White: 1634만 6,049원
7. HPE MSA 2062 10GBASE-T iSCSI SFF Storage: 1527만 8,092원
8. CISCO Systems Catalyst 5000 Family Supervisor Iii with Enhanced Nffc Ii: 1479만 4,400원
9. Aruba a Hewlett Packard Enterprise company HPE 108 TB Hard Drive - 3.5" Internal - SAS (12Gb/s SAS): 1378만 5,013원
10. A-PS-SS72T-100, Black: 1363만 7,602원


In [168]:
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Computers & Tablets')  ===")

# Top 10 products by discounted price within the 'Computers & Tablets' category
filters = {
    'category': 'Computers & Tablets',
    # 'isBestSeller': True  # filters can be added or removed here
}

top_bestseller_computers_us = analyzer.top_items_multi_filter(
    df_name='US',                       # dataframe name
    column='product_name',              # column to display
    filters=filters,
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_computers_us)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Computers & Tablets')  ===
1. Toughbook Panasonic 40, FZ-40 MK1, Intel® i7-1185G7, 14” Touch, 16GB, 512GB Opal SSD, 4G LTE: 796만 3,201원
2. MSI Creator Z17 17" Professional Creator Laptop: Intel Core i9-12900H RTX 3080 Ti 64GB DDR5 2TB NVMe SSD, QHD+ 165hz Touch 100% DCI-P3, Thunderbolt 4 w/ PD Charging, Win 11 Pro: Lunar Gray A12UHST-046: 733만 3,705원
3. Razer Blade 18 Gaming Laptop: NVIDIA GeForce RTX 4090-13th Gen Intel 24-Core i9 HX CPU - QHD+ 240Hz - 64GB RAM - 2TB SSD - CNC Aluminum - Compact GaN Charger - Windows 11 - Chroma RGB: 681만 1,975원
4. Dell Precision 7780 Mobile Workstation - 17.3" FHD AG Display Display - Intel Core i7-13850HX 20-Core (13th Gen) - 1TB SSD - 64GB DDR5 RAM - NV RTX 5000 ADA (16GB GDDR6) - Windows 11 pro: 647만 1,389원
5. Cooler Master 30th Year Anniversary Cosmos Infinity Gaming PC Desktop – AMD Ryzen 9 7950X - NVIDIA GeForce RTX 4090-32GB DDR5-2TB M.2 NVMe SSD – WiFi - Windows 11 Pro: 639만 7,316원
6. Corsair Vengeance a8100

* * *
### 국가 별 데이터 파악 - (2) 캐나다

In [142]:
# Preview the first three rows of the Canada data.
canada_df.head(3)

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,reviews,img_link,product_link,isBestSeller,boughtInLastMonth,discounted_price_KRW,actual_price_KRW
0,B07CV4L6HX,"Green Leaf WW3D Wonder Extension Cord Winder, ...",Industrial Scientific,47.69,0.0,0,4.4,2876,https://m.media-amazon.com/images/I/81cRe0AVC4...,https://www.amazon.ca/dp/B07CV4L6HX,False,0,47217,0
1,B09N1HGY74,8pcs Toilet Seat Bumpers Universal Toilet Repl...,Industrial Scientific,10.99,0.0,0,3.8,55,https://m.media-amazon.com/images/I/512CpB-EP5...,https://www.amazon.ca/dp/B09N1HGY74,False,100,10881,0
2,B087P7538J,YaeCCC 19 Pcs Hole Saw Kit 3/4''(19mm)- 6''(15...,Industrial Scientific,25.99,27.99,7,4.0,126,https://m.media-amazon.com/images/I/71P+qLFDFc...,https://www.amazon.ca/dp/B087P7538J,False,50,25732,27712


In [143]:
# Category breakdown of the Canada data, limited to the top 20 categories.
category_analysis_canada = analyzer.analyze_categories('Canada', top_n=20)


=== Canada 카테고리 분석 ===
총 카테고리 수: 266개
총 상품 수: 1,988,016개

상위 20개 카테고리:
 1. Baby                                     24,077개 (1.21%)
 2. Luggage  Travel Gear                     22,449개 (1.13%)
 3. Handmade Home Décor                      19,143개 (0.96%)
 4. Handmade Kitchen  Dining                 19,074개 (0.96%)
 5. Handmade Jewellery                       18,545개 (0.93%)
 6. Beauty                                   18,320개 (0.92%)
 7. Men's Jewelry                            17,969개 (0.9%)
 8. Women's Watches                          17,386개 (0.87%)
 9. Boys                                     17,233개 (0.87%)
10. Electronics                              16,393개 (0.82%)
11. Men's Watches                            16,385개 (0.82%)
12. Uniforms, Work  Safety                   16,150개 (0.81%)
13. Men's Shoes                              16,049개 (0.81%)
14. Women's Shoes                            15,964개 (0.8%)
15. Beauty Tools  Accessories                15,868개 (0.8%)
16. Men         

In [144]:
analyzer.analyze_category_metrics('Canada', 'discounted_price_KRW')  # mean price per category


=== 카테고리별 평균 discounted_price_KRW 분석 (상위 20개) ===
 1. Children's Outdoor Inflatable Bouncers  Bouncy Castles 392만 7,029원 (상품수: 8,630.0개)
 2. Desktop Computers                        250만 3,634원 (상품수: 7,924.0개)
 3. Laptop Computers                         145만 6,723원 (상품수: 6,947.0개)
 4. Handmade Furniture                       128만 278원 (상품수: 2,216.0개)
 5. Outdoor Storage  Housing                 73만 8,927원 (상품수: 7,057.0개)
 6. Outdoor Heating  Cooling                 72만 8,889원 (상품수: 8,555.0개)
 7. Home Audio Speakers                      65만 1,611원 (상품수: 7,060.0개)
 8. Ladders                                  60만 8,737원 (상품수: 8,657.0개)
 9. Material Transport Equipment             58만 9,074원 (상품수: 8,825.0개)
10. Salon  Spa Equipment                     55만 8,076원 (상품수: 15,417.0개)
11. Computer Monitors                        52만 6,511원 (상품수: 7,233.0개)
12. Scaffolding Equipment                    49만 3,423원 (상품수: 1,613.0개)
13. Smart Home                               45만 7,354원 (상품수: 434.0개

In [145]:
analyzer.analyze_category_metrics('Canada', 'rating') # mean rating per category


=== 카테고리별 평균 rating 분석 (상위 20개) ===
 1. Health  Personal Care                    4.42 (상품수: 2,213.0개)
 2. Office Products                          4.41 (상품수: 6,787.0개)
 3. Home  Kitchen                            4.40 (상품수: 3,484.0개)
 4. Kitchen  Dining                          4.39 (상품수: 1,417.0개)
 5. Tools  Home Improvement                  4.38 (상품수: 2,997.0개)
 6. Industrial  Scientific                   4.38 (상품수: 5,277.0개)
 7. Home Storage  Organization               4.34 (상품수: 5,798.0개)
 8. Computer Accessories                     4.32 (상품수: 1,321.0개)
 9. Sewing, Craft  Hobby                     4.31 (상품수: 6,398.0개)
10. Toys  Games                              4.29 (상품수: 5,476.0개)
11. Power Tools  Hand Tools                  4.29 (상품수: 2,261.0개)
12. Electronics                              4.27 (상품수: 16,393.0개)
13. Automotive                               4.25 (상품수: 3,389.0개)
14. Beauty                                   4.24 (상품수: 18,320.0개)
15. Sports  Outdoors                 

In [146]:
# Show the category of each of the 10 Canada listings with the highest discounted_price_KRW.
# NOTE(review): judging by the output, rows are individual listings, so the same
# category can appear several times; this is not a per-category aggregate.
print(f"=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===")

top_bestseller_cat_canada = analyzer.top_items_multi_filter(
    df_name='Canada',
    column='category',
    sort_by='discounted_price_KRW',
    n=10
)

print_top_items(top_bestseller_cat_canada)

=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===
1. Outdoor Storage  Housing: 4049만 5,049원
2. Outdoor Storage  Housing: 3421만 396원
3. Television Projectors: 3176만 2,277원
4. Salon  Spa Equipment: 2921만 6,782원
5. Television Projectors: 2773만 5,099원
6. Salon  Spa Equipment: 2761만 4,653원
7. Salon  Spa Equipment: 2723만 2,089원
8. Salon  Spa Equipment: 2713만 8,564원
9. Salon  Spa Equipment: 2670만 8,584원
10. Television Projectors: 2611만 5,148원


In [147]:
# Top 10 Canada products by discounted price, across all categories.
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===")

top_bestseller_canada = analyzer.top_items_multi_filter(
    df_name='Canada',                       # dataframe name
    column='product_name',              # column to display
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_canada)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===
1. ECOHOUSEMART | Wooden CARPORT for 2 Vehicles & Patio Cover 24 X 24 X 16'6" | Engineered Wood, GLT | PREFABRICATED DIY: 4049만 5,049원
2. ECOHOUSEMART | Wooden CARPORT for 2 Vehicles & Patio Cover 20 X 22 X 16 | Engineered Wood, GLT | PREFABRICATED DIY: 3421만 396원
3. Panasonic Solutions Company PT-MZ16KLBU Projector/Panel: 3176만 2,277원
4. ZHAOLEI Zero SL-Track Full Body Massage Chair w/Voice Control Heat Roller: 2921만 6,782원
5. Projector Projector 1080P Home Theater Cinema 1280 * 720 Video: 2773만 5,099원
6. VENBER Electric Massage Chair Zero Gravity Massage Chair Electric Heating Recline Full Body Massage Chair Home Automatic Zero Gravity Massage Chair Electric: 2761만 4,653원
7. VENBER Electric Massage Chair Zero Gravity Electric Full Body Massage Chair Electric Professional Shiatsu Back Relaxing Rocking Portable Smart: 2723만 2,089원
8. VENBER Electric Massage Chair Electric Massage Chair Full Body 4D Zero Gravity Multi-Functional Latest Leather T

In [169]:
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Laptop Computers')  ===")

# Top 10 products by discounted price within the 'Laptop Computers' category
filters = {
    'category': 'Laptop Computers',
    # 'isBestSeller': True  # filters can be added or removed here
}

top_bestseller_laptop_canada = analyzer.top_items_multi_filter(
    df_name='Canada',                       # dataframe name
    column='product_name',              # column to display
    filters=filters,
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_laptop_canada)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Laptop Computers')  ===
1. MSI Raider GE68HX 16" QHD+ (2560 x 1600) 240Hz Gaming Laptop - 13th Gen Intel Core i9-13950HX 24-Core up to 5.50 GHz CPU, 64GB DDR5 RAM, 8TB NVMe SSD, GeForce RTX 4070 8GB GDDR6, Windows 11 Home: 854만 1,940원
2. ASUS ROG Zephyrus G15 Gaming & Business Laptop (AMD Ryzen 9 5900HS 8-Core, 16GB RAM, 2x4TB PCIe SSD (8TB), 15.6" 2K Quad HD (2560x1440), GeForce RTX 3080, WiFi, Bluetooth, Win 11 Pro): 809만 4,207원
3. Toughbook Panasonic 40, FZ-40 MK1, Intel i5-1145G7, 14” Touch, 32GB, 1TB Opal SSD, 4G LTE: 803만 9,594원
4. ASUS ROG Zephyrus G15 Gaming & Business Laptop (AMD Ryzen 9 5900HS 8-Core, 16GB RAM, 2x4TB PCIe SSD (8TB), 15.6" 2K Quad HD (2560x1440), GeForce RTX 3080, WiFi, Bluetooth, Win 11 Home): 787만 4,405원
5. Toughbook Panasonic 40, FZ-40 MK1, Intel i5-1145G7, 14” Touch, 32GB, 1TB Opal SSD: 748만 5,138원
6. Panasonic Toughbook 40, FZ-40 MK1, Intel® i5-1145G7, 14” Touch, 16GB, 512GB Opal SSD, 4G LTE: 707만 9,207원
7. SAMSUNG 85-In

* * *
### 국가 별 데이터 파악 - (3) 영국

In [149]:
# Preview the first three rows of the UK data.
# NOTE(review): in the captured preview the `category` column holds True/False
# and `boughtInLastMonth` holds category names such as "Hi-Fi Speakers", so the
# UK columns look mislabeled -- verify the loader's column mapping.
uk_df.head(3)

Unnamed: 0,product_id,product_name,category,discounted_price,rating,reviews,img_link,product_link,isBestSeller,boughtInLastMonth,discounted_price_KRW
0,B09B96TG33,"Echo Dot (5th generation, 2022 release) | Big ...",False,21.99,4.7,15308,https://m.media-amazon.com/images/I/71C3lbbeLs...,https://www.amazon.co.uk/dp/B09B96TG33,0,Hi-Fi Speakers,39128
1,B01HTH3C8S,"Anker Soundcore mini, Super-Portable Bluetooth...",True,23.99,4.7,98099,https://m.media-amazon.com/images/I/61c5rSxwP0...,https://www.amazon.co.uk/dp/B01HTH3C8S,0,Hi-Fi Speakers,42686
2,B09B8YWXDF,"Echo Dot (5th generation, 2022 release) | Big ...",False,21.99,4.7,15308,https://m.media-amazon.com/images/I/61j3SEUjMJ...,https://www.amazon.co.uk/dp/B09B8YWXDF,0,Hi-Fi Speakers,39128


In [150]:
# Category breakdown of the UK data, limited to the top 20 categories.
# NOTE(review): the captured output lists only two "categories" (0 and 1), which
# suggests the UK `category` column does not hold real category names -- confirm.
category_analysis = analyzer.analyze_categories('UK', top_n=20)


=== UK 카테고리 분석 ===
총 카테고리 수: 2개
총 상품 수: 2,222,724개

상위 20개 카테고리:
 1. 0                                        2,216,707개 (99.73%)
 2. 1                                        6,017개 (0.27%)

기타 카테고리: 0개 (0.0%)


In [151]:
analyzer.analyze_category_metrics('UK', 'discounted_price_KRW')  # mean price per category


=== 카테고리별 평균 discounted_price_KRW 분석 (상위 20개) ===
 1. 0                                        16만 8,060원 (상품수: 2,216,707.0개)
 2. 1                                        4만 1,599원 (상품수: 6,017.0개)


In [152]:
analyzer.analyze_category_metrics('UK', 'rating') # mean rating per category


=== 카테고리별 평균 rating 분석 (상위 20개) ===
 1. 1                                        4.42 (상품수: 6,017.0개)
 2. 0                                        2.03 (상품수: 2,216,707.0개)


In [153]:
# Show the `category` value of each of the 10 UK listings with the highest discounted_price_KRW.
# NOTE(review): the captured output prints "False" for every row, so the UK
# `category` column apparently holds a boolean flag rather than a category name -- confirm.
print(f"=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===")

top_bestseller_cat_uk = analyzer.top_items_multi_filter(
    df_name='UK',
    column='category',
    sort_by='discounted_price_KRW',
    n=10
)

print_top_items(top_bestseller_cat_uk)

=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===
1. False: 1억 7793만 5,943원
2. False: 1억 3834만 8,754원
3. False: 4390만 4,964원
4. False: 4178만 4,217원
5. False: 4080만 6,797원
6. False: 3914만 4,128원
7. False: 3914만 4,128원
8. False: 3819만 3,629원
9. False: 3819만 3,629원
10. False: 3819만 3,629원


In [154]:
# Top 10 UK products by discounted price, across all categories.
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===")

top_bestseller_uk = analyzer.top_items_multi_filter(
    df_name='UK',                       # dataframe name
    column='product_name',              # column to display
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_uk)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===
1. HB FBA Test Treadmill: 1억 7793만 5,943원
2. Générique L.A 350° - I5 11400F - RTX 3050 8G - 16GB RAM - NVME 1TB: 1억 3834만 8,754원
3. Abaodam decor christmas ornaments Desk Ornament Designed Storm Bottle Desktop Adornment Glass Storm Bottle Home Ornament girl dining table white globe High borosilicate glass: 4390만 4,964원
4. BM4445-FI1-01200R10-03-E03 Driver: 4178만 4,217원
5. HEDUSA Halloween 3D Holographic Fan 3D Hologram Fan Display 3D Animation Video Projector Advertising Light Led Signboard Board Transmit Picture Video For Ads Display (Size : 150cm): 4080만 6,797원
6. YXHUI Kids Girls Figure Skating Dress Long Sleeves Rhinestone Flowers Splice Ice Skating Roller Skating Ballet Dress,Pink-XL: 3914만 4,128원
7. YXHUI Kids Girls Figure Skating Dress Long Sleeves Rhinestone Flowers Splice Ice Skating Roller Skating Ballet Dress,Pink-XS: 3914만 4,128원
8. Outdoor Furniture Wood Porch Swing Black Brown White 22" H x 52" W x 24.5" D (Color : E) (E): 3819만 3

In [155]:
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Suitcases')  ===")

# Top 10 products by discounted price within the 'Suitcases' category
# NOTE(review): the captured run prints no rows for this filter; presumably the
# UK `category` column does not contain category names -- verify the UK column mapping.
filters = {
    'category': 'Suitcases',
    # 'isBestSeller': True  # filters can be added or removed here
}

top_bestseller_suitcases_uk = analyzer.top_items_multi_filter(
    df_name='UK',                       # dataframe name
    column='product_name',              # column to display
    filters=filters,
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_suitcases_uk)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Suitcases')  ===


* * *
### 국가 별 데이터 파악 - (4) 인도 (2023)

In [156]:
# Preview the first three rows of the India (2023) data.
india_df.head(3)

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,reviews,img_link,product_link,isBestSeller,boughtInLastMonth,discounted_price_KRW,actual_price_KRW
0,B08VJFZQ9S,प्लेन कैज़ुअल वियर बेसबॉल कैप पुरुषों और महिला...,Men's hats and caps,299.0,499.0,40,0.0,0,https://m.media-amazon.com/images/I/61DK1GchGF...,https://www.amazon.in/dp/B08VJFZQ9S,False,0,4838,8074
1,B08VJFYW5Q,"यूनीसेक्स कॉटन एडजस्टेबल बेसबॉल कैप (काला, फ़्...",Men's hats and caps,299.0,499.0,40,0.0,0,https://m.media-amazon.com/images/I/61+nwjWLUg...,https://www.amazon.in/dp/B08VJFYW5Q,False,0,4838,8074
2,B08VJFYVX9,प्लेन कैज़ुअल वियर बेसबॉल कैप पुरुषों और महिला...,Men's hats and caps,275.0,300.0,8,0.0,0,https://m.media-amazon.com/images/I/61DK1GchGF...,https://www.amazon.in/dp/B08VJFYVX9,False,0,4449,4854


In [157]:
# Category breakdown of the India data, limited to the top 20 categories.
category_analysis = analyzer.analyze_categories('India', top_n=20)


=== India 카테고리 분석 ===
총 카테고리 수: 209개
총 상품 수: 1,497,145개

상위 20개 카테고리:
 1. Sports, fitness and outdoor              35,357개 (2.36%)
 2. Home and kitchen                         31,358개 (2.09%)
 3. Men's sunglasses                         26,812개 (1.79%)
 4. Men shoes                                26,679개 (1.78%)
 5. Fashion jewelery                         24,353개 (1.63%)
 6. Women's shoes                            23,296개 (1.56%)
 7. Suitcase, check in and straw             23,194개 (1.55%)
 8. Handbags and Purses                      20,323개 (1.36%)
 9. Amazon fashion                           20,111개 (1.34%)
10. Makeup                                   19,748개 (1.32%)
11. Men's T-shirts and polose                18,979개 (1.27%)
12. Kids shoes                               18,805개 (1.26%)
13. Fragrance                                18,506개 (1.24%)
14. Accessory                                18,480개 (1.23%)
15. Men's innerwear                          18,078개 (1.21%)
16. Men's shir

In [158]:
analyzer.analyze_category_metrics('India', 'discounted_price_KRW')  # mean price per category


=== 카테고리별 평균 discounted_price_KRW 분석 (상위 20개) ===
 1. Digital SLR camera                       182만 9,320원 (상품수: 115.0개)
 2. Laptop                                   122만 2,970원 (상품수: 6,837.0개)
 3. Gamzon                                   108만 1,576원 (상품수: 112.0개)
 4. Air conditioner                          62만 3,799원 (상품수: 146.0개)
 5. Television                               54만 1,466원 (상품수: 597.0개)
 6. Desktop                                  52만 7,177원 (상품수: 2,926.0개)
 7. Dining table                             44만 6,929원 (상품수: 2,523.0개)
 8. Refrigerator                             40만 9,138원 (상품수: 2,513.0개)
 9. Exercise and Fitness Aerobic Training Machine 37만 950원 (상품수: 2,605.0개)
10. Fine art                                 33만 3,941원 (상품수: 591.0개)
11. Presius jewelery                         30만 6,714원 (상품수: 11,736.0개)
12. Washing machines and dryers              27만 6,644원 (상품수: 911.0개)
13. Tablet                                   24만 1,058원 (상품수: 1,212.0개)
14. Printer       

In [159]:
analyzer.analyze_category_metrics('India', 'rating') # mean rating per category


=== 카테고리별 평균 rating 분석 (상위 20개) ===
 1. Men's winterwear                         4.52 (상품수: 4,115.0개)
 2. Men's clothes                            4.51 (상품수: 1,966.0개)
 3. Women's kurtas and kurtis                4.45 (상품수: 2,033.0개)
 4. Baby - Pharmacy                          4.41 (상품수: 99.0개)
 5. Male watches                             4.33 (상품수: 1,093.0개)
 6. Vacuum and Flor Care                     4.31 (상품수: 2,115.0개)
 7. EveryD Ectailles                         4.26 (상품수: 19.0개)
 8. Bags and backpacks                       4.22 (상품수: 7,838.0개)
 9. Deep Fat Frears                          4.20 (상품수: 210.0개)
10. Grocery and Tasty Foods                  4.13 (상품수: 1,479.0개)
11. Sanskincare                              4.12 (상품수: 65.0개)
12. Sanskriti_coffi                          4.12 (상품수: 45.0개)
13. Sanskriti & spreads                      4.08 (상품수: 149.0개)
14. Room heater                              4.08 (상품수: 66.0개)
15. Mixer Grinders                           4.05 (상품수: 38

In [160]:
# Show the category of each of the 10 India listings with the highest discounted_price_KRW.
# NOTE(review): judging by the output, rows are individual listings, so the same
# category can appear several times; this is not a per-category aggregate.
print(f"=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===")

top_bestseller_cat_india = analyzer.top_items_multi_filter(
    df_name='India',
    column='category',
    sort_by='discounted_price_KRW',
    n=10
)

print_top_items(top_bestseller_cat_india)

=== 할인 가격 기준 카테고리 순위 (상위 10개)  ===
1. Boys jewelry and jewelery: 1686만 2,605원
2. Loose jamstone and diamond: 1626만 5,517원
3. Boys jewelry and jewelery: 1618만 4,773원
4. Loose jamstone and diamond: 1618만 1,229원
5. Girls' jewelry and jewelery: 1617만 9,530원
6. Boys jewelry and jewelery: 1594만 1,893원
7. Boys jewelry and jewelery: 1464만 7,233원
8. Accessory: 1456만 3,090원
9. Accessory: 1448만 2,184원
10. Accessory: 1312만 3,122원


In [163]:
# Top 10 India products by discounted price, across all categories.
# Unlike the other markets, this cell displays product_id rather than product_name.
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===")

top_bestseller_india = analyzer.top_items_multi_filter(
    df_name='India',                       # dataframe name
    column='product_id',              # column to display
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_india)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리 무관)  ===
1. B0BNTQL4JW: 1686만 2,605원
2. B0BLYV28JP: 1626만 5,517원
3. B0BMVDKXPF: 1618만 4,773원
4. B0BD1C336T: 1618만 1,229원
5. B0CCP6JSPN: 1617만 9,530원
6. B0BMVDLL7F: 1594만 1,893원
7. B0BMVF3SBW: 1464만 7,233원
8. B0BS97VQLX: 1456만 3,090원
9. B0BS98RLP3: 1448만 2,184원
10. B0BYZ2F8SQ: 1312만 3,122원


In [165]:
print(f"=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Laptop')  ===")

# Top 10 products by discounted price within the 'Laptop' category
filters = {
    'category': 'Laptop',
    # 'isBestSeller': True  # filters can be added or removed here
}

top_bestseller_laptop_india = analyzer.top_items_multi_filter(
    df_name='India',                       # dataframe name
    column='product_name',              # column to display
    filters=filters,
    sort_by='discounted_price_KRW',     # sort key
    n=10                                # number of results
)

print_top_items(top_bestseller_laptop_india)

=== 할인 가격 기준 제품 순위 (상위 10개, 카테고리: 'Laptop')  ===
1. MSI Titan GT77 HX, Intel 13th Gen. i9-13980HX, 44CM FHD 144Hz मिनी LED, HDR 1000 गेमिंग लैपटॉप (64GB/4TB NVMe SSD/Windows 11 Home/Nvidia GeForce RTX4090, 16GB GDDR6/कोर काला/3.3kg), 13VI-092IN: 922만 3,139원
2. Lenovo ThinkPad T15g Gen 2 20YS002YUS 15.6" टचस्क्रीन नोटबुक - 4K UHD - 3840 x 2160 - Intel Core i9 11th Gen i9-11950H ऑक्टा-कोर (8 कोर) 2.60 GHz - 32 GB RAM - 1 TB SSD - काला: 865만 1,116원
3. ASUS ROG Strix निशान 18 (2023) गेमिंग लैपटॉप, 18 "नेबुला डिस्प्ले 16:10 QHD 240Hz/3ms, GeForce RTX 4090, Intel Core i9-13980HX, 32GB DDR5, 2TB PCIe SSD, वाई-फाई 6E, विंडोज 11 प्रो, G834JY-XS97, काला: 837만 8,187원
4. ASUS ROG Zephyrus S अल्ट्रा स्लिम गेमिंग लैपटॉप, 15.6 "144Hz IPS-टाइप फुल HD, GeForce RTX 2080, Intel Core i7-8750H CPU, 16GB DDR4, 512GB PCIe Nvme SSD, ऑरा सिंक RGB, विंडोज 10 प्रो - GX531GX-XS74: 742만 4,466원
5. Dell Alienware m18 R1 गेमिंग लैपटॉप, Intel Core i9-13980HX प्रोसेसर/64GB/1TB SSD/18.0" (45.72cm) QHD+ FHD 165Hz/NVIDIA 