In [None]:
import requests
import zipfile
import io
import xml.etree.ElementTree as ET
import pandas as pd
import json
from pathlib import Path

# Download URL of the IMF WEO April 2025 SDMX data ZIP archive
url = "https://www.imf.org/-/media/Files/Publications/WEO/WEO-Database/2025/april/WEOAPR2025-SDMXData.ashx"

# Fetch the archive with a 60-second timeout and fail loudly on an HTTP error
resp = requests.get(url, timeout=60)
resp.raise_for_status()

# Wrap the downloaded bytes in an in-memory buffer so the ZIP never has to be
# written to disk, then unpack everything into the 'weo_sdmx' folder
archive_buffer = io.BytesIO(resp.content)
with zipfile.ZipFile(archive_buffer) as z:
    z.extractall(path="weo_sdmx")

In [None]:
# Country/group codes and their names, kept as one tab-separated text block
# (one "<code><TAB><name>" entry per line). The list mixes individual
# countries with aggregate groups such as "World" or "G7".
raw_data = """
001	World
110	Advanced Economies
111	United States
112	United Kingdom
119	G7
122	Austria
123	Other Advanced Economies (Advanced Economies excluding G7 and Euro Area countries)
124	Belgium
128	Denmark
132	France
134	Germany
135	San Marino
136	Italy
137	Luxembourg
138	Netherlands
142	Norway
144	Sweden
146	Switzerland
156	Canada
158	Japan
163	Euro area
171	Andorra
172	Finland
174	Greece
176	Iceland
178	Ireland
181	Malta
182	Portugal
184	Spain
186	Türkiye
193	Australia
196	New Zealand
199	South Africa
200	Emerging Market and Developing Economies
205	Latin America and the Caribbean
213	Argentina
218	Bolivia
223	Brazil
228	Chile
233	Colombia
238	Costa Rica
243	Dominican Republic
248	Ecuador
253	El Salvador
258	Guatemala
263	Haiti
268	Honduras
273	Mexico
278	Nicaragua
283	Panama
288	Paraguay
293	Peru
298	Uruguay
299	Venezuela
311	Antigua and Barbuda
313	Bahamas, The
314	Aruba
316	Barbados
321	Dominica
328	Grenada
336	Guyana
339	Belize
343	Jamaica
359	Puerto Rico
361	St. Kitts and Nevis
362	St. Lucia
364	St. Vincent and the Grenadines
366	Suriname
369	Trinidad and Tobago
400	Middle East and Central Asia (MECA)
419	Bahrain
423	Cyprus
429	Iran
433	Iraq
436	Israel
439	Jordan
443	Kuwait
446	Lebanon
449	Oman
453	Qatar
456	Saudi Arabia
463	Syria
466	United Arab Emirates
469	Egypt
474	Yemen
487	West Bank and Gaza
505	Emerging and Developing Asia
510	ASEAN-5
512	Afghanistan
513	Bangladesh
514	Bhutan
516	Brunei Darussalam
518	Myanmar
522	Cambodia
524	Sri Lanka
528	Taiwan Province of China
532	Hong Kong SAR
534	India
536	Indonesia
537	Timor-Leste
542	Korea
544	Lao P.D.R.
546	Macao SAR
548	Malaysia
556	Maldives
558	Nepal
564	Pakistan
565	Palau
566	Philippines
576	Singapore
578	Thailand
582	Vietnam
603	Sub-Sahara Africa
611	Djibouti
612	Algeria
614	Angola
616	Botswana
618	Burundi
622	Cameroon
624	Cabo Verde
626	Central African Republic
628	Chad
632	Comoros
634	Congo, Republic of
636	Congo, Democratic Republic of the
638	Benin
642	Equatorial Guinea
643	Eritrea
644	Ethiopia
646	Gabon
648	Gambia, The
652	Ghana
654	Guinea-Bissau
656	Guinea
662	Côte d'Ivoire
664	Kenya
666	Lesotho
668	Liberia
672	Libya
674	Madagascar
676	Malawi
678	Mali
682	Mauritania
684	Mauritius
686	Morocco
688	Mozambique
692	Niger
694	Nigeria
698	Zimbabwe
714	Rwanda
716	São Tomé and Príncipe
718	Seychelles
722	Senegal
724	Sierra Leone
726	Somalia
728	Namibia
732	Sudan
733	South Sudan
734	Eswatini
738	Tanzania
742	Togo
744	Tunisia
746	Uganda
748	Burkina Faso
754	Zambia
813	Solomon Islands
819	Fiji
826	Kiribati
836	Nauru
846	Vanuatu
853	Papua New Guinea
862	Samoa
866	Tonga
867	Marshall Islands
868	Micronesia
869	Tuvalu
903	Emerging and Developing Europe
911	Armenia
912	Azerbaijan
913	Belarus
914	Albania
915	Georgia
916	Kazakhstan
917	Kyrgyz Republic
918	Bulgaria
921	Moldova
922	Russia
923	Tajikistan
924	China
925	Turkmenistan
926	Ukraine
927	Uzbekistan
935	Czech Republic
936	Slovak Republic
939	Estonia
941	Latvia
942	Serbia
943	Montenegro, Rep. of
944	Hungary
946	Lithuania
948	Mongolia
960	Croatia
961	Slovenia
962	North Macedonia
963	Bosnia and Herzegovina
964	Poland
967	Kosovo
968	Romania
998	European Union
"""

# Turn the tab-separated text into a {code: name} mapping. Each row is split
# on its first tab only, so any further tabs would stay inside the name.
country_dict = dict(
    row.strip().split('\t', 1)
    for row in raw_data.strip().split('\n')
)

# Serialize the mapping (keeping non-ASCII names readable) and store it on disk
mapping_json = json.dumps(country_dict, ensure_ascii=False, indent=2)
with open("country_mapping.json", "w", encoding="utf-8") as f:
    f.write(mapping_json)

print("JSON 파일이 성공적으로 생성되었습니다: country_mapping.json")

JSON 파일이 성공적으로 생성되었습니다: country_mapping.json


In [None]:
# Path to the WEO SDMX XML file. It is expressed relative to the working
# directory because the download step extracts the archive into the local
# 'weo_sdmx' folder; a hard-coded absolute home-directory path would only
# work on a single machine.
xml_path = Path("weo_sdmx") / "WEOAPR2025" / "xmlfile_APR2025.xml"

# Path to the country code -> name mapping JSON
json_path = Path("country_mapping.json")

# Parse the XML document
tree = ET.parse(xml_path)
root = tree.getroot()

records = []

# Walk every Series element in the document
for series in root.iterfind(".//Series"):
    concept = series.attrib.get("CONCEPT")
    unit = series.attrib.get("UNIT")

    # Keep only nominal GDP expressed in U.S. dollars
    if concept != "NGDPD" or unit != "P":
        continue

    # Parse SCALE only for the series we actually use, so a non-numeric SCALE
    # on an unrelated series cannot abort the whole run.
    scale = float(series.attrib.get("SCALE", "1"))
    country = series.attrib.get("REF_AREA")

    for obs in series.findall("Obs"):
        year = obs.attrib.get("TIME_PERIOD")
        value = obs.attrib.get("OBS_VALUE")

        # Missing (None) or non-numeric observation values are skipped
        try:
            gdp = float(value) * scale
        except (ValueError, TypeError):
            continue
        records.append((country, year, gdp))

# Build the DataFrame: one row per (area, year) observation
df = pd.DataFrame(records, columns=["CountryCode", "Year", "GDP_USD"])

# Keep only the 2025 observations (TIME_PERIOD is compared as a string)
df_2025 = df[df["Year"] == "2025"].copy()

# Convert USD to billions of USD and round to two decimals
df_2025["GDP_Billion_USD"] = (df_2025["GDP_USD"] / 1e9).round(2)

# Load the country code -> name mapping
with open(json_path, encoding="utf-8") as f:
    country_dict = json.load(f)

# Attach the human-readable name; codes missing from the mapping become NaN
df_2025["Country"] = df_2025["CountryCode"].map(country_dict)

# Aggregate/group codes that must not be ranked as countries. "510" (ASEAN-5)
# matches none of the keywords below and used to slip through the filter;
# "123" (Other Advanced Economies) is listed explicitly instead of relying
# on a keyword match alone.
exclude_codes = [
    "001", "110", "119", "123", "163", "200", "205",
    "400", "505", "510", "603", "903", "998",
]
# Additional safety net: drop any name that contains one of these words
exclude_keywords = ["World", "Economies", "Union", "Market", "Advanced", "Emerging"]

# Drop aggregates by code and by keyword; rows with no mapped name are kept
# (na=False makes the keyword test evaluate to False for them)
df_2025 = df_2025[
    ~df_2025["CountryCode"].isin(exclude_codes) &
    ~df_2025["Country"].str.contains("|".join(exclude_keywords), case=False, na=False)
]

# Select the output columns and sort by GDP, largest first
df_2025_sorted = df_2025[["CountryCode", "Country", "Year", "GDP_Billion_USD"]].sort_values(by="GDP_Billion_USD", ascending=False)

# Display the result (last expression of the notebook cell)
df_2025_sorted

Unnamed: 0,CountryCode,Country,Year,GDP_Billion_USD
147,111,United States,2025,30507.22
8993,924,China,2025,19231.71
555,134,Germany,2025,4744.80
4761,534,India,2025,4187.02
994,158,Japan,2025,4186.43
...,...,...,...,...
5204,565,Palau,2025,0.33
8108,826,Kiribati,2025,0.31
8373,867,Marshall Islands,2025,0.30
8135,836,Nauru,2025,0.17
