In [6]:
import zipfile
import pandas as pd

# --- Configuration ---------------------------------------------------------
zip_path = "WDI_2009_05.zip"
txt_name = "WDIdata_May09.txt"

# WDI series codes to extract.
codes = ["NY.GDP.PCAP.PP.KD", "PA.NUS.PPP", "PA.NUS.FCRF", "PX.REX.REER",
         "NE.EXP.GNFS.ZS", "NE.IMP.GNFS.ZS", "SI.POV.GINI"]
# Country codes kept in the exported file.
countries = ["USA", "CHN"]

# Inclusive year window. The output name is derived from it so the file name
# and the exported columns cannot drift apart.
start_year, end_year = 1980, 2007
output_file = f"WDI_selected_{start_year}_{end_year}.csv"

# Identifier columns, in the order they appear in the exported CSV.
ID_COLS = ["Country Name", "Country Code", "Series code", "Series name"]


def load_wdi(zip_path, txt_name):
    """Read one comma-separated member of a zip archive into a DataFrame.

    Prints the archive's member list, then parses ``txt_name`` with
    "Series code" forced to ``str`` and "", "NA" and ".." treated as missing.

    Raises:
        KeyError: if ``txt_name`` is not a member of the archive
            (raised by ``ZipFile.open``).
    """
    with zipfile.ZipFile(zip_path, "r") as z:
        print("zip：", z.namelist())
        with z.open(txt_name) as f:
            return pd.read_csv(
                f,
                sep=",",
                dtype={"Series code": str},
                na_values=["", "NA", ".."],
                low_memory=False,
            )


def year_columns(columns, start_year, end_year):
    """Return the labels in ``columns`` that are years within the window.

    A label counts as a year when its string form consists only of decimal
    digits; both window bounds are inclusive. Input order is preserved.
    """
    selected = []
    for col in columns:
        label = str(col)
        # isdecimal() guarantees that int() can parse the label.
        if label.isdecimal() and start_year <= int(label) <= end_year:
            selected.append(col)
    return selected


def to_wide(df_sub, year_cols):
    """Collapse ``df_sub`` to one row per country/series, sorted by ID_COLS.

    For each year column the first non-null value in the group is kept.
    ``groupby(...).first()`` is used rather than ``pivot_table`` because
    ``pivot_table``'s default ``dropna=True`` discards results that are
    entirely NaN, which would make a series with no data vanish from the
    export instead of appearing as an empty row.

    Raises:
        ValueError: if ``year_cols`` is empty.
    """
    if not year_cols:
        raise ValueError("no year columns selected")
    return (
        df_sub
        .groupby(ID_COLS, sort=True)[year_cols]
        .first()
        .reset_index()
    )


# In a notebook kernel __name__ is "__main__", so this runs as a normal cell;
# the guard only keeps the helpers importable without the archive present.
if __name__ == "__main__":
    df = load_wdi(zip_path, txt_name)

    print(df.columns.tolist())
    print(df.head())

    df_sub = df[df["Series code"].isin(codes)].copy()

    # A mistyped series code would otherwise just produce no rows.
    missing_codes = sorted(set(codes) - set(df_sub["Series code"]))
    if missing_codes:
        print(f"Warning: series codes not found in data: {missing_codes}")

    year_cols = year_columns(df_sub.columns, start_year, end_year)

    df_wide = to_wide(df_sub, year_cols)

    df_final = df_wide[df_wide["Country Code"].isin(countries)]

    df_final.to_csv(output_file, index=False)
    print(f"Done. 输出文件：{output_file}")


zip： ['WDInotes_May09.txt', 'WDIonline_May09.mdb', 'WDIonline_readme.txt', 'WDIseries_Apr09.xls', 'WBonline_readme.txt', 'WDIcountry_Apr09.xls', 'WDIdata_May09.txt']
['Series code', 'Series name', 'Country Code', 'Country Name', '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008']
         Series code                                        Series name  \
0  NY.ADJ.SVNX.GN.ZS  Adjusted net savings, excluding particulate em...   
1  NY.ADJ.SVNX.GN.ZS  Adjusted net savings, excluding particulate em...   
2  NY.ADJ.SVNX.GN.ZS  Adjusted net savings, excluding particulate em...   
3  NY.ADJ.SVNX.GN.ZS  Adjusted net savings, excluding particulate em...   
4  N