In [1]:
import pandas as pd

df = pd.read_csv("C:/Users/eguen/OneDrive/Desktop/World_GDP.csv")
print(df.head())



       Country    2020      2021      2022      2023      2024      2025
0  Afghanistan   20136   14278.0   14501.0   17248.0       NaN       NaN
1      Albania   15271   18086.0   19185.0   23388.0   27259.0   28372.0
2      Algeria  164774  185850.0  225709.0  247789.0  264913.0  268885.0
3      Andorra    2885    3325.0    3376.0    3786.0    4038.0    4035.0
4       Angola   66521   84375.0  142442.0  109764.0  115946.0  113343.0


In [4]:
df.iloc[:, 1:7] = df.iloc[:, 1:7].T.ffill().T

1       15271.0
2      164774.0
3        2885.0
4       66521.0
         ...   
191    346310.0
192     15532.0
193     20220.0
194     18138.0
195     26878.0
Name: 2020, Length: 196, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.iloc[:, 1:7] = df.iloc[:, 1:7].T.ffill().T


In [5]:
print(df.isna().any(axis=1))

0      False
1      False
2      False
3      False
4      False
       ...  
191    False
192    False
193    False
194    False
195    False
Length: 196, dtype: bool


In [6]:
print(df.head())

       Country      2020      2021      2022      2023      2024      2025
0  Afghanistan   20136.0   14278.0   14501.0   17248.0   17248.0   17248.0
1      Albania   15271.0   18086.0   19185.0   23388.0   27259.0   28372.0
2      Algeria  164774.0  185850.0  225709.0  247789.0  264913.0  268885.0
3      Andorra    2885.0    3325.0    3376.0    3786.0    4038.0    4035.0
4       Angola   66521.0   84375.0  142442.0  109764.0  115946.0  113343.0


In [8]:
import numpy as np
df["log_2025"] = np.log(df["2025"])

In [9]:
def gdp_category(gdp):
    if gdp > 1_000_000:
        return "High"
    elif gdp > 100_000:
        return "Medium"
    else:
        return "Low"
df["gdp_tier_2025"] = df["2025"].apply(gdp_category)

In [10]:
print(df.head())

       Country      2020      2021      2022      2023      2024      2025  \
0  Afghanistan   20136.0   14278.0   14501.0   17248.0   17248.0   17248.0   
1      Albania   15271.0   18086.0   19185.0   23388.0   27259.0   28372.0   
2      Algeria  164774.0  185850.0  225709.0  247789.0  264913.0  268885.0   
3      Andorra    2885.0    3325.0    3376.0    3786.0    4038.0    4035.0   
4       Angola   66521.0   84375.0  142442.0  109764.0  115946.0  113343.0   

    log_2025 gdp_tier_2025  
0   9.755451           Low  
1  10.253158           Low  
2  12.502039        Medium  
3   8.302762           Low  
4  11.638174        Medium  


In [13]:
print(df.columns.tolist())

['Country', '2020', '2021', '2022', '2023', '2024', '2025', 'log_2025', 'gdp_tier_2025']


In [14]:
df["cagr_2021_2025"] = ((df["2025"] / df["2020"]) ** (1/5)) - 1

In [15]:
for year in range(2021, 2026):
    df[f"rank_{year}"] = df[str(year)].rank(ascending=False)

In [16]:
decline_flag = []

for i in range(1, 6):
    prev = str(2019 + i)
    curr = str(2020 + i)
    decline_flag.append(df[curr] < df[prev])

df["any_decline"] = pd.concat(decline_flag, axis=1).any(axis=1)

In [17]:
gdp_min = df["2025"].min()
gdp_max = df["2025"].max()
df["norm_2025"] = (df["2025"] - gdp_min) / (gdp_max - gdp_min)

In [18]:
summary = df.groupby("gdp_tier_2025")["2025"].agg(["count", "mean", "std"])
print(summary)

               count          mean           std
gdp_tier_2025                                   
High              19  4.791329e+06  7.401181e+06
Low              123  2.764788e+04  2.790842e+04
Medium            54  3.592450e+05  2.178813e+05


In [20]:
df_melted = df.melt(id_vars="Country", value_vars=[str(y) for y in range(2020, 2026)],
                    var_name="Year", value_name="GDP")

In [21]:
print(df.head())

       Country      2020      2021      2022      2023      2024      2025  \
0  Afghanistan   20136.0   14278.0   14501.0   17248.0   17248.0   17248.0   
1      Albania   15271.0   18086.0   19185.0   23388.0   27259.0   28372.0   
2      Algeria  164774.0  185850.0  225709.0  247789.0  264913.0  268885.0   
3      Andorra    2885.0    3325.0    3376.0    3786.0    4038.0    4035.0   
4       Angola   66521.0   84375.0  142442.0  109764.0  115946.0  113343.0   

    log_2025 gdp_tier_2025  cagr_2021_2025  rank_2021  rank_2022  rank_2023  \
0   9.755451           Low       -0.030488      135.0      139.0      134.0   
1  10.253158           Low        0.131891      126.0      126.0      118.0   
2  12.502039        Medium        0.102899       56.0       54.0       54.0   
3   8.302762           Low        0.069398      166.0      166.0      165.0   
4  11.638174        Medium        0.112467       72.0       60.0       66.0   

   rank_2024  rank_2025  any_decline  norm_2025  
0     

In [22]:
df.to_csv("C:/Users/eguen/OneDrive/Desktop/World_GDP_transformed.csv", index=False)