In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:


# Load the World Bank GDP CSV into a DataFrame using read_csv's default
# parsing; no rows are skipped by this call.
worldbank_all_df = pd.read_csv(
    'data/World_Bank/GDP_1960_2023.csv',
    encoding='utf-8',
)

# Show the parsed column labels so their exact spelling can be checked
print(worldbank_all_df.columns)

In [None]:
# Drop the 'Unnamed: 66' column that came out of read_csv
# (presumably produced by a trailing delimiter in the file - TODO confirm).
# errors='ignore' keeps this cell safe to re-run: without it, a second run
# raises KeyError because the column has already been removed.
worldbank_all_df = worldbank_all_df.drop(columns=['Unnamed: 66'], errors='ignore')

In [None]:
# Preview the first five rows of the loaded table
worldbank_all_df.head(5)

In [None]:
# For every column after the first two: parse the values as numbers
# (anything unparseable becomes NaN), then rescale to millions.
for gdp_col in worldbank_all_df.columns[2:]:
    parsed_values = pd.to_numeric(worldbank_all_df[gdp_col], errors='coerce')
    worldbank_all_df[gdp_col] = parsed_values / 1e6

In [None]:
def format_millions(value):
    """Return `value` as a two-decimal string with an 'M' suffix, or "NaN" if it is missing."""
    return f"{value:.2f}M" if pd.notna(value) else "NaN"


# Format the numeric columns (everything after the first two) as strings.
# Series.map is applied column by column because DataFrame.applymap is
# deprecated (pandas >= 2.1). Each column is replaced outright by label
# instead of being written through .iloc, which would try to store strings
# inside the existing float columns (a deprecated incompatible-dtype assignment).
for gdp_col in worldbank_all_df.columns[2:]:
    worldbank_all_df[gdp_col] = worldbank_all_df[gdp_col].map(format_millions)

In [None]:
# Relabel the columns by position: two identifier columns followed by one
# string label per year from 1960 through 2023.
worldbank_all_df.columns = ['Country Name', 'Country Code', *map(str, range(1960, 2024))]

In [None]:
# Preview the relabelled frame. A bare last expression is rendered by Jupyter
# as a rich table, whereas print() flattens the frame to truncated plain text.
worldbank_all_df.head()

In [None]:
# Turn the formatted GDP strings (e.g. "12.34M") back into numbers.
# Skip the first two columns (Country Name and Country Code).
for column in worldbank_all_df.columns[2:]:
    # astype(str) keeps this cell safe to re-run: once a column is already
    # numeric the .str accessor would otherwise raise AttributeError.
    cleaned = (
        worldbank_all_df[column]
        .astype(str)
        .str.replace(',', '', regex=False)   # remove thousands separators, if any
        .str.replace('M', '', regex=False)   # remove the 'M' suffix
    )
    # Anything that still does not parse (e.g. the "NaN" placeholder) becomes NaN
    worldbank_all_df[column] = pd.to_numeric(cleaned, errors='coerce')

In [None]:
# Calculate the average for each country across the year columns, rounded to
# two decimals. 'Average' itself is excluded from the inputs so that
# re-running this cell does not fold a previously computed (rounded) average
# back into the mean.
year_columns = [col for col in worldbank_all_df.columns[2:] if col != 'Average']
worldbank_all_df['Average'] = worldbank_all_df[year_columns].mean(axis=1).round(2)

worldbank_all_df.head()

In [None]:
# Save the cleaned data to a CSV file (export currently disabled; uncomment the line below to write it)
# worldbank_all_df.to_csv('data/World_Bank/World_Bank_GDP_Cleaned.csv', index=False)


In [None]:
# Build the frame used for plotting: drop the 'Average' and 'Country Code'
# columns, then use the country name as the row index.
plotting_GDP = (
    worldbank_all_df
    .drop(columns=['Average', 'Country Code'])
    .set_index('Country Name')
)


In [None]:
# Flip the frame so the year labels form the index and each country is a column.
# NOTE: this reassigns plotting_GDP, so running the cell twice flips it back.
plotting_GDP = plotting_GDP.transpose()

# Open a 14 x 8 inch matplotlib figure
plt.figure(figsize=(14, 8))

In [None]:

# Plot each country's GDP: one line per column of plotting_GDP, with the
# frame's index on the x-axis.
# The figure is sized here, in the plotting cell. With Jupyter's default
# inline backend every figure is rendered and closed when its cell finishes,
# so a figure created in an earlier cell is no longer current and its
# figsize would not apply to these lines. plt.gcf() reuses a still-open
# current figure, or creates a new one if none exists.
fig = plt.gcf()
fig.set_size_inches(14, 8)
ax = fig.gca()
for country in plotting_GDP.columns:
    ax.plot(plotting_GDP.index, plotting_GDP[country], label=country)