In [2]:
import pandas as pd
from pathlib import Path

In [3]:
file_path = Path("Resources/gdp_data.csv") 


In [4]:
# Reload the dataset with correct header handling
df = pd.read_csv(file_path, skiprows=3)

# Drop unnecessary columns (e.g., 'Unnamed' columns if present)
df = df.dropna(axis=1, how='all')

# Rename columns for clarity
df.rename(columns={df.columns[0]: "Country Name", df.columns[1]: "Country Code",
                   df.columns[2]: "Indicator Name", df.columns[3]: "Indicator Code"}, inplace=True)

# Reshape the data to have 'Year' as a column instead of multiple year-based columns
df = df.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"], 
             var_name="Year", value_name="GDP per Capita (Constant 2015 US$)")

# Convert year and GDP per capita columns to numeric types
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
df["GDP per Capita (Constant 2015 US$)"] = pd.to_numeric(df["GDP per Capita (Constant 2015 US$)"], errors="coerce")

# Drop rows with missing values in key columns
df_cleaned = df.dropna(subset=["Country Name", "Year", "GDP per Capita (Constant 2015 US$)"])

# Display cleaned dataset
df_cleaned.head()




Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,GDP per Capita (Constant 2015 US$)
1,Africa Eastern and Southern,AFE,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,1172.316285
3,Africa Western and Central,AFW,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,1110.513849
9,Argentina,ARG,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,7397.109655
13,Australia,AUS,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,19905.313467
14,Austria,AUT,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,11894.744085


In [5]:
# Remove unnecessary columns: "Indicator Name" and "Indicator Code"
df_cleaned = df_cleaned.drop(columns=["Indicator Name", "Indicator Code"], errors="ignore")

# Reset index to remove any unwanted numbering
df_cleaned.reset_index(drop=True, inplace=True)

# Display the updated dataset
df_cleaned.head()

Unnamed: 0,Country Name,Country Code,Year,GDP per Capita (Constant 2015 US$)
0,Africa Eastern and Southern,AFE,1960,1172.316285
1,Africa Western and Central,AFW,1960,1110.513849
2,Argentina,ARG,1960,7397.109655
3,Australia,AUS,1960,19905.313467
4,Austria,AUT,1960,11894.744085


In [6]:
# Filter the dataset to include only Canada and the USA
df_filtered = df_cleaned[df_cleaned["Country Name"].isin(["Canada", "United States"])]

# Display the filtered dataset
df_filtered.head()


Unnamed: 0,Country Name,Country Code,Year,GDP per Capita (Constant 2015 US$)
18,Canada,CAN,1960,15432.471783
138,United States,USA,1960,18991.544603
162,Canada,CAN,1961,15605.523223
289,United States,USA,1961,19108.935365
313,Canada,CAN,1962,16455.753516


In [None]:
# Filter the dataset for the years 2014 to 2024
GDP = df_filtered[(df_filtered["Year"] >= 2014) & (df_filtered["Year"] <= 2024)]
GDP.reset_index(drop=True, inplace=True)  # Removes the first column

# Display the first few rows of the filtered dataset
GDP.head()



Unnamed: 0,Country Name,Country Code,Year,GDP per Capita (Constant 2015 US$)
0,Canada,CAN,2014,43643.235647
1,United States,USA,2014,55817.563247
2,Canada,CAN,2015,43594.194105
3,United States,USA,2015,57040.208214
4,Canada,CAN,2016,43551.342602


In [9]:
# Format the "GDP per Capita (Constant 2015 US$)" column to two decimal places
GDP["GDP per Capita (Constant 2015 US$)"] = GDP["GDP per Capita (Constant 2015 US$)"].round(2)

# Display the updated dataset
GDP.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  GDP["GDP per Capita (Constant 2015 US$)"] = GDP["GDP per Capita (Constant 2015 US$)"].round(2)


Unnamed: 0,Country Name,Country Code,Year,GDP per Capita (Constant 2015 US$)
0,Canada,CAN,2014,43643.24
1,United States,USA,2014,55817.56
2,Canada,CAN,2015,43594.19
3,United States,USA,2015,57040.21
4,Canada,CAN,2016,43551.34


In [10]:
GDP.to_csv("Resources/Canada_USA_GDP_2014-2024.csv")