In [1]:
import pandas as pd
from pathlib import Path

In [6]:
file_path = Path("Resources/gdp_data.csv") 


In [15]:
# Reload the dataset with correct header handling
df = pd.read_csv(file_path, skiprows=3)

# Drop unnecessary columns (e.g., 'Unnamed' columns if present)
df = df.dropna(axis=1, how='all')

# Rename columns for clarity
df.rename(columns={df.columns[0]: "Country Name", df.columns[1]: "Country Code",
                   df.columns[2]: "Indicator Name", df.columns[3]: "Indicator Code"}, inplace=True)

# Reshape the data to have 'Year' as a column instead of multiple year-based columns
df = df.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"], 
             var_name="Year", value_name="GDP per Capita (Constant 2015 US$)")

# Convert year and GDP per capita columns to numeric types
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
df["GDP per Capita (Constant 2015 US$)"] = pd.to_numeric(df["GDP per Capita (Constant 2015 US$)"], errors="coerce")

# Drop rows with missing values in key columns
df_cleaned = df.dropna(subset=["Country Name", "Year", "GDP per Capita (Constant 2015 US$)"])

# Display cleaned dataset
df_cleaned.head()




Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,GDP per Capita (Constant 2015 US$)
1,Africa Eastern and Southern,AFE,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,1172.316285
3,Africa Western and Central,AFW,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,1110.513849
9,Argentina,ARG,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,7397.109655
13,Australia,AUS,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,19905.313467
14,Austria,AUT,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,11894.744085


In [16]:
# Filter the dataset to include only Canada and the USA
df_filtered = df_cleaned[df_cleaned["Country Name"].isin(["Canada", "United States"])]

# Display the filtered dataset
df_filtered.head()


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,GDP per Capita (Constant 2015 US$)
35,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,15432.471783
251,United States,USA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1960,18991.544603
301,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1961,15605.523223
517,United States,USA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1961,19108.935365
567,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,1962,16455.753516


In [17]:
# Filter the dataset for the years 2014 to 2024
GDP = df_filtered[(df_filtered["Year"] >= 2014) & (df_filtered["Year"] <= 2024)]

# Display the first few rows of the filtered dataset
GDP.head()



Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,GDP per Capita (Constant 2015 US$)
14399,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2014,43643.235647
14615,United States,USA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2014,55817.563247
14665,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2015,43594.194105
14881,United States,USA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2015,57040.208214
14931,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2016,43551.342602


In [18]:
# Format the "GDP per Capita (Constant 2015 US$)" column to two decimal places
GDP["GDP per Capita (Constant 2015 US$)"] = GDP["GDP per Capita (Constant 2015 US$)"].round(2)

# Display the updated dataset
GDP.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  GDP["GDP per Capita (Constant 2015 US$)"] = GDP["GDP per Capita (Constant 2015 US$)"].round(2)


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,GDP per Capita (Constant 2015 US$)
14399,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2014,43643.24
14615,United States,USA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2014,55817.56
14665,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2015,43594.19
14881,United States,USA,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2015,57040.21
14931,Canada,CAN,GDP per capita (constant 2015 US$),NY.GDP.PCAP.KD,2016,43551.34


In [19]:
GDP.to_csv("Resources/Canada_USA_GDP_2014-2024.csv")