In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [125]:
df = pd.read_csv('../data/gini_2023.csv')
df = df.drop(["Indicator Name", "Indicator Code"], axis=1)
df = pd.melt(df, id_vars=["Country Name", "Country Code"])
df = df.rename(columns={"variable": "Year", "value": "Gini Index"})
df = df.dropna()

In [146]:
df['Year'] = pd.to_numeric(df['Year'], errors='coerce').astype('Int32')
df['Gini Index'] = pd.to_numeric(df['Gini Index'], errors='coerce').astype('float32')
df.sort_values(['Country Name', 'Year'], ascending=[True, False], inplace=True)
df = df[df['Year'] >= 2000]
df.head()

Unnamed: 0,Country Name,Country Code,Year,Gini Index
15965,Albania,ALB,2020,29.4
15699,Albania,ALB,2019,30.1
15433,Albania,ALB,2018,30.1
15167,Albania,ALB,2017,33.099998
14901,Albania,ALB,2016,33.700001


In [147]:
# Step 4: Group by 'Country Name' and get the first row for each group
latest_df = df.groupby('Country Name').first().reset_index()
latest_df

Unnamed: 0,Country Name,Country Code,Year,Gini Index
0,Albania,ALB,2020,29.400000
1,Algeria,DZA,2011,27.600000
2,Angola,AGO,2018,51.299999
3,Argentina,ARG,2021,42.000000
4,Armenia,ARM,2021,27.900000
...,...,...,...,...
151,Viet Nam,VNM,2020,36.799999
152,West Bank and Gaza,PSE,2016,33.700001
153,"Yemen, Rep.",YEM,2014,36.700001
154,Zambia,ZMB,2015,55.900002


In [148]:
import plotly.express as px

# Create basic choropleth map
fig = px.choropleth(
    latest_df,
    locations="Country Code",
    color="Gini Index",
    color_continuous_scale="inferno_r",
    hover_name="Country Name",
    hover_data=["Gini Index", "Year"],
    projection="natural earth",
    title="Gini Index by Country",
)

fig.update_traces(
    hovertemplate="<b>%{hovertext}</b><br>Gini Index: %{customdata[0]:.2f}<br>Year: %{customdata[1]}"
)
fig.show()