In [146]:
import pandas as pd
from IPython.display import display, HTML
from utilities.reader_tools.GenericDatasetReader import DataframeFromCSV

# Load the population table from the CSV file through the project's reader helper.
# NOTE(review): presumably DataframeFromCSV returns a pandas DataFrame and resolves
# the file name against a project data directory — confirm in GenericDatasetReader.
raw_population_df = DataframeFromCSV("population_total.csv")
# Bare last expression so the notebook renders the frame as the cell output.
raw_population_df

Unnamed: 0,country,year,population
0,China,2020.0,1.439324e+09
1,China,2019.0,1.433784e+09
2,China,2018.0,1.427648e+09
3,China,2017.0,1.421022e+09
4,China,2016.0,1.414049e+09
...,...,...,...
4180,United States,1965.0,1.997337e+08
4181,United States,1960.0,1.867206e+08
4182,United States,1955.0,1.716853e+08
4183,India,1960.0,4.505477e+08


In [147]:
# Cufflinks is no longer the standard way to plot DataFrames with Plotly, so its
# setup is kept below, commented out, for reference only.

# import cufflinks as cf
# cf.getThemes()
# cf.set_config_file(sharing="public", theme="pearl", offline=True)

# Plotly Express is the alternative used for the charts below.
import plotly.express as px

In [148]:
# Dataframe preparation

# Build the year x country pivot table.
# dropna() is called without inplace=True so raw_population_df stays exactly as
# it was loaded and displayed above; this cell can be re-run without side effects.
population_df = (
    raw_population_df
    .dropna()
    .pivot(index="year", columns="country", values="population")
)

# The year index comes out as float64 (e.g. 2020.0); cast it to int so rows can
# be selected with plain integer labels such as .loc[[2020]].
population_df.index = population_df.index.astype(int)

# Keep only the countries compared in the charts below.
population_df = population_df[["France", "Spain", "Italy"]]
population_df

country,France,Spain,Italy
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1955,43411367.0,29048395.0,48335578.0
1960,45673146.0,30402411.0,49699951.0
1965,48746652.0,32146263.0,51677257.0
1970,50763920.0,33883749.0,53518969.0
1975,52688576.0,35879209.0,55265284.0
1980,53868009.0,37698196.0,56349349.0
1985,55254848.0,38733876.0,56936774.0
1990,56666849.0,39202525.0,57048236.0
1995,57801892.0,39787419.0,57174408.0
2000,59015096.0,40824754.0,56692178.0


##### LINEPLOT

In [149]:
# px.line draws an interactive line chart. Given a wide frame, Plotly Express
# puts the index on the x-axis and draws one line per column.
# The keys in `labels` must match the names of the frame's axes: the index is
# named "year" and the columns axis is named "country", so the legend title is
# keyed by "country" ("variable" is only used when the columns axis has no name).
fig = px.line(population_df,
              title="Population Growth Over Time",
              labels={"year": "Year", "value": "Population", "country": "Country"})

# Display the interactive plot
fig.show()

##### BARPLOT

In [150]:
# Preview the 2020 row; selecting with a list ([[2020]]) keeps the result a
# one-row DataFrame instead of collapsing it to a Series.
population_df.loc[[2020]]

country,France,Spain,Italy
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020,65273511.0,46754778.0,60461826.0


In [151]:
# Bar colour assigned to each country.
country_colors = {"Italy": "green",
                  "France": "blue",
                  "Spain": "red"}

# Take the 2020 slice once and reuse it. Passing the index of the very frame
# given as data lets Plotly Express recognise it as that frame's index, so the
# x-axis is keyed by the index name ("year"), as in the multi-year bar chart.
population_2020 = population_df.loc[[2020]]

fig = px.bar(population_2020,
             x=population_2020.index,
             y=population_2020.columns,
             barmode="group",
             title="Population in 2020",
             # Map the names px derives from the frame (index name, "value",
             # columns-axis name) to the text shown on the axes and legend.
             labels={
                "year": "Analysis Year",
                "value": "Total Population",
                "country": "Nation"
             },
             color_discrete_map=country_colors
            )

# Display the interactive plot
fig.show()

##### MULTIPLE BARPLOTS

In [152]:
# Rows for 2018-2020 (all columns), used by the grouped bar chart below.
multiple_bar = population_df.loc[[2018, 2019, 2020], :]
multiple_bar

country,France,Spain,Italy
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,64990511.0,46692858.0,60627291.0
2019,65129728.0,46736776.0,60550075.0
2020,65273511.0,46754778.0,60461826.0


In [153]:
# Fixed colour per country so the bars are coloured consistently.
country_colors = dict(Italy="green", France="blue", Spain="red")

# Grouped bars: one cluster per year (taken from the index), one bar per
# country column.
fig = px.bar(
    multiple_bar,
    x=multiple_bar.index,
    y=multiple_bar.columns,
    title="Population",
    barmode="group",
    color_discrete_map=country_colors,
    # Replace the names px derives from the frame with readable axis/legend text.
    labels={"year": "Analysis Year", "value": "Total Population", "country": "Nation"},
)

# Render the interactive figure.
fig.show()

##### PIECHART

In [154]:
# One-row frame holding the 2020 population of every selected country.
population_2020 = population_df.loc[[2020], :]
population_2020

country,France,Spain,Italy
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020,65273511.0,46754778.0,60461826.0


In [155]:
# px.pie expects one row per slice. The 2020 data is a single wide row, so
# reshape it into a tidy two-column frame (country, population) and refer to the
# columns by name instead of passing arrays that do not line up with the rows
# of the frame handed to px.
population_2020_long = (
    population_2020.loc[2020]
    .rename("population")
    .rename_axis("country")
    .reset_index()
)

fig = px.pie(population_2020_long,
             values="population",
             names="country",
             title="Population",
             # Readable hover text for the two columns.
             labels={"country": "Country", "population": "Population"}
             )

fig.show()

##### BOXPLOT

In [168]:
# Single-column frame with Italy's population per year (list selector keeps it a DataFrame).
population_italy = population_df.loc[:, ["Italy"]]
population_italy

country,Italy
year,Unnamed: 1_level_1
1955,48335578.0
1960,49699951.0
1965,51677257.0
1970,53518969.0
1975,55265284.0
1980,56349349.0
1985,56936774.0
1990,57048236.0
1995,57174408.0
2000,56692178.0


In [169]:
# Box plot of Italy's population values across the years in the table.
fig = px.box(
    population_italy,
    labels={"value": "Population"},
    title="Population",
)

# Render the interactive figure.
fig.show()

In [172]:
# Box colour assigned to each country.
country_colors = {"Italy": "green",
                  "France": "blue",
                  "Spain": "red"}

# For wide-form input px.box puts the column names on the x-axis but does not
# assign a colour dimension, so color_discrete_map alone has nothing to map.
# color="country" (the name of the columns axis) colours each box by country;
# boxmode="overlay" keeps each box centred on its category instead of being
# offset as a member of a colour group.
fig = px.box(population_df,
             color="country",
             boxmode="overlay",
             title="Population",
             labels={"value": "Population", "country": "Country"},
             color_discrete_map=country_colors
             )

fig.show()

##### HISTOGRAMS

In [173]:
# Same Italy-only selection as in the BOXPLOT section, repeated here so the
# histogram cells show their input right above the chart.
population_italy = population_df[["Italy"]]
population_italy

country,Italy
year,Unnamed: 1_level_1
1955,48335578.0
1960,49699951.0
1965,51677257.0
1970,53518969.0
1975,55265284.0
1980,56349349.0
1985,56936774.0
1990,57048236.0
1995,57174408.0
2000,56692178.0


In [176]:
# Histogram of Italy's population values (how many sampled years fall in each bin).
fig = px.histogram(
    population_italy,
    title="Population",
)

# Render the interactive figure.
fig.show()

In [180]:
# Overlay of the population histograms of all selected countries; wide-form
# input gives one colour per column.
fig = px.histogram(population_df,
             title="Population",
             labels={"value": "Population", "country": "Country"}
             )

# The "count" y-axis title is generated by px.histogram itself rather than
# looked up in `labels`, so it is set explicitly on the axis.
fig.update_yaxes(title_text="Count")

fig.show()

##### SCATTER PLOT

In [183]:
# Scatter of population against year: the index supplies the x-axis and each
# country column becomes its own marker series.
fig = px.scatter(
    population_df,
    labels={"year": "Year", "value": "Population"},
    title="Population",
)

# Render the interactive figure.
fig.show()