<a href="https://colab.research.google.com/github/LeilaMo/Effective_DataVisualization_using_PlotlyExpress/blob/main/Part2_Choropleth_Maps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing the Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import PIL
import plotly.express as px
import plotly.graph_objects as go

### Mounting Google Drive in Google Colab

In [2]:
# Colab-only helper used to attach Google Drive to this runtime's filesystem.
from google.colab import drive
# Mount Drive under /content/drive; force_remount=True asks for a fresh mount even if one already exists.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
ls /content/drive/MyDrive/effective_data_visualization_using_plotly_express-main/data

all_stocks_5yr.csv     Iris.csv
apple_stock_price.csv  share-of-individuals-using-the-internet.csv
clean_auto_mpg.csv     tips.csv
gapminder_2007.csv     vgsales.csv
gapminder.csv          world_happiness_ranking_2019.csv


### **Readme**
This notebook presents examples of choropleth maps built from several datasets.

Enjoy improving your visualization skills!


### Part 1 - Internet Usage Dataset

In [4]:
# Load the internet-usage CSV (one row per country and year) from the mounted Drive.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/effective_data_visualization_using_plotly_express-main/data/share-of-individuals-using-the-internet.csv",
)

In [5]:
# Preview the first five rows (the default for head()) to check column names and value formats.
df.head()

Unnamed: 0,Country,Code,Year,Individuals using the Internet (% of population)
0,Afghanistan,AFG,1990,0.0
1,Afghanistan,AFG,2001,0.004723
2,Afghanistan,AFG,2002,0.004561
3,Afghanistan,AFG,2003,0.087891
4,Afghanistan,AFG,2004,0.105809


In [6]:
# (rows, columns) of the full internet-usage table.
df.shape

(6056, 4)

In [7]:
# Column dtypes and non-null counts, to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6056 entries, 0 to 6055
Data columns (total 4 columns):
 #   Column                                            Non-Null Count  Dtype  
---  ------                                            --------------  -----  
 0   Country                                           6056 non-null   object 
 1   Code                                              4912 non-null   object 
 2   Year                                              6056 non-null   int64  
 3   Individuals using the Internet (% of population)  6056 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 189.4+ KB


In [9]:
# Keep only the rows recorded for the year 2016.
df_2016 = df.loc[df["Year"].eq(2016)]

In [10]:
# Check the first three rows of the 2016 subset.
df_2016.head(3)

Unnamed: 0,Country,Code,Year,Individuals using the Internet (% of population)
16,Afghanistan,AFG,2016,10.595726
39,Albania,ALB,2016,66.363445
63,Algeria,DZA,2016,42.945527


In [23]:
# World map of internet usage in 2016.
#
# Countries are matched by the dataset's ISO-3 "Code" column rather than by display
# name, in line with the Gapminder map later in this notebook: codes are unambiguous,
# whereas name matching depends on Plotly recognising every spelling in the data.
# Rows with a missing "Code" cannot be placed on an ISO-3 map and are dropped first
# (presumably regional aggregates rather than countries -- TODO confirm).
plot = px.choropleth(
    data_frame=df_2016.dropna(subset=["Code"]),
    locations="Code",
    locationmode="ISO-3",
    color="Individuals using the Internet (% of population)",
    hover_name="Country",  # still show the readable country name on hover
    title="Internet usage for various countries in the world in 2016",
)
plot.show()

### Part 2 - Gapminder Dataset

In [20]:
# Load the 2007 Gapminder snapshot from the mounted Drive.
df1 = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/effective_data_visualization_using_plotly_express-main/data/gapminder_2007.csv",
)

In [21]:
# Preview the first five rows of the Gapminder table.
df1.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,11,11,Afghanistan,Asia,2007,43.828,31889923,974.580338,AFG,4
1,23,23,Albania,Europe,2007,76.423,3600523,5937.029526,ALB,8
2,35,35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12
3,47,47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24
4,59,59,Argentina,Americas,2007,75.32,40301927,12779.37964,ARG,32


In [22]:
# Column dtypes and non-null counts.
# NOTE(review): the "Unnamed: 0" / "Unnamed: 0.1" columns look like saved row indexes -- confirm they are not needed.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142 entries, 0 to 141
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    142 non-null    int64  
 1   Unnamed: 0.1  142 non-null    int64  
 2   country       142 non-null    object 
 3   continent     142 non-null    object 
 4   year          142 non-null    int64  
 5   lifeExp       142 non-null    float64
 6   pop           142 non-null    int64  
 7   gdpPercap     142 non-null    float64
 8   iso_alpha     142 non-null    object 
 9   iso_num       142 non-null    int64  
dtypes: float64(2), int64(5), object(3)
memory usage: 11.2+ KB


In [27]:
# World map coloured by life expectancy, with countries matched through their ISO-3 codes.
plot = px.choropleth(
    df1,
    locations="iso_alpha",
    locationmode="ISO-3",
    color="lifeExp",
    hover_name="country",
    title="Life expectancy for various countries in the world in 2007",
)
plot.show()

### Part 3 - World happiness ranking 2019 Dataset

In [28]:
# Load the 2019 world happiness ranking from the mounted Drive.
df2 = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/effective_data_visualization_using_plotly_express-main/data/world_happiness_ranking_2019.csv",
)

In [29]:
# Preview the first five rows of the happiness ranking.
df2.head()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [30]:
# (rows, columns) of the happiness ranking table.
df2.shape

(156, 9)

In [31]:
# List the column labels; the map below uses 'Country or region' and 'Score'.
df2.columns

Index(['Overall rank', 'Country or region', 'Score', 'GDP per capita',
       'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption'],
      dtype='object')

In [34]:
# World map coloured by happiness score; this dataset has no ISO code column,
# so countries are matched by name.
plot = px.choropleth(
    df2,
    locations="Country or region",
    locationmode="country names",
    color="Score",
    hover_name="Country or region",
    title="World happiness ranking for various countries in the world in 2019",
)
plot.show()