In [1]:
# Import libraries: pandas for tabular data, plotly / cufflinks for interactive plots
import pandas as pd
import plotly.graph_objs as go
# NOTE(review): `plot` is imported but not used in the cells visible here
from plotly.offline import init_notebook_mode, iplot, plot
# Initialise plotly's offline notebook mode before any figure is drawn.
# connected=True: presumably loads plotly.js from the CDN instead of embedding
# it in the notebook — confirm against the installed plotly version.
init_notebook_mode(connected = True)
import cufflinks as cf
# Put cufflinks in offline mode; the DataFrame.iplot() calls below rely on it
cf.go_offline()

In [2]:
# Load the sample dataset: the 2016 World Happiness Report (Kaggle)
whp = pd.read_csv('archive/2016.csv')

# Summarise the columns: names, non-null counts and dtypes
whp.info()

# Preview the first five rows of the dataframe
whp.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country                        157 non-null    object 
 1   Region                         157 non-null    object 
 2   Happiness Rank                 157 non-null    int64  
 3   Happiness Score                157 non-null    float64
 4   Lower Confidence Interval      157 non-null    float64
 5   Upper Confidence Interval      157 non-null    float64
 6   Economy (GDP per Capita)       157 non-null    float64
 7   Family                         157 non-null    float64
 8   Health (Life Expectancy)       157 non-null    float64
 9   Freedom                        157 non-null    float64
 10  Trust (Government Corruption)  157 non-null    float64
 11  Generosity                     157 non-null    float64
 12  Dystopia Residual              157 non-null    flo

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939
1,Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463
2,Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137
3,Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465
4,Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596


In [3]:
# Change the datatype of every column to string ('str') with one
# frame-wide .astype() call instead of looping column by column
whp = whp.astype(str)

# Check the dataframe again: every column should now be reported as object
whp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   Country                        157 non-null    object
 1   Region                         157 non-null    object
 2   Happiness Rank                 157 non-null    object
 3   Happiness Score                157 non-null    object
 4   Lower Confidence Interval      157 non-null    object
 5   Upper Confidence Interval      157 non-null    object
 6   Economy (GDP per Capita)       157 non-null    object
 7   Family                         157 non-null    object
 8   Health (Life Expectancy)       157 non-null    object
 9   Freedom                        157 non-null    object
 10  Trust (Government Corruption)  157 non-null    object
 11  Generosity                     157 non-null    object
 12  Dystopia Residual              157 non-null    object
dtypes: ob

In [4]:
# Create a 'text' column for the World Happiness Dataset that summarizes each
# country: the country name followed by one "<label> <value>" entry per field,
# with the pieces joined by '<br>'.
# The columns must already hold strings for the concatenation to work.
whp['text'] = whp['Country'].str.cat(
    [
        field + ' ' + whp[field]
        for field in (
            'Happiness Rank',
            'Happiness Score',
            'Freedom',
            'Generosity',
            'Trust (Government Corruption)',
        )
    ],
    sep='<br>',
)
whp.head()

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,text
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939,Denmark<br>Happiness Rank 1<br>Happiness Score...
1,Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463,Switzerland<br>Happiness Rank 2<br>Happiness S...
2,Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,Iceland<br>Happiness Rank 3<br>Happiness Score...
3,Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465,Norway<br>Happiness Rank 4<br>Happiness Score ...
4,Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596,Finland<br>Happiness Rank 5<br>Happiness Score...


In [5]:
# Create line plots for Happiness Rank Vs Health / Freedom / Trust.
# * Do you think people living in happier countries have better health?
whp.iplot(
    x='Happiness Rank',
    y=['Health (Life Expectancy)', 'Freedom', 'Trust (Government Corruption)'],
    xTitle='Happiness Rank',
    yTitle='Index',
    # The title names the three series actually plotted (Health, not Generosity)
    title='2016 Happiness Rank Vs Health / Freedom / Trust'
)

In [6]:
# Bar-chart variant (kind='bar'): one bar per country showing its
# 'Trust (Government Corruption)' value.
whp.iplot(
    kind='bar',
    x='Country',
    y='Trust (Government Corruption)',
    xTitle='Country',
    # Labels below had spelling/casing mistakes ('Governemnt', 'COuntry')
    yTitle='Trust in Government',
    title='2016 Happiness Rank, Country Vs Trust'
)

In [7]:
# Create a "choropleth" for happiness score that displays the data from the
# 'text' column on hover.

# Data dictionary describing the trace
data = dict(type = 'choropleth', # type of plot
           colorscale = 'Portland', # can be 'Blues', 'Earth', ...
           locations = whp['Country'], # Get the country column
           locationmode = 'country names', # the level of map
           # The score column may hold strings (the dataframe was cast to str
           # earlier), so convert explicitly: the colour scale needs numbers.
           z = pd.to_numeric(whp['Happiness Score']),
           text = whp['text'], # text column from the dataframe
           colorbar = {'title':'Happiness Score'}, # Title for color bar
           # add reversescale = True here to flip the colour scale (default is False)
           )

# Layout: figure title plus a frameless Mercator-projected map
layout = dict(title = 'Happiness Score',
             geo = dict(showframe = False,projection = {'type':'mercator'})
             )

# Assemble the figure from the trace and layout, then render it inline
choropleth_map = go.Figure(data = [data],layout = layout)
iplot(choropleth_map, validate=False)

2012 US Election Data

In [8]:
# Load the 2012 US election dataset with pandas
usdf = pd.read_csv('2012_Election_Data.csv')

# Summarise the columns: names, non-null counts and dtypes
usdf.info()

# Preview the first five rows
usdf.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 17 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Year                              51 non-null     int64  
 1   ICPSR State Code                  51 non-null     int64  
 2   Alphanumeric State Code           51 non-null     int64  
 3   State                             51 non-null     object 
 4   VEP Total Ballots Counted         41 non-null     object 
 5   VEP Highest Office                51 non-null     object 
 6   VAP Highest Office                51 non-null     object 
 7   Total Ballots Counted             41 non-null     object 
 8   Highest Office                    51 non-null     object 
 9   Voting-Eligible Population (VEP)  51 non-null     object 
 10  Voting-Age Population (VAP)       51 non-null     float64
 11  % Non-citizen                     51 non-null     object 
 12  Prison    

Unnamed: 0,Year,ICPSR State Code,Alphanumeric State Code,State,VEP Total Ballots Counted,VEP Highest Office,VAP Highest Office,Total Ballots Counted,Highest Office,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv
0,2012,41,1,Alabama,,58.6%,56.0%,,2074338,3539217,3707440.0,2.6%,32232,57993,8616,71584,AL
1,2012,81,2,Alaska,58.9%,58.7%,55.3%,301694.0,300495,511792,543763.0,3.8%,5633,7173,1882,11317,AK
2,2012,61,3,Arizona,53.0%,52.6%,46.5%,2323579.0,2306559,4387900,4959270.0,9.9%,35188,72452,7460,81048,AZ
3,2012,42,4,Arkansas,51.1%,50.7%,47.7%,1078548.0,1069468,2109847,2242740.0,3.5%,14471,30122,23372,53808,AR
4,2012,71,5,California,55.7%,55.1%,45.1%,13202158.0,13038547,23681837,28913129.0,17.4%,119455,0,89287,208742,CA


In [9]:
# Bar plot of Voting-Age Population (VAP) per state; x uses the state
# abbreviation column ('State Abv').
# Title and axis labels are set so the figure stands on its own, like the
# other iplot() calls in this notebook.
usdf.iplot(
    kind='bar',
    x='State Abv',
    y='Voting-Age Population (VAP)',
    xTitle='State',
    yTitle='Voting-Age Population (VAP)',
    title='2012 Voting-Age Population (VAP) by State'
)

In [10]:
# Choropleth of the Voting-Age Population (VAP) per US state.
# To try other columns, check their data type first.
# Hints followed here:
#   * the marker draws a white line of width 2 between the states
#   * the layout sets showlakes = True

# Trace definition. Optional extras: a 'colorscale' entry such as 'Viridis',
# 'Hot', 'Jet' or 'Earth', and 'reversescale': True.
data = {
    'type': 'choropleth',
    'locations': usdf['State Abv'],
    'locationmode': 'USA-states',
    'z': usdf['Voting-Age Population (VAP)'],
    'text': usdf['State'],
    # thickness and colour of the border line between states
    'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
    'colorbar': {'title': "Voting-Age Population (VAP)"},
}

# Layout: restrict the map to the USA and draw lakes in blue
layout = {
    'title': '2012 General Election Voting Data',
    'geo': {
        'scope': 'usa',
        'showlakes': True,
        'lakecolor': 'rgb(85,173,240)',
    },
}

choropleth_map = go.Figure(data=[data], layout=layout)
iplot(choropleth_map, validate=False)

A dataset on Alcohol Consumption by Country

In [11]:
# Read the 2010 alcohol-consumption-by-country dataset from the plotly datasets repo
alcoDf = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2010_alcohol_consumption_by_country.csv')

# Summarise the columns, then preview the first five rows
alcoDf.info()
alcoDf.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191 entries, 0 to 190
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   location  191 non-null    object 
 1   alcohol   191 non-null    float64
dtypes: float64(1), object(1)
memory usage: 3.1+ KB


Unnamed: 0,location,alcohol
0,Belarus,17.5
1,Moldova,16.8
2,Lithuania,15.4
3,Russia,15.1
4,Romania,14.4


In [12]:
# Bar plot of alcohol consumption: one bar per country
alcoDf.iplot(
    kind='bar',
    x='location',
    y='alcohol',
    xTitle='Country Name',
    yTitle='Alcohol Consumption',
    title='Alcohol Consumption by Country in 2010',
)

In [13]:
# Geographical plot: a choropleth of alcohol consumption by country,
# built from a data dictionary and a layout dictionary.

# Trace definition: colour each country by its 'alcohol' value
data = {
    'type': 'choropleth',
    'colorscale': 'Viridis',
    'reversescale': True,
    'locations': alcoDf['location'],
    'locationmode': "country names",
    'z': alcoDf['alcohol'],
    'text': alcoDf['location'],
    'colorbar': {'title': 'Alcohol Consumption'},
}

# Layout. Other projection types to try: Mercator, orthographic,
# natural earth, miller. Add 'showframe': False to 'geo' to hide the frame.
layout = {
    'title': '2010 Alcohol Consumption',
    'geo': {'projection': {'type': 'robinson'}},
}

choropleth_map = go.Figure(data=[data], layout=layout)
iplot(choropleth_map, validate=False)

##### references
Dataset: 
https://www.kaggle.com/datasets/unsdsn/world-happiness
https://github.com/plotly/datasets/blob/master/2010_alcohol_consumption_by_country.csv

Code:
https://plotly.com/python/reference/#choropleth
