# Data Visualization Project

## Freights and Map



In [120]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import io
import requests
import psutil 

In [121]:
# NOTE(review): every line in this cell is commented out, so running it does nothing.
# It sketches loading the raw TSV export; the next cell reads an Excel workbook instead.
#data = pd.read_csv("countryPolicy.csv", low_memory=False)

# Read the air-transport freight and mail data:
#freight_mail = pd.read_csv("data/eustat/avia_gooc.tsv", sep='\t', header=0)
# Select the columns whose label contains 2019 or 2020
#years=freight_mail.filter(regex='2020|2019').columns
# Split the comma-joined first column into separate columns
#df = freight_mail.iloc[:,0].str.split(pat=',', n = 5,expand = True)\
#.rename(columns = {0:'unit',1:'tra_meas',2:'schedule',3:'tra_cov',4:'geo'})
# Re-assemble the dataset from the split columns and the selected years
#freight_mail = pd.concat([df,freight_mail[years]],axis = 1)

In [122]:
# Load the freight-and-mail workbook; row 10 (0-based) of the sheet supplies the column labels.
fm_df = pd.read_excel(
    "data/MailFreight.xlsx",
    sheet_name="Sheet 1",
    header=10,
    skiprows=0,
)

In [123]:
# Clean-up chain: remove columns that are entirely empty, then rows with any
# missing value, then the row labelled 1, and finally renumber the rows from 0.
# NOTE(review): row label 1 is presumably a leftover sub-header line — confirm against the workbook.
fm_df = (
    fm_df
    .dropna(axis=1, how='all')
    .dropna(axis=0, how='any')
    .drop(index=1)
    .reset_index(drop=True)
)

In [124]:
# Turn the ":" placeholder cells into NaN so the columns can be treated as numbers.
# `np.nan` replaces `np.NaN` (that alias was removed in NumPy 2.0), and the
# element-wise pass goes through Series.map per column because
# DataFrame.applymap is deprecated since pandas 2.1.
fm_df = fm_df.apply(
    lambda column: column.map(lambda cell: np.nan if cell == ":" else cell)
)

In [133]:
# Shorten the verbose Germany label, then use the 'TIME' column as the row index.
germany_long_label = 'Germany (until 1990 former territory of the FRG)'
is_germany_row = fm_df['TIME'] == germany_long_label
fm_df.loc[is_germany_row, 'TIME'] = 'Germany'
fm_df = fm_df.set_index('TIME')

In [134]:
# Malta's figures contain a data error, so that row is excluded from the analysis.
fm_df = fm_df.drop(labels="Malta", axis=0)

### Total freights and mails transported

In [136]:
# Column-wise total over all remaining countries: one value per column of fm_df.
plot_data = fm_df.sum(axis=0)

# Single scatter trace: column labels on the x-axis, the totals on the y-axis.
data = dict(type='scatter',
            x=plot_data.index,
            y=plot_data.values)

# Figure layout: centred red title plus axis captions.
# The title spelling was corrected ("freigths and mails transfer").
layout = dict(title=dict(text="Total amount of freight and mail transferred within Europe",
                         font=dict(color='red'),
                         xref='paper',
                         x=0.5,
                         xanchor='center'),
              xaxis=dict(title='Months'),
              yaxis=dict(title='Tonnes')
             )

In [137]:
# Build the total-tonnage chart from the `data` trace and `layout` defined earlier, then render it.
clear_figure = go.Figure(data=data, layout=layout)
clear_figure.show()

A significant surge in Oct 2020 may be due to companies stocking up on inventory to prepare for the holidays. Nope, it was caused by the erroneous Malta data (dropped above).

### Top 10 countries by freight and mail transported

In [138]:
# Rank the rows by their total over all columns (largest first) and keep the
# index labels of the first ten.
row_totals = fm_df.sum(axis=1)
top10 = row_totals.sort_values(ascending=False).iloc[:10].index

In [139]:
# Keep only the rows selected in `top10`, in ranked order.
plot_data = fm_df.loc[top10, :]

# One line trace per country: column labels on the x-axis, that country's
# values on the y-axis, and the country name as the legend entry.
data = [
    dict(type='scatter',
         x=plot_data.loc[country].index,
         y=plot_data.loc[country].values,
         mode='lines',
         name=country)
    for country in plot_data.index
]

# Dark-themed, fixed-size layout with a centred title.
# The title spelling was corrected ("countrys", "freigths and mails transfer").
layout = go.Layout(
            title={
                'text': "Top 10 countries with the highest amount of freight and mail transferred within Europe",
                'y': 0.93,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top',
                'font_size': 30
                },
            xaxis=dict(title='Months'),
            yaxis=dict(title='Tonnes'),
            autosize=False,
            width=1150,
            height=600,
            template='plotly_dark',
        )

In [141]:
fig = go.Figure(data=data, layout=layout)

# Set the y-axis caption on the built figure.
fig.update_yaxes(title_text="Tonnes")

# Look shared by both dashed marker lines and by both captions.
marker_line_style = dict(line_width=1.7, line_dash="dash", line_color="lightslategray")
caption_style = dict(xref="paper",
                     yref="paper",
                     showarrow=False,
                     font=dict(size=12, color="white"),
                     bgcolor="#283442")

# Each entry: (line x position, caption text, caption x, caption y).
# Caption coordinates are fractions of the plotting area (xref/yref="paper").
# NOTE(review): the x positions look like epoch milliseconds, roughly
# 2020-03-03 and 2020-06-01 (UTC) — this assumes the x-axis is rendered as a date axis.
timeline_markers = [
    # start of the COVID period
    (1583200000000, "Europe had become the epicentre of the pandemic", 0.25, 1),
    # start of the summer season
    (1591000000000, "Beginning of Summer Season", 0.73, 0.85),
]
for line_x, caption, caption_x, caption_y in timeline_markers:
    fig.add_vline(x=line_x, **marker_line_style)
    fig.add_annotation(text=caption, x=caption_x, y=caption_y, **caption_style)

fig.show()

In [142]:
# Export the annotated top-10 line chart as an SVG file.
line_chart_path = "images/line_freights.svg"
fig.write_image(line_chart_path)

In [13]:
# Country-code lookup table, downloaded from GitHub each time the cell runs.
iso_codes_url = "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv"
c_code = pd.read_csv(iso_codes_url)

In [14]:
# Load the air-passenger workbook; row 10 (0-based) of the sheet supplies the column labels.
df_passenger = pd.read_excel(
    "data/AVIA_PAOC_passenger.xlsx",
    sheet_name="Sheet 1",
    header=10,
    skiprows=0,
)

In [15]:
# Preprocessing of the passenger table.
# 1) Remove columns that are entirely empty, rows with any missing value and
#    the row labelled 1, then renumber the rows from 0.
# NOTE(review): row label 1 is presumably a leftover sub-header line — confirm against the workbook.
df_passenger = (
    df_passenger
    .dropna(axis=1, how='all')
    .dropna(axis=0, how='any')
    .drop(index=1)
    .reset_index(drop=True)
)
# 2) Turn the ":" placeholder cells into NaN. This runs after the dropna calls,
#    so rows holding ":" are kept rather than dropped.
#    `np.nan` replaces `np.NaN` (that alias was removed in NumPy 2.0), and the
#    element-wise pass goes through Series.map per column because
#    DataFrame.applymap is deprecated since pandas 2.1.
df_passenger = df_passenger.apply(
    lambda column: column.map(lambda cell: np.nan if cell == ":" else cell)
)
# 3) Give the two identifier columns descriptive names.
df_passenger = df_passenger.rename(
    columns={'TIME': 'country_code', 'TIME.1': 'country_name'}
)

In [16]:
# Turkey has no data, so its row (index label 34) is removed; the redundant
# '2021-01' column is removed in the same call.
# NOTE(review): the label 34 is hard-coded — confirm it still points at Turkey if the workbook changes.
df_passenger = df_passenger.drop(index=34, columns='2021-01')

# Recode EL -> GR and UK -> GB; every other code is passed through unchanged.
# The recoded values are later matched against the 'alpha-2' column of the lookup table.
code_fixes = {'EL': 'GR', 'UK': 'GB'}
df_passenger['country_code'] = df_passenger['country_code'].map(
    lambda code: code_fixes.get(code, code)
)

In [17]:
# Attach the 'alpha-2' / 'alpha-3' codes to the first two passenger columns
# by matching 'country_code' against 'alpha-2'.
# NOTE(review): this is an inner join, so a code with no match in the lookup
# silently drops out; the map below pairs c_code with per-row values by
# position — verify that both still have the same rows in the same order.
passenger_countries = df_passenger.iloc[:, :2]
iso_lookup = c_code[['alpha-2', 'alpha-3']]
c_code = pd.merge(
    passenger_countries,
    iso_lookup,
    how='inner',
    left_on='country_code',
    right_on='alpha-2',
)

In [18]:
# Preview the first five rows of the cleaned passenger table.
df_passenger.head(n=5)

Unnamed: 0,country_code,country_name,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,...,2020-03,2020-04,2020-05,2020-06,2020-07,2020-08,2020-09,2020-10,2020-11,2020-12
0,BE,Belgium,2253327.0,2187148.0,2643934.0,3107492.0,3102755.0,3257996.0,3650171.0,3570657.0,...,1058082.0,17596.0,22975.0,109685.0,795201.0,935379.0,580349.0,472460.0,324998.0,
1,BG,Bulgaria,570956.0,536254.0,618037.0,678527.0,877009.0,1455157.0,1866085.0,1885574.0,...,293130.0,29572.0,77248.0,156857.0,438020.0,543762.0,404993.0,267838.0,134256.0,166890.0
2,CZ,Czechia,1015592.0,1011909.0,1269379.0,1434249.0,1540267.0,1925827.0,2171017.0,2196935.0,...,444914.0,4672.0,7663.0,37395.0,272997.0,377416.0,259224.0,120624.0,67046.0,103465.0
3,DK,Denmark,2233010.0,2260089.0,2652203.0,2884516.0,3075631.0,3401375.0,3640974.0,3416166.0,...,978517.0,30861.0,49903.0,171539.0,654988.0,726795.0,561537.0,,,
4,DE,Germany (until 1990 former territory of the FRG),14177333.0,13965763.0,17192573.0,19190820.0,20253183.0,21595042.0,22839831.0,22756833.0,...,6503935.0,265803.0,397715.0,1323571.0,4556602.0,5618984.0,4300025.0,3663114.0,1547855.0,1888478.0


In [19]:
# Split the monthly columns into a before-pandemic and an after-pandemic frame.
# Positions 0-1 hold the country code and name; position 17 is the first
# column of the after-pandemic slice.
first_pandemic_col = 17
df_bf_pan = df_passenger.iloc[:, 2:first_pandemic_col]  # 01-19 to 03-20
df_af_pan = df_passenger.iloc[:, first_pandemic_col:]   # 04-20 to 12-20

In [20]:
# Percentage drop of the row-wise mean from the before-pandemic slice to the
# after-pandemic slice; the leading minus makes a decrease positive.
mean_before = np.mean(df_bf_pan, axis=1)
mean_after = np.mean(df_af_pan, axis=1)
per_change = -100 * (mean_after - mean_before) / mean_before

In [21]:
# Scratch list of eight hex colours; the same values (without '#') are stored
# in `color_` in the next cell. This cell contains comments only.
#fff5f0
#fee0d2
#fcbba1
#fc9272
#fb6a4a
#ef3b2c
#cb181d
#99000d

In [107]:
# Eight hex colours (no leading '#'). Not referenced by the trace below,
# which uses the named "ylorrd" colour scale.
color_ = ['fff5f0', 'fee0d2', 'fcbba1', 'fc9272', 'fb6a4a', 'ef3b2c', 'cb181d', '99000d']

# Choropleth trace: countries identified by ISO-3 code, coloured by `per_change`.
# NOTE(review): `locations`, `z` and `text` are paired by position — confirm
# that `c_code` and `per_change` cover the same countries in the same order.
data_choropleth = dict(
    type='choropleth',
    locations=c_code['alpha-3'],
    locationmode='ISO-3',
    z=per_change,
    text=c_code['country_name'],
    colorscale="ylorrd",
    #colorbar_title = "Percentage in decrease",
    #colorbar=dict(thickness=20, ticklen=3),
    autocolorscale=False,
    reversescale=False,
    marker_line_color='white',
    #showlegend = True,
    #ids =  c_code['country_code']
)

# One near-black tone is used for land, lakes, ocean and the map background.
dark_fill = '#111111'

# Map settings: Europe scope on an orthographic projection, no coastlines.
geo_settings = dict(
    scope='europe',
    projection=dict(type='orthographic'),
    #countrycolor = 'darkgrey',
    landcolor=dark_fill,
    #showcountries = True,
    showland=True,
    lakecolor=dark_fill,
    showocean=True,
    oceancolor=dark_fill,
    bgcolor=dark_fill,
    #center =dict(lat =48.86 ,lon=2.34)  ,
    resolution=50,
    countrywidth=0.5,
    showcoastlines=False,
)

# Title block (the text is intentionally empty).
title_settings = {
    'text': "",
    'y': 0.93,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
    'font_size': 30,
}

# Fixed-size, margin-less dark layout wrapping the map settings.
layout_choropleth = dict(
    geo=geo_settings,
    scene=dict(aspectratio=dict(x=4, y=1, z=10)),
    width=1150,
    height=600,
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    template="plotly_dark",
    title=title_settings,
)

In [108]:
# Build the choropleth figure from the trace and layout dictionaries.
fig_choropleth = go.Figure(data=data_choropleth, layout=layout_choropleth)
#fig_choropleth.update_scenes(aspectratio =dict(x=3,y=1,z=10))

# Text-only scattergeo overlay that prints each country's code on the map,
# positioned via its ISO-3 code.
country_code_labels = dict(
    type="scattergeo",
    locations=c_code['alpha-3'],
    text=c_code['country_code'],
    mode="text",
    textfont=dict(size=6),
)
fig_choropleth.add_trace(country_code_labels)

fig_choropleth.show()

In [109]:
# Export the dark-themed choropleth map as an SVG file.
choropleth_path = "images/fig1_dark.svg"
fig_choropleth.write_image(choropleth_path)