## The purpose of this notebook is to visualize how the increase in internet usage drives e-commerce sales, and how this trend will affect businesses that do not undertake a digital transformation

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

### Individuals using the Internet (% of population)
http://api.worldbank.org/v2/en/indicator/IT.NET.USER.ZS?downloadformat=csv

Checking the growth of the indicator named in the title over two decades.
Selected countries: Poland, Germany and the European Union.

In [2]:
# Fetch the World Bank bulk archive for indicator IT.NET.USER.ZS and store it as data.zip.
!wget -O data.zip "http://api.worldbank.org/v2/en/indicator/IT.NET.USER.ZS?downloadformat=csv"

--2020-09-06 16:07:19--  http://api.worldbank.org/v2/en/indicator/IT.NET.USER.ZS?downloadformat=csv
Resolving api.worldbank.org (api.worldbank.org)... 52.188.136.184
Connecting to api.worldbank.org (api.worldbank.org)|52.188.136.184|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 51376 (50K) [application/zip]
Saving to: ‘data.zip’


2020-09-06 16:07:20 (166 KB/s) - ‘data.zip’ saved [51376/51376]



In [3]:
# Unpack the downloaded archive. The archive is named explicitly (data.zip, as written
# by the wget cell) instead of relying on unzip's implicit ".zip" suffix lookup, which
# also matches the other unzip cells. -o overwrites files left by a previous run.
!unzip -o data.zip

Archive:  data.zip
  inflating: Metadata_Indicator_API_IT.NET.USER.ZS_DS2_en_csv_v2_1307380.csv  
  inflating: API_IT.NET.USER.ZS_DS2_en_csv_v2_1307380.csv  
  inflating: Metadata_Country_API_IT.NET.USER.ZS_DS2_en_csv_v2_1307380.csv  


### Meta Individuals using the Internet (% of population)

In [2]:
# Display name of the indicator, taken from the first row of the metadata file.
(
    pd.read_csv("./Metadata_Indicator_API_IT.NET.USER.ZS_DS2_en_csv_v2_1307380.csv")
    .loc[0, "INDICATOR_NAME"]
)

'Individuals using the Internet (% of population)'

In [3]:
# Definition of the indicator, taken from the first row of the metadata file.
(
    pd.read_csv("./Metadata_Indicator_API_IT.NET.USER.ZS_DS2_en_csv_v2_1307380.csv")
    .loc[0, "SOURCE_NOTE"]
)

'Internet users are individuals who have used the Internet (from any location) in the last 3 months. The Internet can be used via a computer, mobile phone, personal digital assistant, games machine, digital TV etc.'

In [4]:
# Load the indicator table (one row per country or aggregate, one column per year).
# NOTE(review): World Bank bulk CSV exports usually start with a few metadata lines
# before the real header; if a freshly downloaded file fails to parse here, it
# probably needs `skiprows=4` - confirm against the extracted file.
source_df = pd.read_csv(
    filepath_or_buffer="./API_IT.NET.USER.ZS_DS2_en_csv_v2_1307380.csv",
)

In [29]:
# Analysis window as column labels. range() excludes its stop value, so the last
# year kept is 2018. IMPORTANT: there is no data for 2019.
YEARS = list(map(str, range(2000, 2019)))
# Identifying columns followed by one column per selected year.
COLUMNS = ["Country Name", "Country Code"] + YEARS
df = source_df.loc[:, COLUMNS]

In [30]:
# First five rows, as a quick check of the column selection.
df.head(n=5)

Unnamed: 0,Country Name,Country Code,2000,2001,2002,2003,2004,2005,2006,2007,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Aruba,ABW,15.442823,17.1,18.8,20.8,23.0,25.4,28.0,30.9,...,58.0,62.0,69.0,74.0,78.9,83.78,88.661227,93.542454,97.17,
1,Afghanistan,AFG,,0.004723,0.004561,0.087891,0.105809,1.224148,2.107124,1.9,...,3.55,4.0,5.0,5.454545,5.9,7.0,8.26,11.2,13.5,
2,Angola,AGO,0.105046,0.136014,0.270377,0.370682,0.464815,1.143367,1.5,1.7,...,2.3,2.8,3.1,6.5,8.9,21.4,12.4,13.0,14.339079,
3,Albania,ALB,0.114097,0.325798,0.390081,0.9719,2.420388,6.043891,9.609991,15.036115,...,41.2,45.0,49.0,54.655959,57.2,60.1,63.252933,66.363445,71.847041,
4,Andorra,AND,10.538836,,11.260469,13.546413,26.837954,37.605766,48.936847,70.87,...,78.53,81.0,81.0,86.434425,94.0,95.9,96.91,97.930637,91.567467,


In [31]:
# Codes matched against the "Country Code" column of the World Bank table.
POLAND = "POL"
GERMANY = "DEU"
EUROPEAN_UNION = "EUU"
# Original (misspelled) name kept as an alias so existing references keep working.
EUROPEN_UNION = EUROPEAN_UNION

In [32]:
# Keep only the row(s) whose country code is Poland's.
is_poland = df["Country Code"].eq(POLAND)
poland_df = df.loc[is_poland]
# Germany and the EU aggregate are currently disabled:
# germany_df = df[df["Country Code"] == GERMANY]
# eu_df = df[df["Country Code"] == EUROPEN_UNION]

In [33]:
# decades 
# 2000-2009 (2000s)
# 2010-2019 (2010s) 
# def calculate_decade(df, years, name):
#     df[name] = df[years].mean(axis=1, skipna=True)

# s2000 = [str(x) for x in range(2000, 2010, 1)]
# s2010 = [str(x) for x in range(2010, 2020, 1)]


In [34]:
# calculate_decade(poland_df, s2000, '2000s')
# calculate_decade(poland_df, s2010, '2010s')

# calculate_decade(germany_df, s2000, '2000s')
# calculate_decade(germany_df, s2010, '2010s')

# calculate_decade(eu_df, s2000, '2000s')
# calculate_decade(eu_df, s2010, '2010s')

In [35]:
# poland_df['2000s'].values[0], poland_df['2010s'].values[0]
# germany_df['2000s'].values[0], germany_df['2010s'].values[0]
# eu_df['2000s'].values[0], eu_df['2010s'].values[0]

In [36]:
# Poland's yearly values only (identifier columns dropped).
poland_df.loc[:, YEARS]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
188,7.285429,9.90067,21.15,24.87,32.53,38.81,44.58,48.6,53.13,58.97,62.32,61.949999,62.309997,62.8492,66.6,67.997,73.300704,75.985366,77.541735


In [57]:
# Share of Poland's population using the Internet, one value per year.
# poland_df[YEARS] is a one-row frame, so element 0 is the 1-D yearly series.
poland_internet_share = poland_df[YEARS].to_numpy()[0]

fig = go.Figure(
    data=[
        go.Scatter(
            mode="lines+text",
            line={"color": "royalblue"},
            # One point per year (not a decade average).
            name="Internet users (% of population)",
            # Whole-percent point labels; int() drops the fractional part.
            text=["{0}%".format(int(share)) for share in poland_internet_share],
            textposition="top left",
            x=YEARS,
            y=poland_internet_share,
        )
    ]
)

fig.update_layout(
    title="Constant increase of internet users in Poland over the last 2 decades.",
    yaxis=dict(
        # title.text / title.font replace the deprecated `titlefont` shorthand.
        title=dict(
            text="Individuals using the Internet (% of population)",
            font=dict(color="#1f77b4"),
        ),
        tickfont=dict(color="#1f77b4"),
        side="left",
        # No `overlaying`: this is the only y axis, so there is nothing to overlay.
    ),
    xaxis=dict(
        title="Years",
        showticklabels=True,
        gridwidth=1,
    ),
)
fig.show()

### EU E-commerce sales data
#### https://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=isoc_ec_eseln2&lang=en
You have to download the data set manually through the application interface. There is no direct URL to trigger the download.
"DATASET: E-commerce sales [isoc_ec_eseln2]"
"LAST UPDATE: 24.02.20 14:13:04"
"EXTRACTION DATE: 08.09.20 11:03:09"
"SOURCE OF DATA: Eurostat"

In [13]:
# Unpack the manually downloaded Eurostat e-commerce archive (-o overwrites existing files).
!unzip -o isoc_ec_eseln2.zip

Archive:  isoc_ec_eseln2.zip
  inflating: isoc_ec_eseln2_Label.csv  
  inflating: isoc_ec_eseln2_1_Data.csv  


In [39]:
# Eurostat "E-commerce sales" extract, one observation per row.
# NOTE(review): "Value" contains ":" placeholders (visible in the preview of this
# frame), so pandas keeps the column as text rather than numbers.
ecommerce_sales_df = pd.read_csv(
    filepath_or_buffer="isoc_ec_eseln2_1_Data.csv",
)

In [40]:
# Preview of the raw e-commerce table (pandas shortens the display of long frames).
ecommerce_sales_df

Unnamed: 0,TIME,GEO,INDIC_IS,UNIT,SIZEN_R2,Value,Flag and Footnotes
0,2009,European Union - 27 countries (from 2020),Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",:,
1,2009,European Union - 27 countries (from 2020),Enterprises with e-commerce sales,Percentage of enterprises,"Small enterprises (10-49 persons employed), wi...",:,
2,2009,European Union - 27 countries (from 2020),Enterprises with e-commerce sales,Percentage of enterprises,"Medium enterprises (50-249 persons employed), ...",:,
3,2009,European Union - 27 countries (from 2020),Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",12,
4,2009,European Union - 27 countries (from 2020),Enterprises with e-commerce sales,Percentage of enterprises,"Small enterprises (10-49 persons employed), wi...",11,
...,...,...,...,...,...,...,...
74410,2019,Bosnia and Herzegovina,Enterprises with web sales via e-commerce mark...,Percentage of enterprises receiving e-commerce...,"All enterprises, without financial sector (10 ...",:,
74411,2019,Bosnia and Herzegovina,Enterprises with web sales via e-commerce mark...,Percentage of enterprises receiving e-commerce...,"Small enterprises (10-49 persons employed), wi...",:,
74412,2019,Bosnia and Herzegovina,Enterprises with web sales via e-commerce mark...,Percentage of enterprises receiving e-commerce...,"Medium enterprises (50-249 persons employed), ...",:,
74413,2019,Bosnia and Herzegovina,Enterprises with web sales via e-commerce mark...,Percentage of enterprises receiving e-commerce...,"All enterprises, without financial sector (10 ...",:,


In [67]:
# Rows describing Poland's share of enterprises with e-commerce sales, for all
# enterprises with 10 or more persons employed, financial sector excluded.
is_poland_ecommerce_share = (
    (ecommerce_sales_df["GEO"] == "Poland")
    & (ecommerce_sales_df["INDIC_IS"] == "Enterprises with e-commerce sales")
    & (ecommerce_sales_df["UNIT"] == "Percentage of enterprises")
    & (ecommerce_sales_df["SIZEN_R2"] == "All enterprises, without financial sector (10 persons employed or more)")
    # 2009 is excluded: not enough data for that year.
    & (ecommerce_sales_df["TIME"] != 2009)
)
ecommerce_sales_df_poland = ecommerce_sales_df.loc[is_poland_ecommerce_share].assign(
    # "Value" is read as text because the file uses ":" as a placeholder; convert it
    # so plots and arithmetic work on real numbers. Unparseable entries become NaN.
    Value=lambda frame: pd.to_numeric(frame["Value"], errors="coerce")
)

In [68]:
# Filtered series for Poland: one row per year from 2010 onwards.
ecommerce_sales_df_poland

Unnamed: 0,TIME,GEO,INDIC_IS,UNIT,SIZEN_R2,Value,Flag and Footnotes
11055,2010,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",9,
17820,2011,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",10,
24585,2012,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",11,
31350,2013,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",11,
38115,2014,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",12,
44880,2015,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",12,
51645,2016,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",12,
58410,2017,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",12,
65175,2018,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",14,
71940,2019,Poland,Enterprises with e-commerce sales,Percentage of enterprises,"All enterprises, without financial sector (10 ...",16,


In [69]:
# Line chart of the share of Polish enterprises with e-commerce sales, by year.
fig1 = px.line(ecommerce_sales_df_poland, x="TIME", y="Value")
fig1.update_layout(
    title="Poland ecommerce sales",
    yaxis=dict(
        # title.text / title.font replace the deprecated `titlefont` shorthand.
        title=dict(
            text="Enterprises with e-commerce sales",
            font=dict(color="#1f77b4"),
        ),
        tickfont=dict(color="#1f77b4"),
        side="left",
        # No `overlaying`: this is the only y axis, so there is nothing to overlay.
    ),
    xaxis=dict(
        title="Years",
        showgrid=True,
        zeroline=True,
        showline=True,
        showticklabels=True,
        gridwidth=1,
    ),
)
fig1.show()

### Social Media Used By Purpose
#### https://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=isoc_cismp&lang=en
You have to download the data set manually through the application interface.
"DATASET: Social media use by purpose   [isoc_cismp]"
"LAST UPDATE: 24.02.20 14:13:07"
"EXTRACTION DATE: 08.09.20 12:21:29"
"SOURCE OF DATA: Eurostat"

In [68]:
# Unpack the manually downloaded Eurostat social-media archive (-o overwrites existing files).
!unzip -o isoc_cismp.zip

Archive:  isoc_cismp.zip
  inflating: isoc_cismp_Label.csv    
  inflating: isoc_cismp_1_Data.csv   


In [44]:
# Eurostat "Social media use by purpose" extract, one observation per row.
social_media_used_by_purpose_df = pd.read_csv(
    filepath_or_buffer="isoc_cismp_1_Data.csv",
)

In [45]:
# Preview of the raw social-media table (pandas shortens the display of long frames).
social_media_used_by_purpose_df

Unnamed: 0,TIME,GEO,INDIC_IS,UNIT,SIZEN_R2,Value,Flag and Footnotes
0,2013,European Union - 27 countries (from 2020),"Website has online ordering, reservation or bo...",Percentage of enterprises,"All enterprises, without financial sector (10 ...",5,
1,2013,European Union - 27 countries (from 2020),"Website has online ordering, reservation or bo...",Percentage of enterprises,"Small enterprises (10-49 persons employed), wi...",5,
2,2013,European Union - 27 countries (from 2020),"Website has online ordering, reservation or bo...",Percentage of enterprises,"Medium enterprises (50-249 persons employed), ...",8,
3,2013,European Union - 27 countries (from 2020),"Website has online ordering, reservation or bo...",Percentage of enterprises,"SMEs (10-249 persons employed), without financ...",5,
4,2013,European Union - 27 countries (from 2020),"Website has online ordering, reservation or bo...",Percentage of enterprises,Large enterprises (250 persons employed or mor...,13,
...,...,...,...,...,...,...,...
15995,2019,Bosnia and Herzegovina,Collaborate with business partners (e.g. suppl...,Percentage of the enterprises with internet ac...,"All enterprises, without financial sector (10 ...",24,
15996,2019,Bosnia and Herzegovina,Collaborate with business partners (e.g. suppl...,Percentage of the enterprises with internet ac...,"Small enterprises (10-49 persons employed), wi...",23,
15997,2019,Bosnia and Herzegovina,Collaborate with business partners (e.g. suppl...,Percentage of the enterprises with internet ac...,"Medium enterprises (50-249 persons employed), ...",28,
15998,2019,Bosnia and Herzegovina,Collaborate with business partners (e.g. suppl...,Percentage of the enterprises with internet ac...,"SMEs (10-249 persons employed), without financ...",24,


In [46]:
# Rows describing Poland's share of enterprises that use social media to develop
# their image or market products, for all enterprises with 10 or more persons
# employed, financial sector excluded.
is_poland_marketing_share = (
    (social_media_used_by_purpose_df["GEO"] == "Poland")
    & (social_media_used_by_purpose_df["INDIC_IS"] == "Develop the enterprise's image or market products")
    & (social_media_used_by_purpose_df["UNIT"] == "Percentage of enterprises")
    & (social_media_used_by_purpose_df["SIZEN_R2"] == "All enterprises, without financial sector (10 persons employed or more)")
)
social_media_used_by_purpose_df_poland = social_media_used_by_purpose_df.loc[
    is_poland_marketing_share
].assign(
    # Normalise "Value" to a numeric dtype so plots and arithmetic work on real numbers.
    # NOTE(review): presumably the column is text when the extract contains ":" placeholders
    # (as the e-commerce extract does) - confirm. Already-numeric data is left unchanged;
    # unparseable entries become NaN.
    Value=lambda frame: pd.to_numeric(frame["Value"], errors="coerce")
)

In [47]:
# Filtered series for Poland: one row per survey year.
social_media_used_by_purpose_df_poland

Unnamed: 0,TIME,GEO,INDIC_IS,UNIT,SIZEN_R2,Value,Flag and Footnotes
2520,2013,Poland,Develop the enterprise's image or market products,Percentage of enterprises,"All enterprises, without financial sector (10 ...",15,
6520,2015,Poland,Develop the enterprise's image or market products,Percentage of enterprises,"All enterprises, without financial sector (10 ...",21,
10520,2017,Poland,Develop the enterprise's image or market products,Percentage of enterprises,"All enterprises, without financial sector (10 ...",25,
14520,2019,Poland,Develop the enterprise's image or market products,Percentage of enterprises,"All enterprises, without financial sector (10 ...",32,


In [48]:
# Line chart of the share of Polish enterprises using social media for image
# building or product marketing, by survey year.
# NOTE: this reuses the name `fig1`, replacing the e-commerce figure created earlier.
fig1 = px.line(social_media_used_by_purpose_df_poland, x="TIME", y="Value")
fig1.update_layout(
    title="Social Media Used to Develop the enterprise's image or market products in Poland",
    yaxis=dict(
        # title.text / title.font replace the deprecated `titlefont` shorthand.
        title=dict(
            text="Percentage of enterprises",
            font=dict(color="#1f77b4"),
        ),
        tickfont=dict(color="#1f77b4"),
        side="left",
        # No `overlaying`: this is the only y axis, so there is nothing to overlay.
    ),
    xaxis=dict(
        title="Years",
        showgrid=True,
        zeroline=True,
        showline=True,
        showticklabels=True,
        gridwidth=1,
    ),
)
fig1.show()

In [131]:
# Trace colours, defined in this cell so it runs on a fresh kernel.
SOCIAL_MEDIA_COLOR = "red"
ECOMMERCE_COLOR = "seagreen"

# Both Polish enterprise series on one shared percentage axis.
fig2 = go.Figure(
    data=[
        go.Scatter(
            mode="lines",
            line={"color": SOCIAL_MEDIA_COLOR},
            name="Social Media Used to Develop the enterprise's image or market products",
            x=social_media_used_by_purpose_df_poland["TIME"].values,
            y=social_media_used_by_purpose_df_poland["Value"].values,
        ),
        go.Scatter(
            mode="lines",
            line={"color": ECOMMERCE_COLOR},
            name="Enterprises with e-commerce sales",
            x=ecommerce_sales_df_poland["TIME"].values,
            y=ecommerce_sales_df_poland["Value"].values,
        ),
    ]
)

fig2.update_layout(
    title="Usage of e-commerce and social media to increase sales (Poland)",
    yaxis=dict(
        # title.text / title.font replace the deprecated `titlefont` shorthand.
        title=dict(
            text="% of enterprises",
            font=dict(color="#1f77b4"),
        ),
        tickfont=dict(color="#1f77b4"),
        side="left",
        # No `overlaying`: both traces share this single y axis.
    ),
    xaxis=dict(
        title="Years",
        showgrid=True,
        zeroline=True,
        showline=True,
        showticklabels=True,
        gridwidth=1,
    ),
)
fig2.show()

In [132]:
# Two series on a shared year axis: internet users as a labelled line (left axis)
# and social-media use by enterprises as markers (right axis).
# poland_df[YEARS] is a one-row frame, so element 0 is the 1-D yearly series.
poland_internet_share = poland_df[YEARS].to_numpy()[0]
social_media_share = social_media_used_by_purpose_df_poland["Value"].to_numpy()

fig3 = go.Figure(
    data=[
        go.Scatter(
            mode="lines+text",
            line={"color": "royalblue"},
            name="Internet Users",
            # Whole-percent point labels; int() drops the fractional part.
            text=["{0}%".format(int(share)) for share in poland_internet_share],
            textposition="top left",
            yaxis="y1",
            x=YEARS,
            y=poland_internet_share,
        ),
        go.Scatter(
            mode="markers",
            name="Enterprises",
            yaxis="y2",
            x=social_media_used_by_purpose_df_poland["TIME"].to_numpy(),
            y=social_media_share,
            # Marker size follows the plotted percentage.
            marker=dict(
                size=[int(share) for share in social_media_share],
                color="red",
            ),
        ),
    ]
)

fig3.update_layout(
    title="Growth of internet users along with the increased use of social media by enterprises for marketing (Poland)",
    yaxis=dict(
        # title.text / title.font replace the deprecated `titlefont` shorthand.
        title=dict(
            text="Individuals using the Internet (% of population)",
            font=dict(color="royalblue"),
        ),
        tickfont=dict(color="royalblue"),
    ),
    # Secondary axis drawn over the primary one, placed on the right-hand side.
    yaxis2=dict(
        title=dict(
            text="social media used by companies (% of enterprises)",
            font=dict(color="red"),
        ),
        tickfont=dict(color="red"),
        anchor="free",
        overlaying="y",
        side="right",
        position=0.96,
    ),
    xaxis=dict(
        title="Years",
        showticklabels=True,
        gridwidth=1,
    ),
)
fig3.show()