In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from urllib.request import urlopen
import json
# Make "colab" the default Plotly renderer used by fig.show() in this notebook.
# NOTE(review): presumably the notebook is run on Google Colab — confirm, and
# switch the renderer name when running it elsewhere.
pio.renderers.default = "colab"

In [2]:
# Load the training users and reduce them to the three columns used below:
#   id      - user identifier
#   country - copy of `country_destination`
#   Year    - leading 'YYYY' component of `date_account_created`, as an integer
training = (
    pd.read_csv("train_users_2.csv")
    .assign(
        Year=lambda users: users['date_account_created'].str.split('-').str[0].astype('int'),
        country=lambda users: users['country_destination'],
    )
    .loc[:, ['id', 'country', 'Year']]
)
training

Unnamed: 0,id,country,Year
0,gxn3p5htnn,NDF,2010
1,820tgsjxq7,NDF,2011
2,4ft3gnwmtx,US,2010
3,bjjt8pjhuk,other,2011
4,87mebub9p4,US,2010
...,...,...,...
213446,zxodksqpep,NDF,2014
213447,mhewnxesx9,NDF,2014
213448,6o3arsjbb4,NDF,2014
213449,jh95kwisub,NDF,2014


In [3]:
# Second user table. Judging by the cell output it already carries the
# id / country / Year columns, so no reshaping is done here.
path = 'iv.csv'
df = pd.read_csv(filepath_or_buffer=path)
df

Unnamed: 0,id,country,Year
0,5uwns89zht,NDF,2014
1,jtl0dijy2j,NDF,2014
2,xx0ulgorjt,NDF,2014
3,6c6puo6ix0,NDF,2014
4,czqhjk3yfe,US,2014
...,...,...,...
62091,cv0na2lf5a,NDF,2014
62092,zp8xfonng8,NDF,2014
62093,fa6260ziny,NDF,2014
62094,87k0fy4ugm,NDF,2014


In [4]:
# Download the ISO 3166 country table (a JSON list of records; the 'alpha-2'
# and 'alpha-3' keys of each record are used by country_process below).
# A timeout is set so that a stalled connection raises an error instead of
# hanging "Run All" indefinitely.
with urlopen(
    'https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.json',
    timeout=30,
) as response:
    country_code_iso = json.load(response)

def country_process(x, country_codes=None):
    """Translate an ISO 3166-1 alpha-2 country code into its alpha-3 code.

    Parameters
    ----------
    x : str
        Alpha-2 code to translate. Matching is case-insensitive.
    country_codes : iterable of dict, optional
        Records that each carry an 'alpha-2' and an 'alpha-3' key. When
        omitted, the notebook-level ``country_code_iso`` table is used.

    Returns
    -------
    str
        The 'alpha-3' value of the first record whose 'alpha-2' matches
        ``x``, or 'other' when nothing matches or ``x`` is not a string
        (for example NaN coming from a missing CSV value).
    """
    # Non-string input has no .lower(); treat it like any unknown code
    # instead of raising AttributeError in the middle of a Series.apply.
    if not isinstance(x, str):
        return 'other'

    if country_codes is None:
        country_codes = country_code_iso

    wanted = x.lower()  # hoisted: invariant across the scan
    for record in country_codes:
        if record['alpha-2'].lower() == wanted:
            return record['alpha-3']

    return 'other'

In [5]:
# Add the ISO alpha-3 code for every row's destination, as returned by
# country_process.
df['country_iso'] = df['country'].map(country_process)

# Sanity check of the mapping. A bare expression is only rendered when it is
# the last line of a cell, so the distinct codes are printed explicitly.
print(df['country_iso'].unique())
df

Unnamed: 0,id,country,Year,country_iso
0,5uwns89zht,NDF,2014,other
1,jtl0dijy2j,NDF,2014,other
2,xx0ulgorjt,NDF,2014,other
3,6c6puo6ix0,NDF,2014,other
4,czqhjk3yfe,US,2014,USA
...,...,...,...,...
62091,cv0na2lf5a,NDF,2014,other
62092,zp8xfonng8,NDF,2014,other
62093,fa6260ziny,NDF,2014,other
62094,87k0fy4ugm,NDF,2014,other


In [6]:
# Add the ISO alpha-3 code for every row's destination, as returned by
# country_process.
training['country_iso'] = training['country'].map(country_process)

# Sanity check of the mapping. A bare expression is only rendered when it is
# the last line of a cell, so the distinct codes are printed explicitly.
print(training['country_iso'].unique())
training

Unnamed: 0,id,country,Year,country_iso
0,gxn3p5htnn,NDF,2010,other
1,820tgsjxq7,NDF,2011,other
2,4ft3gnwmtx,US,2010,USA
3,bjjt8pjhuk,other,2011,other
4,87mebub9p4,US,2010,USA
...,...,...,...,...
213446,zxodksqpep,NDF,2014,other
213447,mhewnxesx9,NDF,2014,other
213448,6o3arsjbb4,NDF,2014,other
213449,jh95kwisub,NDF,2014,other


In [7]:
# Stack the two user tables into a single frame. DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0; pd.concat is the
# supported equivalent. Row labels are kept as-is (no ignore_index), which is
# what append did.
df2 = pd.concat([training, df])
df2

Unnamed: 0,id,country,Year,country_iso
0,gxn3p5htnn,NDF,2010,other
1,820tgsjxq7,NDF,2011,other
2,4ft3gnwmtx,US,2010,USA
3,bjjt8pjhuk,other,2011,other
4,87mebub9p4,US,2010,USA
...,...,...,...,...
62091,cv0na2lf5a,NDF,2014,other
62092,zp8xfonng8,NDF,2014,other
62093,fa6260ziny,NDF,2014,other
62094,87k0fy4ugm,NDF,2014,other


In [8]:
# Number of users (non-null ids) per (Year, country_iso) pair, flattened back
# into ordinary columns: Year, country_iso, counts.
df_year = (
    df2
    .groupby(['Year', 'country_iso'])
    .agg(counts=('id', 'count'))
    .reset_index()
)
df_year

Unnamed: 0,Year,country_iso,counts
0,2010,AUS,7
1,2010,CAN,42
2,2010,DEU,16
3,2010,ESP,43
4,2010,FRA,120
5,2010,GBR,28
6,2010,ITA,30
7,2010,NLD,11
8,2010,PRT,1
9,2010,USA,1228


In [9]:
# Visualization of the result 1: colour scale capped at 1000.
# Link to see the graph: https://ethereal-paladin.github.io/iv-rut15/iv1.html

# Retained as a notebook-level name in case other cells read it.
start_year = int(df_year['Year'].min())

# The years that actually occur in the data, in ascending order. Traces and
# slider steps are both derived from this list so they can never drift apart.
years = sorted(df_year['Year'].unique())

# One choropleth trace per year. Only the first trace starts visible so that
# the initial view agrees with the slider's initial position (active=0);
# without this every trace is drawn at once until the slider is first moved.
data_slider = []
for position, year in enumerate(years):
    df_segmented = df_year[df_year['Year'] == year]
    data_slider.append(dict(
        type='choropleth',
        locations=df_segmented['country_iso'],
        z=df_segmented['counts'],
        zmax=1000,
        zmin=1,
        locationmode='ISO-3',
        colorscale='YlGnBu',
        colorbar={'title': 'Number of new users'},
        visible=(position == 0),
    ))

# One slider step per trace: show that trace, hide all the others. The label
# uses the year the trace really holds rather than `index + start_year`, which
# would mislabel every step after a missing year.
steps = []
for position, year in enumerate(years):
    visibility = [False] * len(data_slider)
    visibility[position] = True
    steps.append(dict(
        method='restyle',
        args=['visible', visibility],
        label='Year {}'.format(year),
    ))

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]
layout = dict(
    title='Graph 1: First destination of new users each year in each area (analyze the area except US)',
    geo=dict(scope='world', projection={'type': 'equirectangular'}),
    sliders=sliders,
)
fig = go.Figure(dict(data=data_slider, layout=layout))
fig.show()
fig.write_html("iv1.html")


In [10]:
# Visualization of the result 2: colour scale capped at 50000.
# Link to see the graph: https://ethereal-paladin.github.io/iv-rut15/iv2.html

# Retained as a notebook-level name in case other cells read it.
start_year = int(df_year['Year'].min())

# The years that actually occur in the data, in ascending order. Traces and
# slider steps are both derived from this list so they can never drift apart.
years = sorted(df_year['Year'].unique())

# One choropleth trace per year. Only the first trace starts visible so that
# the initial view agrees with the slider's initial position (active=0);
# without this every trace is drawn at once until the slider is first moved.
data_slider = []
for position, year in enumerate(years):
    df_segmented = df_year[df_year['Year'] == year]
    data_slider.append(dict(
        type='choropleth',
        locations=df_segmented['country_iso'],
        z=df_segmented['counts'],
        zmax=50000,
        zmin=1,
        locationmode='ISO-3',
        colorscale='YlGnBu',
        colorbar={'title': 'Number of new users'},
        visible=(position == 0),
    ))

# One slider step per trace: show that trace, hide all the others. The label
# uses the year the trace really holds rather than `index + start_year`, which
# would mislabel every step after a missing year.
steps = []
for position, year in enumerate(years):
    visibility = [False] * len(data_slider)
    visibility[position] = True
    steps.append(dict(
        method='restyle',
        args=['visible', visibility],
        label='Year {}'.format(year),
    ))

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]
layout = dict(
    title='Graph 2: First destination of new users each year in each area (analyze the area in US)',
    geo=dict(scope='world', projection={'type': 'equirectangular'}),
    sliders=sliders,
)
fig = go.Figure(dict(data=data_slider, layout=layout))
fig.show()
fig.write_html("iv2.html")