In [None]:
import json
import pandas as pd
import numpy as np
import plotly
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Read the pipe-delimited user-profile churn table.
churn_user_profile = pd.read_csv(
    'Churn_UsersProfile.txt',
    sep="|",
    encoding='unicode_escape',
)

# Independent copy handed to the analysis below, so the frame as loaded
# stays available unchanged.
churn_user_profile_copy = churn_user_profile.copy()

# Key columns on which aggregate_relative_change self-joins the table.
aggr_cols = [
    'Region_of_Origin', 'District_of_Origin', 'County_of_Origin',
    'GenderDescription', 'AgeClassDescription',
]

def aggregate_relative_change(churn_user_profile_copy, aggr_cols, baseline_period='Sep-19 to Feb-20'):
    """Pair each baseline-period row with the rows of the other periods and
    compute the relative change in average daily bus users.

    The table is self-joined on ``aggr_cols``; every column that is not a join
    key therefore appears twice, suffixed ``_x`` (left side) and ``_y`` (right
    side). Only pairs whose left side is the baseline period and whose right
    side is a different period are kept.

    Args:
        churn_user_profile_copy: DataFrame holding the ``aggr_cols`` columns
            plus ``Period`` and ``Average_BusUsers_per_Day``.
        aggr_cols: list of column names used as join keys.
        baseline_period: value of ``Period`` treated as the reference period.
            Defaults to the value that was previously hard-coded.

    Returns:
        A new DataFrame of the retained pairs with an added
        ``Relative_Change_AverageBusUsers`` column, computed as
        ``(Average_BusUsers_per_Day_y - Average_BusUsers_per_Day_x)
        / Average_BusUsers_per_Day_x``. Rows whose baseline value is 0 get NaN,
        because the relative change is undefined there.
    """
    paired = churn_user_profile_copy.merge(
        churn_user_profile_copy, on=aggr_cols, how='left'
    )

    # Keep rows where Period_x differs from Period_y and Period_x is the baseline.
    is_baseline_pair = (
        (paired['Period_x'] != paired['Period_y'])
        & (paired['Period_x'] == baseline_period)
    )
    # Explicit copy: adding a column to a filtered frame would otherwise raise
    # pandas' SettingWithCopyWarning.
    paired = paired.loc[is_baseline_pair].copy()

    # A zero baseline would produce +/-inf, which then poisons every mean that
    # includes the row; turn those baselines into NaN so the result is NaN.
    baseline = paired['Average_BusUsers_per_Day_x']
    baseline = baseline.where(baseline != 0)

    paired['Relative_Change_AverageBusUsers'] = (
        (paired['Average_BusUsers_per_Day_y'] - baseline) / baseline
    )

    return paired

# Relative change in average daily bus users for every paired segment.
tmp = aggregate_relative_change(churn_user_profile_copy, aggr_cols)
daily_users_changes = tmp

# Per-county mean and median of the relative change. The county column is
# renamed to NAME_2 so it matches the GeoJSON property used as the feature key
# by the maps.
change_by_county = daily_users_changes.groupby('County_of_Origin')['Relative_Change_AverageBusUsers']
tmp_mean = (
    change_by_county.mean()
    .reset_index()
    .rename(columns={'County_of_Origin': 'NAME_2'})
)
tmp_median = (
    change_by_county.median()
    .reset_index()
    .rename(columns={'County_of_Origin': 'NAME_2'})
)

In [None]:
# Load the GeoJSON passed as `geojson=` to the choropleth maps.
# The context manager closes the file handle as soon as parsing finishes;
# the previous bare open() left it open for the lifetime of the kernel.
with open('concelhos-shapefile.json') as geojson_file:
    counties = json.load(geojson_file)

In [None]:
# Choropleth: per-county MEAN of Relative_Change_AverageBusUsers.

mean_change_values = tmp_mean["Relative_Change_AverageBusUsers"]

fig = px.choropleth_mapbox(
    data_frame=tmp_mean,
    geojson=counties,
    # Rows are matched to GeoJSON features through the NAME_2 property.
    locations='NAME_2',
    featureidkey="properties.NAME_2",
    color='Relative_Change_AverageBusUsers',
    color_continuous_scale="Viridis",
    # Colour scale spans exactly the observed minimum..maximum.
    range_color=(mean_change_values.min(), mean_change_values.max()),
    labels={"Relative_Change_AverageBusUsers": "Relative_Change_AverageBusUsers"},
    opacity=0.5,
    mapbox_style="carto-positron",
    center={"lat": 38.736946, "lon": -9.142685},
    zoom=9,
)

# Drop the default margins so the map fills the output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# Choropleth: per-county MEDIAN of Relative_Change_AverageBusUsers.

median_change_values = tmp_median["Relative_Change_AverageBusUsers"]

fig = px.choropleth_mapbox(
    data_frame=tmp_median,
    geojson=counties,
    # Rows are matched to GeoJSON features through the NAME_2 property.
    locations='NAME_2',
    featureidkey="properties.NAME_2",
    color='Relative_Change_AverageBusUsers',
    color_continuous_scale="Viridis",
    # Colour scale spans exactly the observed minimum..maximum.
    range_color=(median_change_values.min(), median_change_values.max()),
    labels={"Relative_Change_AverageBusUsers": "Relative_Change_AverageBusUsers"},
    opacity=0.5,
    mapbox_style="carto-positron",
    center={"lat": 38.736946, "lon": -9.142685},
    zoom=9,
)

# Drop the default margins so the map fills the output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# Original note (translated from Portuguese): "same as Task 1, to be adapted".

# Pipe-delimited origin/destination churn table.
churn_od = pd.read_csv('Churn_OD.txt', delimiter="|", encoding='unicode_escape')

# Parish metadata, reduced to its identifying columns.
parish_metadata = pd.read_excel('freguesias-metadata.xlsx')
parish_metadata = parish_metadata[["distrito", "concelho", "freguesia", "dicofre"]]

# Census table; the parish-code and resident-population columns are renamed.
# Despite the "density" in the variable names, the value carried through is
# the "PopRes_2011 (nº)" column, renamed to "População".
population_density_censos = pd.read_excel(
    'DistritosConcelhosFreguesias_CAOP2013_Populacao_Censos2011.xls'
).rename(columns={"Freguesia (FR)": "dicofre", "PopRes_2011 (nº)": "População"})

# Fix: the stripped codes used to be computed and thrown away, so surrounding
# whitespace was still present when joining on the code. Only string values
# are stripped; anything else is left exactly as read.
population_density_censos["dicofre"] = [
    code.strip() if isinstance(code, str) else code
    for code in population_density_censos["dicofre"]
]

# Left-join the parish metadata on the parish code, then keep the district,
# county and parish names, the code and the population.
# NOTE(review): none of the parish_metadata columns survive the selection
# below, so this join can only change the row count (when a code occurs more
# than once in the metadata) — confirm it is still needed.
census_with_metadata = population_density_censos.merge(
    parish_metadata, on=["dicofre"], how="left"
)
population_density_censos_dicofres = census_with_metadata[
    ["Designação DT", "Designação CC", "dicofre", "Designação FR", "População"]
].rename(
    columns={"Designação DT": "Distrito", "Designação CC": "Concelho", "Designação FR": "Freguesia"}
)

pop_density = population_density_censos_dicofres.copy()

# Same table with the code column named like the churn_od join key.
population_density_censos_dicofres_renamed = population_density_censos_dicofres.rename(
    columns={'dicofre': 'Dicofre_ParishCode_of_Public_Transportation'}
)

# Attach the population of the public-transportation parish to every OD row.
churn_od_pop_density = churn_od.merge(
    population_density_censos_dicofres_renamed,
    on=["Dicofre_ParishCode_of_Public_Transportation"],
    how="left",
)

# Population summed per (district, county); the county column is renamed to
# NAME_2 so it can be joined with the per-county median table.
pop_density_concelho = (
    pop_density.groupby(['Distrito', 'Concelho'])['População']
    .sum()
    .reset_index()
    .rename(columns={'Concelho': 'NAME_2'})
)

# Inner join: counties missing from either table are dropped.
pop_median = tmp_median.merge(pop_density_concelho, on='NAME_2')

# Median relative change weighted by the county's population.
pop_median["Pop_Relative_Change"] = (
    pop_median["Relative_Change_AverageBusUsers"] * pop_median["População"]
)

In [None]:
# Total of the population-weighted relative changes over all counties.
soma = pop_median['Pop_Relative_Change'].sum()

# Original note: "A typical passenger vehicle emits about 4.6 metric tons of
# carbon dioxide per year."
# NOTE(review): the factor 1000 presumably converts metric tons to kilograms — confirm.
emission_factor = 4.6 * 1000

# Displayed as the cell output: (total, negated total scaled by the factor).
soma, (-soma * emission_factor)

In [None]:
# Choropleth: per-county Pop_Relative_Change.
pop_change_values = pop_median["Pop_Relative_Change"]

fig = px.choropleth_mapbox(
    data_frame=pop_median,
    geojson=counties,
    # Rows are matched to GeoJSON features through the NAME_2 property.
    locations='NAME_2',
    featureidkey="properties.NAME_2",
    color='Pop_Relative_Change',
    color_continuous_scale="Viridis",
    # Colour scale spans exactly the observed minimum..maximum.
    range_color=(pop_change_values.min(), pop_change_values.max()),
    labels={"Pop_Relative_Change": "Pop_Relative_Change"},
    opacity=0.5,
    mapbox_style="carto-positron",
    center={"lat": 38.736946, "lon": -9.142685},
    zoom=9,
)

# Drop the default margins so the map fills the output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# Original note (translated from Portuguese): "not sure whether it makes sense
# to include this one".


def _with_coordinates(flows, places, key, lat_name, lon_name):
    """Inner-join ``flows`` with ``places`` on ``key`` and rename the joined
    ``lat`` / ``lng`` columns to ``lat_name`` / ``lon_name``.

    Args:
        flows: DataFrame containing the ``key`` column.
        places: DataFrame containing ``key``, ``lat`` and ``lng`` columns.
        key: name of the column to join on.
        lat_name: new name for the ``lat`` column.
        lon_name: new name for the ``lng`` column.

    Returns:
        A new DataFrame; rows of ``flows`` without a match in ``places`` are
        dropped (inner join).
    """
    return flows.merge(places, on=key).rename(columns={'lat': lat_name, 'lng': lon_name})


def _show_flow_map(demand):
    """Draw one red line per row of ``demand`` and show the figure.

    Args:
        demand: DataFrame with ``start_lat``, ``start_lon``, ``end_lat``,
            ``end_lon`` and ``Demand_weight`` columns.

    Returns:
        The ``go.Figure`` that was shown.
    """
    flow_fig = go.Figure()

    # Loop-invariant, so computed once: each line's opacity is its weight
    # relative to the largest weight in the table.
    max_weight = float(demand['Demand_weight'].max())

    # Positional iteration over the columns, independent of the index labels.
    rows = zip(
        demand['start_lon'], demand['start_lat'],
        demand['end_lon'], demand['end_lat'],
        demand['Demand_weight'],
    )
    for start_lon, start_lat, end_lon, end_lat, weight in rows:
        flow_fig.add_trace(
            go.Scattergeo(
                lon=[start_lon, end_lon],
                lat=[start_lat, end_lat],
                mode='lines',
                line=dict(width=1, color='red'),
                opacity=float(weight) / max_weight,
            )
        )

    flow_fig.update_layout(
        showlegend=False,
        geo=dict(
            # NOTE(review): latitude is 39.736946 here, whereas the choropleth
            # maps in this notebook are centred on 38.736946 — confirm intended.
            center={"lat": 39.736946, "lon": -9.142685},
            projection_scale=50,
            projection_type='azimuthal equal area',
            showland=True,
            landcolor='rgb(243, 243, 243)',
            countrycolor='rgb(204, 204, 204)',
            showcountries=True,
        ),
        # Drop the default margins so the map fills the output area.
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
    )
    flow_fig.show()
    return flow_fig


# City table providing the lat/lng used for both ends of every flow.
pt = pd.read_csv('pt.csv', delimiter=",", encoding='utf-8')

# Mean and median Demand_weight per (origin county, public-transportation county).
od_pair_keys = ['County_of_Origin', 'County_of_Public_Transportation']
churn_od_mean = churn_od.groupby(od_pair_keys)['Demand_weight'].mean().reset_index()
churn_od_median = churn_od.groupby(od_pair_keys)['Demand_weight'].median().reset_index()

# Origin end: city names are used exactly as read from pt.csv.
pt_origin = pt.rename(columns={'city': 'County_of_Origin'})
churn_pt_mean = _with_coordinates(churn_od_mean, pt_origin, 'County_of_Origin', 'start_lat', 'start_lon')
churn_pt_median = _with_coordinates(churn_od_median, pt_origin, 'County_of_Origin', 'start_lat', 'start_lon')

# Destination end: city names are upper-cased (in place, on `pt`) before the join.
# NOTE(review): presumably County_of_Public_Transportation is upper-case in
# Churn_OD.txt while County_of_Origin is not — confirm.
pt['city'] = pt['city'].str.upper()
pt_dest = pt.rename(columns={'city': 'County_of_Public_Transportation'})
new_churn_pt_mean = _with_coordinates(
    churn_pt_mean, pt_dest, 'County_of_Public_Transportation', 'end_lat', 'end_lon'
)
new_churn_pt_median = _with_coordinates(
    churn_pt_median, pt_dest, 'County_of_Public_Transportation', 'end_lat', 'end_lon'
)

# Only the line endpoints and the weight are needed for plotting.
flow_columns = ['start_lat', 'start_lon', 'end_lat', 'end_lon', 'Demand_weight']
demand_mean = new_churn_pt_mean[flow_columns]
demand_median = new_churn_pt_median[flow_columns]

# Demand_weight MEAN
fig = _show_flow_map(demand_mean)

# Demand_weight MEDIAN
fig = _show_flow_map(demand_median)