In [330]:
import pandas as pd
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings("ignore")
import plotly.graph_objs as go

# Gráfico 1: Média de Pessoas por Dia da Semana

- Set global variables

In [331]:
# Values of the `bairro` column that are kept for the daily analysis.
BAIRROS_FOR_STUDY = [
    'barra',
    'botafogo',
    'centro',
    'copacabana',
    'flamengo',
    'ipanema_leblon',
    'jacarepagua',
    'Rio_de_Janeiro',
    'sem_bairro',
    'tijuca',
]

In [332]:
# Maps each raw `bairro` key to the label shown in charts and summaries.
correct_form_bairro = dict([
    ('barra', 'Barra da Tijuca'),
    ('botafogo', 'Botafogo'),
    ('centro', 'Centro'),
    ('copacabana', 'Copacabana'),
    ('flamengo', 'Flamengo'),
    ('ipanema_leblon', 'Ipanema/Leblon'),
    ('jacarepagua', 'Jacarepaguá'),
    ('Rio_de_Janeiro', 'Rio de Janeiro'),
    ('sem_bairro', 'Outros'),
    ('tijuca', 'Tijuca'),
])

- Importing CSV

In [333]:
# Load the daily history CSV into a DataFrame.
history_daily = pd.read_csv(filepath_or_buffer='../data/20200415.csv')

- Data Preprocessing

In [334]:
# Keep only the bairros under study. `.copy()` makes the result an independent
# frame, so the column assignments in the following cells do not operate on a
# slice of the loaded data (which triggers pandas' chained-assignment warning).
history_daily = history_daily.loc[history_daily['bairro'].isin(BAIRROS_FOR_STUDY)].copy()

In [335]:
# history_daily['dia'] = history_daily.dia.apply(lambda x: str(x)[:-3])

In [336]:
# Parse the `dia` column into pandas datetimes.
history_daily = history_daily.assign(dia=pd.to_datetime(history_daily['dia']))

In [337]:
# English weekday name of each date (translated to Portuguese further down).
history_daily = history_daily.assign(day_of_week=history_daily['dia'].dt.day_name())

In [338]:
# English weekday name -> Portuguese weekday name used in chart labels.
translate_dayofweek = dict(
    Monday='Segunda',
    Tuesday='Terça',
    Wednesday='Quarta',
    Thursday='Quinta',
    Friday='Sexta',
    Saturday='Sábado',
    Sunday='Domingo',
)

Analysis starts from the last 7 days

In [339]:
# Most recent date in the data. `Series.max()` is vectorised and skips missing
# dates, unlike the builtin `max()` which compares element by element.
last_record = history_daily['dia'].max()

In [340]:
# First day of the 7-day window that ends on the most recent record.
start_time = last_record - pd.Timedelta(days=6)

In [341]:
# Render the window start as a YYYY-MM-DD string for the date filter below.
start_time = f'{start_time:%Y-%m-%d}'

In [342]:
# Rows from the window start onwards. `.copy()` detaches the selection from
# `history_daily`, because the next cells add and rename columns on `week_now`.
week_now = history_daily.loc[history_daily['dia'] >= start_time].copy()

In [343]:
def translate(x):
    """Return the Portuguese weekday name for the English name `x`."""
    return translate_dayofweek[x]


# Replace the English weekday names with their Portuguese equivalents.
week_now['day_of_week'] = week_now['day_of_week'].apply(translate)

In [344]:
# Short dd/mm label for each date.
week_now['Dia'] = week_now['dia'].dt.strftime('%d/%m')

In [345]:
# Bar label: `proporcao_media_dia_semana` rounded to an integer, with a '%' suffix.
week_now['proporcao_relacao_media_dia_da_semana_legend'] = (
    week_now['proporcao_media_dia_semana'].map(lambda pct: f'{round(pct)}%')
)

In [346]:
# Upper-cased first letter of the weekday name, wrapped as ' (X)'.
week_now['day_of_week_initial'] = week_now['day_of_week'].map(
    lambda name: f' ({str(name)[0].upper()})'
)

In [347]:
# X-axis label: dd/mm date followed by the weekday initial, e.g. '14/04 (T)'.
week_now['day_of_week_legend'] = week_now['Dia'].str.cat(week_now['day_of_week_initial'])

In [348]:
# Give the two plotted measures their display names.
week_now = week_now.rename(
    columns={
        'pessoas_contadas': 'Pessoas Contadas',
        'media_pessoas_contadas': 'Média do Dia da Semana',
    }
)

- Generating Graph

In [349]:
# Bairro key to plot, and the window rows that belong to it.
bairro_graph = 'Rio_de_Janeiro'
week_graph = week_now[week_now['bairro'].eq(bairro_graph)]

In [350]:
# Grouped bar chart for the selected bairro: people counted per day next to the
# weekday average, one pair of bars per day in `week_graph`. The percentage
# label column is drawn above the "people counted" bars.
figure_1 = go.Figure(
    data = [
        go.Bar(
            # Legend entry; fixes the misspelling "Pessos Contadas".
            name = "Pessoas Contadas",
            x = week_graph['day_of_week_legend'],
            y = week_graph['Pessoas Contadas'],
            text  = week_graph['proporcao_relacao_media_dia_da_semana_legend'],
            textposition='outside',
            offsetgroup = 0
        ),
        go.Bar(
            name = "Média do Dia da Semana",
            x = week_graph['day_of_week_legend'],
            y = week_graph['Média do Dia da Semana'],
            offsetgroup = 1
        )
    ],
    layout = go.Layout(
        # Title uses the display name of the bairro key.
        title = f'{correct_form_bairro[bairro_graph]}: Média de Pessoas por Dia',
        title_x = 0.5,
        yaxis_title = "Pessoas Contadas",
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
        width = 800
    )
)
figure_1.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')
figure_1.show()

## Análise Geral: Bairros com maiores aglomerações

In [359]:
# Order by number of people in the last day

In [360]:
columns = ['bairro', 'Pessoas Contadas']
# "Yesterday" = second most recent distinct date. The dates are sorted
# explicitly so the result does not depend on the row order of the CSV, and
# `.iloc` returns a pandas Timestamp (which supports `.strftime`).
last_day = week_now['dia'].drop_duplicates().sort_values().iloc[-2]
# Top 3 bairros by people counted on that day, leaving out the aggregate rows
# 'Rio_de_Janeiro' and 'sem_bairro'. `.copy()` because the next cell rewrites
# the `bairro` column of this selection.
high_aglom = (
    week_now.loc[
        (week_now['dia'] == last_day)
        & (~week_now['bairro'].isin(['Rio_de_Janeiro', 'sem_bairro'])),
        columns,
    ]
    .sort_values(by='Pessoas Contadas', ascending=False)
    .head(3)
    .copy()
)

In [361]:
# Swap raw bairro keys for their display names (unknown keys raise KeyError).
high_aglom['bairro'] = high_aglom['bairro'].map(correct_form_bairro.__getitem__)

In [362]:
# Convert to {column name: list of values}; `high_aglom` is a dict from here on.
high_aglom = high_aglom.to_dict(orient='list')

In [363]:
# Display the resulting dict of bairro names and people counted.
high_aglom

{'bairro': ['Copacabana', 'Ipanema/Leblon', 'Centro'],
 'Pessoas Contadas': [5182, 1626, 1277]}

## Análise Geral: Bairros com menores variações em relação à média histórica

In [355]:
# Order by queda_proporcional_dia_semana on the last day, in ascending order

In [356]:
columns = ['bairro', 'queda_proporcional_dia_semana']
# Second most recent distinct date. The dates are sorted explicitly so the
# result does not depend on the row order of the CSV, and `.iloc` returns a
# pandas Timestamp.
last_day = week_now['dia'].drop_duplicates().sort_values().iloc[-2]
# 3 bairros with the smallest `queda_proporcional_dia_semana` on that day,
# leaving out the aggregate rows 'Rio_de_Janeiro' and 'sem_bairro'. `.copy()`
# because the next cell rewrites the `bairro` column of this selection.
low_variations = (
    week_now.loc[
        (week_now['dia'] == last_day)
        & (~week_now['bairro'].isin(['Rio_de_Janeiro', 'sem_bairro'])),
        columns,
    ]
    .sort_values(by='queda_proporcional_dia_semana')
    .head(3)
    .copy()
)

In [357]:
# Swap raw bairro keys for their display names (unknown keys raise KeyError).
low_variations['bairro'] = low_variations['bairro'].map(correct_form_bairro.__getitem__)

In [358]:
# Convert to {column name: list of values}; `low_variations` is a dict from here on.
low_variations = low_variations.to_dict(orient='list')

## Análise Geral: Bairros com maiores variações em relação à média histórica

In [321]:
# Order by queda_proporcional_dia_semana on the last day, in descending order

In [325]:
columns = ['bairro', 'queda_proporcional_dia_semana']
# Second most recent distinct date. The dates are sorted explicitly so the
# result does not depend on the row order of the CSV, and `.iloc` returns a
# pandas Timestamp.
last_day = week_now['dia'].drop_duplicates().sort_values().iloc[-2]
# 3 bairros with the largest `queda_proporcional_dia_semana` on that day,
# leaving out the aggregate rows 'Rio_de_Janeiro' and 'sem_bairro'. `.copy()`
# because the next cell rewrites the `bairro` column of this selection.
high_variations = (
    week_now.loc[
        (week_now['dia'] == last_day)
        & (~week_now['bairro'].isin(['Rio_de_Janeiro', 'sem_bairro'])),
        columns,
    ]
    .sort_values(by='queda_proporcional_dia_semana', ascending=False)
    .head(3)
    .copy()
)

In [326]:
# Swap raw bairro keys for their display names (unknown keys raise KeyError).
high_variations['bairro'] = high_variations['bairro'].map(correct_form_bairro.__getitem__)

In [327]:
# Convert to {column name: list of values}; `high_variations` is a dict from here on.
high_variations = high_variations.to_dict(orient='list')

In [328]:
# Bundle the three summaries into one dict.
# NOTE(review): the 'tes' entry looks like leftover test data — confirm it is needed.
k = dict(
    tes=[312321],
    high_aglom=high_aglom,
    low_variations=low_variations,
    high_variations=high_variations,
)

In [309]:
# NOTE(review): 12 looks like a placeholder for the report date — confirm.
k.update({'date': 12})

In [329]:
# Display `last_day` formatted as YYYY-MM-DD.
last_day.strftime('%Y-%m-%d')

'2020-04-14'

# Gráfico 2: Porcentagem em relação à média histórica

In [94]:
# Most recent date in the daily data. It is read from `history_daily` rather
# than `week_graph`, because `week_graph` is rebound to a weekly aggregate
# further down and re-running this cell would then pick up the wrong date.
last_record = history_daily['dia'].max()

In [95]:
# Window start: 21 days before the most recent record.
start_time = last_record - pd.Timedelta(days=21)

In [96]:
# Render the window start as a YYYY-MM-DD string for the date filter below.
start_time = f'{start_time:%Y-%m-%d}'

In [97]:
# Rows from the window start onwards. `.copy()` detaches the selection from
# `history_daily`, because the next cells add and rename columns on `week_now`.
week_now = history_daily.loc[history_daily['dia'] >= start_time].copy()

In [98]:
# Reduction relative to the historical average (copy of `queda_proporcional_dia_semana`).
week_now = week_now.assign(proporcao_history=week_now['queda_proporcional_dia_semana'])

In [99]:
# Short dd/mm label for each date.
week_now['dia_legend'] = week_now['dia'].dt.strftime('%d/%m')

In [100]:
# Capitalised column name, used from here on when selecting per-bairro rows.
week_now = week_now.rename(columns={'bairro': 'Bairro'})

- Selecting only some Bairros for graph

In [101]:
# new_list: every distinct date except the most recent one, so that
# max(new_list) is the second most recent date.
new_list = set(week_now['dia'])
new_list.remove(max(new_list))
# Bairros to plot, ordered by `queda_proporcional_dia_semana` on that date.
# The `Bairro` column still holds the raw keys at this point, so the aggregate
# rows must be excluded by key ('Rio_de_Janeiro', 'sem_bairro'); comparing
# against the display names 'Rio de Janeiro' / 'Outros' never matched.
bairros_for_graph = (
    week_now.loc[
        (week_now['dia'] == max(new_list))
        & (~week_now['Bairro'].isin(['Rio_de_Janeiro', 'sem_bairro']))
    ]
    .sort_values(by='queda_proporcional_dia_semana', ascending=True)['Bairro']
    .unique()
)

- Group by Week and Bairro. Graph per Week

In [103]:
# ISO week number of each date, stored as a string.
week_now['week_number'] = week_now['dia'].map(lambda day: str(day.isocalendar()[1]))

In [104]:
# One row per (bairro, ISO week): first date seen in that week and the mean
# reduction. `week_number` is a string, so the groupby orders weeks
# lexicographically ('10' before '9'); sorting by the first date restores
# chronological order for the x axis of the chart.
week_graph = (
    week_now.groupby(['Bairro', 'week_number'])
    .agg({'dia': 'min', 'queda_proporcional_dia_semana': 'mean'})
    .reset_index()
    .sort_values(by=['Bairro', 'dia'])
    .reset_index(drop=True)
)

In [105]:
# dd/mm label of the first date in each week.
week_graph['dia_legend'] = week_graph['dia'].dt.strftime('%d/%m')

In [106]:
# X-axis label such as 'W15 - 06/04': ISO week number and first date of the week.
week_graph['week_legend'] = ('W' + week_graph['week_number']).str.cat(
    week_graph['dia_legend'], sep=' - '
)

In [107]:
# Raw bairro key -> display label (same mapping as defined at the top of the notebook).
correct_form_bairro = dict(
    barra='Barra da Tijuca',
    botafogo='Botafogo',
    centro='Centro',
    copacabana='Copacabana',
    flamengo='Flamengo',
    ipanema_leblon='Ipanema/Leblon',
    jacarepagua='Jacarepaguá',
    Rio_de_Janeiro='Rio de Janeiro',
    sem_bairro='Outros',
    tijuca='Tijuca',
)

In [108]:
# Line chart with one trace per bairro: weekly mean reduction by week label.
figure_2 = go.Figure()
for bairro in bairros_for_graph:
    bairro_rows = week_graph.loc[week_graph['Bairro'] == bairro]
    bairro_trace = go.Scatter(
        x=bairro_rows['week_legend'],
        y=bairro_rows['queda_proporcional_dia_semana'],
        mode='lines+markers',
        name=correct_form_bairro[bairro],  # legend shows the display name
    )
    figure_2.add_trace(bairro_trace)

figure_2.update_layout(
    go.Layout(
        title='Redução em relação a Média Histórica',
        title_x=0.5,
        yaxis_title="Redução (%)",
        xaxis_title="IsoWeek - Dia de Início da Semana",
        plot_bgcolor="rgba(0,0,0,0)",  # transparent plot background
        width=800,
    )
)
figure_2.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')
figure_2.show()

# Gráfico 3: Quantidade média de pessoas por hora: hoje, há 7 dias e há 14 dias

In [191]:
# Load the hourly history CSV into a DataFrame.
history_hourly = pd.read_csv(filepath_or_buffer='../data/20200415_hora.csv')

In [192]:
# Rebinds the constant for the hourly analysis. The entry is a display name,
# because the hourly `bairro` column is mapped to display names before filtering.
BAIRROS_FOR_STUDY = [
    'Rio de Janeiro',
]

In [193]:
# Raw bairro key -> display label (same mapping as defined at the top of the notebook).
correct_form_bairro = {
    key: label
    for key, label in (
        ('barra', 'Barra da Tijuca'),
        ('botafogo', 'Botafogo'),
        ('centro', 'Centro'),
        ('copacabana', 'Copacabana'),
        ('flamengo', 'Flamengo'),
        ('ipanema_leblon', 'Ipanema/Leblon'),
        ('jacarepagua', 'Jacarepaguá'),
        ('Rio_de_Janeiro', 'Rio de Janeiro'),
        ('sem_bairro', 'Outros'),
        ('tijuca', 'Tijuca'),
    )
}

In [194]:
def correct_name(x):
    """Return the display name for the raw bairro key `x` (KeyError if unknown)."""
    return correct_form_bairro[x]

In [195]:
# Replace the raw bairro keys with their display names.
history_hourly['bairro'] = history_hourly['bairro'].map(correct_name)

- Analyze Bairro in BAIRROS_FOR_STUDY

In [196]:
# Keep only the bairros under study. `.copy()` makes the result an independent
# frame, so the column assignments in the following cells do not operate on a
# slice of the loaded data (which triggers pandas' chained-assignment warning).
history_hourly = history_hourly.loc[history_hourly['bairro'].isin(BAIRROS_FOR_STUDY)].copy()

- Data Preprocessing

Removing time zone '-03'

In [197]:
# Drop a trailing UTC offset such as '-03', '-03:00' or '-0300' from the
# timestamp text. The offset is removed only when it directly follows a
# ':MM' / ':SS' time field, so values that carry no offset are left intact
# instead of losing their last three characters.
history_hourly['hora'] = (
    history_hourly['hora']
    .astype(str)
    .str.replace(r'(?<=:\d{2})[+-]\d{2}(?::?\d{2})?$', '', regex=True)
)

In [198]:
# Most recent timestamp in the hourly data. `Series.max()` is vectorised and
# skips missing values, unlike the builtin `max()`.
last_record = pd.to_datetime(history_hourly['hora']).max()

In [199]:
# Window start: 14 days before the most recent record.
start_time = last_record - pd.Timedelta(days=14)

In [200]:
# Render the window start as midnight of that day ('YYYY-MM-DD 00:00').
start_time = f'{start_time:%Y-%m-%d 00:00}'

In [201]:
# Parse the `hora` column into pandas datetimes.
history_hourly = history_hourly.assign(hora=pd.to_datetime(history_hourly['hora']))

In [202]:
# Hourly rows from the window start onwards. `.copy()` detaches the selection
# from `history_hourly`, because the next cells add and rename columns on it.
week_now = history_hourly.loc[history_hourly['hora'] >= start_time].copy()

Accumulated Sum

In [203]:
# Calendar day (YYYY-MM-DD string) of each hourly row; used as the grouping key
# for the per-day cumulative sum.
week_now['day'] = week_now['hora'].dt.strftime('%Y-%m-%d')

In [204]:
# Running total of people counted within each (day, bairro). Rows are sorted by
# `hora` before accumulating so the total follows the clock regardless of the
# frame's row order; the assignment aligns the result back on the index. The
# Series `cumsum()` is used instead of `agg({'...': 'cumsum'})`, which returns
# a one-column DataFrame.
week_now['people_accumulate'] = (
    week_now.sort_values('hora')
    .groupby(['day', 'bairro'])['pessoas_contadas']
    .cumsum()
)

In [205]:
# Hour-of-day label such as '13h'.
week_now['horario_legend'] = week_now['hora'].dt.strftime('%Hh')

In [206]:
# Short dd/mm label for the day of each hourly row.
week_now['day_legend'] = week_now['hora'].dt.strftime('%d/%m')

- Graph

In [207]:
# Use 'Dia' as the column name for the dd/mm label.
week_now = week_now.rename(columns={'day_legend': 'Dia'})

In [208]:
# Same weekday one week and two weeks before the most recent record.
last_week_day = last_record - pd.Timedelta(days=7)
last_last_week_day = last_record - pd.Timedelta(days=14)

In [209]:
# String forms used by the filters below: the three calendar days to compare,
# and the most recent record truncated to the start of its hour.
last_week_day = f'{last_week_day:%Y-%m-%d}'
last_last_week_day = f'{last_last_week_day:%Y-%m-%d}'
today_now = f'{last_record:%Y-%m-%d}'
last_hour = f'{last_record:%Y-%m-%d %H:00:00}'

In [186]:
# Display name of the bairro plotted in the hourly charts.
bairro_graph = "Rio de Janeiro"

In [216]:
# Rows for the chart: the selected bairro, on the three compared days, strictly
# before the hour of the most recent record.
is_selected_bairro = week_now['bairro'] == bairro_graph
is_compared_day = week_now['day'].isin([last_week_day, last_last_week_day, today_now])
is_before_last_hour = week_now['hora'] < last_hour
week_now_graph = week_now.loc[is_selected_bairro & is_compared_day & is_before_last_hour]

In [217]:
# Display the rows selected for the cumulative chart.
week_now_graph

Unnamed: 0,hora,bairro,cameras_online,pessoas_contadas,media_pessoas_contadas,proporcao_media_dia_semana,queda_proporcional_dia_semana,day,people_accumulate,horario_legend,Dia
18689,2020-04-01 00:00:00,Rio de Janeiro,363,149,1914,7.78,92.22,2020-04-01,149,00h,01/04
18699,2020-04-01 01:00:00,Rio de Janeiro,362,101,1719,5.88,94.12,2020-04-01,250,01h,01/04
18709,2020-04-01 02:00:00,Rio de Janeiro,364,90,1592,5.65,94.35,2020-04-01,340,02h,01/04
18719,2020-04-01 03:00:00,Rio de Janeiro,363,90,1465,6.14,93.86,2020-04-01,430,03h,01/04
18729,2020-04-01 04:00:00,Rio de Janeiro,363,110,1471,7.48,92.52,2020-04-01,540,04h,01/04
...,...,...,...,...,...,...,...,...,...,...,...
22179,2020-04-15 13:00:00,Rio de Janeiro,364,1173,4298,27.29,72.71,2020-04-15,8587,13h,15/04
22189,2020-04-15 14:00:00,Rio de Janeiro,363,1078,3583,30.09,69.91,2020-04-15,9665,14h,15/04
22199,2020-04-15 15:00:00,Rio de Janeiro,363,1142,3604,31.69,68.31,2020-04-15,10807,15h,15/04
22209,2020-04-15 16:00:00,Rio de Janeiro,364,1153,4028,28.62,71.38,2020-04-15,11960,16h,15/04


In [218]:
# Line chart with one trace per day: running total of people counted by hour.
figure_3 = go.Figure()

for dia in week_now_graph['Dia'].unique():
    dia_rows = week_now_graph.loc[week_now_graph['Dia'] == dia]
    dia_trace = go.Scatter(
        x=dia_rows['horario_legend'],
        y=dia_rows['people_accumulate'],
        mode='lines+markers',
        name=dia,
    )
    figure_3.add_trace(dia_trace)

figure_3.update_layout(
    go.Layout(
        title="Média Acumulada de Pessoas Contadas por Hora",
        title_x=0.5,
        yaxis_title="Média Acumulada de Pessoas",
        plot_bgcolor="rgba(0,0,0,0)",  # transparent plot background
        width=800,
    )
)
figure_3.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')
figure_3.show()

## Quantidade de Pessoas por Hora

In [323]:
# Day labels ordered from newest to oldest; index 1 is the second most recent day.
days_newest_first = week_now.sort_values(by='hora', ascending=False)['Dia'].unique()
week_now_graph = week_now.loc[week_now['Dia'] == days_newest_first[1]];

In [324]:
# dd/mm label of the plotted day (largest label in the selection).
DIA = week_now_graph['Dia'].max();

In [328]:
# Bar chart of people counted per hour for the selected day.
figure_4 = go.Figure()

hourly_bars = go.Bar(
    x=week_now_graph['horario_legend'],
    y=week_now_graph['pessoas_contadas'],
)
figure_4.add_trace(hourly_bars)

figure_4.update_layout(
    go.Layout(
        title=f'{bairro_graph} {DIA}: Média de Pessoas por Hora',
        title_x=0.5,
        yaxis_title="Pessoas Contadas",
        plot_bgcolor="rgba(0,0,0,0)",  # transparent plot background
        width=800,
    )
)
figure_4.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')