In [169]:
import pandas as pd
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings("ignore")
import plotly.graph_objs as go

# Gráfico 1: Média de Pessoas por Dia da Semana

- Set global variables

In [352]:
# Values of the 'bairro' column that are kept for the daily analysis.
BAIRROS_FOR_STUDY = [
    'barra',
    'botafogo',
    'centro',
    'copacabana',
    'flamengo',
    'ipanema_leblon',
    'jacarepagua',
    'Rio_de_Janeiro',
    'sem_bairro',
    'tijuca',
]

In [353]:
# Raw 'bairro' key -> display name used in chart titles and legends.
correct_form_bairro = dict([
    ('barra', 'Barra da Tijuca'),
    ('botafogo', 'Botafogo'),
    ('centro', 'Centro'),
    ('copacabana', 'Copacabana'),
    ('flamengo', 'Flamengo'),
    ('ipanema_leblon', 'Ipanema/Leblon'),
    ('jacarepagua', 'Jacarepaguá'),
    ('Rio_de_Janeiro', 'Rio de Janeiro'),
    ('sem_bairro', 'Outros'),
    ('tijuca', 'Tijuca'),
])

- Importing CSV

In [354]:
# Daily export; the path is resolved relative to the notebook's working directory.
daily_csv_path = '../data/20200413.csv'
history_daily = pd.read_csv(daily_csv_path)

- Data Preprocessing

In [355]:
# Keep only the rows whose 'bairro' is under study.
# .copy() makes the result an independent frame: the following cells add and
# overwrite columns on it, which on a bare .loc slice is the SettingWithCopy
# situation (currently hidden by the global warnings filter).
history_daily = history_daily.loc[history_daily['bairro'].isin(BAIRROS_FOR_STUDY)].copy()

In [356]:
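# NOTE(review): disabled step. It would drop the last 3 characters of every 'dia' value before parsing; delete this cell if it is no longer needed.
# history_daily['dia'] = history_daily.dia.apply(lambda x: str(x)[:-3])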

In [357]:
# Parse 'dia' into pandas datetimes so date comparisons and the .dt accessor work.
history_daily = history_daily.assign(dia=pd.to_datetime(history_daily['dia']))

In [358]:
# Weekday name of every date (the names are the keys of translate_dayofweek).
weekday_names = history_daily['dia'].dt.day_name()
history_daily['day_of_week'] = weekday_names

In [359]:
# English weekday names paired, position by position, with the Portuguese
# labels shown on the charts.
_WEEKDAYS_EN = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
_WEEKDAYS_PT = ('Segunda', 'Terça', 'Quarta', 'Quinta', 'Sexta', 'Sábado', 'Domingo')
translate_dayofweek = dict(zip(_WEEKDAYS_EN, _WEEKDAYS_PT))

The analysis window covers the last 7 days: today plus the six preceding days.

In [360]:
# Reference instant for the time windows below: the wall-clock time at execution.
# NOTE(review): the input file name carries a fixed date (20200413); if the notebook
# is run on a later day the date filters below may select no rows. Confirm whether
# the reference date should be taken from the data instead.
now = datetime.now()

In [361]:
# Six days back from `now`, i.e. a seven-day window that includes today.
lookback = timedelta(days=6)
start_time = now - lookback

In [362]:
# Keep only the calendar date (YYYY-MM-DD) of the window start.
# Not idempotent: after this cell start_time is a str, so re-running it fails.
start_time = f'{start_time:%Y-%m-%d}'

In [363]:
# Rows dated on or after the window start.
# .copy() detaches the selection from history_daily: the next cells add and overwrite
# columns on week_now, which must not act on a slice of the source frame.
week_now = history_daily.loc[history_daily['dia'] >= start_time].copy()

In [364]:
def translate(x):
    """Return the translate_dayofweek entry for x; raises KeyError for unknown names."""
    return translate_dayofweek[x]


# Replace the weekday names by their translated labels.
# Not idempotent: a second run looks up already-translated labels and raises KeyError.
week_now['day_of_week'] = week_now['day_of_week'].map(translate)

In [365]:
# Overwrite 'dia' with its 'dd/mm' text label, using the vectorised .dt accessor
# instead of a per-row lambda. After this cell 'dia' holds strings, not datetimes.
week_now['dia'] = week_now['dia'].dt.strftime('%d/%m')

In [366]:
def _percent_label(value):
    """Round value to the nearest integer and append a percent sign."""
    return str(round(value)) + '%'


# Text shown above each bar of the first chart.
week_now['proporcao_relacao_media_dia_da_semana_legend'] = week_now['proporcao_media_dia_semana'].map(_percent_label)

In [367]:
def _weekday_initial(name):
    """Return ' (X)', where X is the upper-cased first character of str(name)."""
    return ' (' + str(name)[0].upper() + ')'


# NOTE(review): several Portuguese weekday labels share an initial
# (Segunda/Sexta/Sábado, Quarta/Quinta), so the initial alone is ambiguous.
week_now['day_of_week_initial'] = week_now['day_of_week'].map(_weekday_initial)

In [368]:
# X-axis label of the first chart: the 'dd/mm' date followed by the weekday initial.
date_part = week_now['dia']
initial_part = week_now['day_of_week_initial']
week_now['day_of_week_legend'] = date_part + initial_part

In [369]:
# Give the two plotted columns their display names.
display_names = {
    'pessoas_contadas': 'Pessoas Contadas',
    'media_pessoas_contadas': 'Média do Dia da Semana',
}
week_now = week_now.rename(columns=display_names)

- Generating Graph

In [370]:
# Area plotted in the first chart (a raw 'bairro' key) and its rows.
bairro_graph = 'Rio_de_Janeiro'
is_selected_bairro = week_now['bairro'] == bairro_graph
week_graph = week_now.loc[is_selected_bairro]

In [372]:
# Grouped bar chart for the selected area: one bar with the counted people and one
# with the weekday average, per day of the window. The percentage legend is drawn
# above the first bar of each pair.
figure_1 = go.Figure(
    data = [
        go.Bar(
            name = "Pessoas Contadas",  # legend label; was misspelled "Pessos Contadas"
            x = week_graph['day_of_week_legend'],
            y = week_graph['Pessoas Contadas'],
            text  = week_graph['proporcao_relacao_media_dia_da_semana_legend'],
            textposition='outside',
            offsetgroup = 0
        ),
        go.Bar(
            name = "Média do Dia da Semana",
            x = week_graph['day_of_week_legend'],
            y = week_graph['Média do Dia da Semana'],
            offsetgroup = 1
        )
    ],
    layout = go.Layout(
        title = f'{correct_form_bairro[bairro_graph]}: Média de Pessoas por Dia',
        title_x = 0.5,  # centre the title
        yaxis_title = "Pessoas Contadas",
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
        width = 800

    )
)
figure_1.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')
figure_1.show()

# Gráfico 2: Porcentagem em relação à média histórica

In [206]:
# The second chart looks 21 days back from `now`.
lookback_g2 = timedelta(days=21)
start_time = now - lookback_g2

In [207]:
# Keep only the calendar date (YYYY-MM-DD) of the window start.
# Not idempotent: start_time becomes a str here.
start_time = f'{start_time:%Y-%m-%d}'

In [208]:
# Rows of the daily history dated on or after the window start. This rebinds
# week_now, replacing the frame prepared for the first chart.
# .copy() keeps the column assignments and rename below off a slice of history_daily.
week_now = history_daily.loc[history_daily['dia'] >= start_time].copy()

In [209]:
# Reduction relative to the historical average: a copy of
# 'queda_proporcional_dia_semana' under a second column name.
week_now = week_now.assign(proporcao_history=week_now['queda_proporcional_dia_semana'])

In [210]:
# 'dd/mm' text label of each date, using the vectorised .dt accessor instead of
# a per-row lambda.
week_now['dia_legend'] = week_now['dia'].dt.strftime('%d/%m')

In [211]:
# Capitalised column name for the graph legend. Only the column label changes;
# the values in it are untouched.
week_now = week_now.rename(columns={'bairro': 'Bairro'})

- Selecting only some Bairros for graph

In [212]:
# Entries that must not be drawn as individual neighbourhood lines. 'Bairro' still
# holds the raw keys here (they are only translated through correct_form_bairro when
# the traces are named), so the raw keys are what has to be excluded. The display
# names are listed as well in case the column is ever translated earlier.
excluded_bairros = ['Rio_de_Janeiro', 'sem_bairro', 'Rio de Janeiro', 'Outros']

# new_list: every day of the window except the most recent one, so that
# max(new_list) is the second most recent day.
all_days = set(week_now['dia'])
latest_day = max(all_days) if all_days else None
new_list = all_days - {latest_day}
# With a single day available, fall back to it instead of failing on an empty set.
reference_day = max(new_list) if new_list else latest_day

# Neighbourhoods present on the reference day, ordered by ascending
# 'queda_proporcional_dia_semana'.
bairros_for_graph = (
    week_now.loc[
        (week_now['dia'] == reference_day)
        & ~week_now['Bairro'].isin(excluded_bairros)
    ]
    .sort_values(by='queda_proporcional_dia_semana', ascending=True)
    .Bairro.unique()
)

- Group by Week and Bairro. Graph per Week

In [213]:
def _iso_week_label(day):
    """Return the ISO week number of day as a string."""
    return str(day.isocalendar()[1])


week_now['week_number'] = week_now['dia'].map(_iso_week_label)

In [214]:
# One row per (Bairro, ISO week): the first day of that week present in the data
# and the mean of 'queda_proporcional_dia_semana'.
# 'week_number' is a string, so groupby orders the groups lexicographically
# ('10' before '9', '1' before '52'). Sorting by the week's first day puts every
# neighbourhood's points in chronological order for the line chart.
week_graph = (
    week_now
    .groupby(['Bairro', 'week_number'])
    .agg({'dia': 'min', 'queda_proporcional_dia_semana': 'mean'})
    .reset_index()
    .sort_values(['Bairro', 'dia'])
    .reset_index(drop=True)
)

In [215]:
# 'dd/mm' label of each week's first day, using the vectorised .dt accessor
# instead of a per-row lambda.
week_graph['dia_legend'] = week_graph['dia'].dt.strftime('%d/%m')

In [216]:
# X-axis label: 'W<iso week> - <dd/mm of the week's first day>'.
week_prefix = 'W' + week_graph['week_number']
week_graph['week_legend'] = week_prefix + ' - ' + week_graph['dia_legend']

In [217]:
# Raw 'bairro' key -> display name. Same content as the mapping defined in the
# global-variables cell near the top of the notebook.
_bairro_keys = (
    'barra', 'botafogo', 'centro', 'copacabana', 'flamengo',
    'ipanema_leblon', 'jacarepagua', 'Rio_de_Janeiro', 'sem_bairro', 'tijuca',
)
_bairro_names = (
    'Barra da Tijuca', 'Botafogo', 'Centro', 'Copacabana', 'Flamengo',
    'Ipanema/Leblon', 'Jacarepaguá', 'Rio de Janeiro', 'Outros', 'Tijuca',
)
correct_form_bairro = dict(zip(_bairro_keys, _bairro_names))

In [351]:
# Line chart with one trace per selected neighbourhood: weekly mean of
# 'queda_proporcional_dia_semana' against the week label.
figure_2 = go.Figure()
for bairro in bairros_for_graph:
    # Rows of this neighbourhood, selected once and reused for both axes.
    bairro_weeks = week_graph.loc[week_graph['Bairro'] == bairro]
    bairro_trace = go.Scatter(
        x = bairro_weeks['week_legend'],
        y = bairro_weeks['queda_proporcional_dia_semana'],
        mode = 'lines+markers',
        name = correct_form_bairro[bairro]
    )
    figure_2.add_trace(bairro_trace)

layout_2 = go.Layout(
    title = 'Redução em relação a Média Histórica',
    title_x = 0.5,
    yaxis_title = "Redução (%)",
    xaxis_title = "IsoWeek - Dia de Início da Semana",
    plot_bgcolor= "rgba(0,0,0,0)",
    width = 800,
)
figure_2.update_layout(layout_2)
figure_2.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')
figure_2.show()

# Gráfico 3: Quantidade acumulada de pessoas por hora: hoje, há 7 dias e há 14 dias

In [329]:
# Hourly export; the path is resolved relative to the notebook's working directory.
hourly_csv_path = '../data/20200413_hora.csv'
history_hourly = pd.read_csv(hourly_csv_path)

In [330]:
# Redefined for the hourly analysis. Unlike the first definition, this list holds
# display names, because the hourly 'bairro' column is translated before filtering.
BAIRROS_FOR_STUDY = [
    'Rio de Janeiro',
]

In [331]:
# Raw 'bairro' key -> display name, used below to translate the hourly data.
# Same content as the mapping defined in the global-variables cell near the top.
correct_form_bairro = dict([
    ('barra', 'Barra da Tijuca'),
    ('botafogo', 'Botafogo'),
    ('centro', 'Centro'),
    ('copacabana', 'Copacabana'),
    ('flamengo', 'Flamengo'),
    ('ipanema_leblon', 'Ipanema/Leblon'),
    ('jacarepagua', 'Jacarepaguá'),
    ('Rio_de_Janeiro', 'Rio de Janeiro'),
    ('sem_bairro', 'Outros'),
    ('tijuca', 'Tijuca'),
])

In [332]:
def correct_name(x):
    """Return the display name for raw key x; raises KeyError for unknown keys."""
    return correct_form_bairro[x]

In [333]:
# Translate the raw keys in 'bairro' to display names.
# Not idempotent: a second run looks up display names and raises KeyError.
history_hourly['bairro'] = history_hourly['bairro'].map(correct_name)

- Analyze Bairro in BAIRROS_FOR_STUDY

In [334]:
# Keep only the areas listed in BAIRROS_FOR_STUDY (display names at this point).
# .copy() makes the result an independent frame, so the column overwrites in the
# following cells do not act on a slice of the loaded data.
history_hourly = history_hourly.loc[history_hourly['bairro'].isin(BAIRROS_FOR_STUDY)].copy()

- Data Preprocessing

Removing time zone '-03'

In [335]:
def _drop_last_three_chars(raw):
    """Return str(raw) without its final three characters."""
    return str(raw)[:-3]


# Per the note above this cell, the dropped characters are the '-03' time-zone suffix.
# Not idempotent: every run removes three more characters.
history_hourly['hora'] = history_hourly['hora'].map(_drop_last_three_chars)

In [336]:
# Reference instant for the hourly windows below: the wall-clock time at execution.
# This rebinds the `now` set earlier in the notebook.
# NOTE(review): the input file name carries a fixed date (20200413); on a later day
# the filters below may select no rows. Confirm whether this should come from the data.
now = datetime.now()

In [337]:
start_time = now - timedelta(days=14) # Window start: 14 days back, which reaches the oldest day compared below

In [338]:
# Text form of the window start, pinned to 00:00 of that day.
# Not idempotent: start_time becomes a str here.
start_time = f'{start_time:%Y-%m-%d 00:00}'

In [339]:
# Parse the cleaned 'hora' strings into pandas datetimes.
history_hourly = history_hourly.assign(hora=pd.to_datetime(history_hourly['hora']))

In [340]:
# Hourly rows on or after the window start. This rebinds week_now once more.
# .copy() keeps the column assignments below off a slice of history_hourly.
week_now = history_hourly.loc[history_hourly['hora'] >= start_time].copy()

Accumulated Sum

In [341]:
# Calendar day ('YYYY-MM-DD') of every hourly row, used as the grouping key for the
# running total. Uses the vectorised .dt accessor instead of a per-row lambda.
week_now['day'] = week_now['hora'].dt.strftime('%Y-%m-%d')

In [342]:
# Running total of 'pessoas_contadas' within each (day, bairro).
# - The column is selected before cumsum(), so the result is a Series aligned on the
#   original index rather than a one-column DataFrame produced by a dict-style agg.
# - Rows are ordered by 'hora' first (stable sort), so the total accumulates
#   chronologically whatever the row order of the file. The assignment realigns on
#   the index, so week_now itself keeps its row order.
week_now['people_accumulate'] = (
    week_now
    .sort_values('hora', kind='mergesort')
    .groupby(['day', 'bairro'])['pessoas_contadas']
    .cumsum()
)

In [343]:
# Hour-of-day label such as '07h', using the vectorised .dt accessor instead of
# a per-row lambda.
week_now['horario_legend'] = week_now['hora'].dt.strftime('%H') + 'h'

In [344]:
# 'dd/mm' label of every row's day, using the vectorised .dt accessor instead of
# a per-row lambda.
week_now['day_legend'] = week_now['hora'].dt.strftime('%d/%m')

- Graph

In [345]:
# 'Dia' is the column name the plotting cells below read the day label from.
week_now = week_now.rename(columns={'day_legend': 'Dia'})

In [346]:
last_week_day = now - timedelta(days=7) # Same weekday, one week before `now`
last_last_week_day = now - timedelta(days=14) # Same weekday, two weeks before `now`

In [347]:
# Render the three comparison days as 'YYYY-MM-DD', the format of the 'day' column.
# Not idempotent: the first two names become str here.
last_week_day = f'{last_week_day:%Y-%m-%d}'
last_last_week_day = f'{last_last_week_day:%Y-%m-%d}'
today_now = f'{now:%Y-%m-%d}'

In [348]:
# Area plotted in the hourly charts. Here it is the display name, because the hourly
# 'bairro' column was translated above; the first chart set this name to a raw key.
bairro_graph = 'Rio de Janeiro'

In [349]:
# Rows of the selected area on the three comparison days.
comparison_days = [last_week_day, last_last_week_day, today_now]
in_selected_bairro = week_now['bairro'] == bairro_graph
on_comparison_day = week_now['day'].isin(comparison_days)
week_now_graph = week_now.loc[in_selected_bairro & on_comparison_day]

In [350]:
# Line chart with one trace per comparison day: running total of people against
# the hour-of-day label.
figure_3 = go.Figure()

for dia in week_now_graph.Dia.unique():
    # Rows of this day, selected once and reused for both axes.
    day_rows = week_now_graph.loc[week_now_graph['Dia'] == dia]
    day_trace = go.Scatter(
        x = day_rows['horario_legend'],
        y = day_rows['people_accumulate'],
        mode = 'lines+markers',
        name = dia
    )
    figure_3.add_trace(day_trace)

layout_3 = go.Layout(
    title = "Média Acumulada de Pessoas Contadas por Hora",
    title_x = 0.5,
    yaxis_title = "Média Acumulada de Pessoas",
    plot_bgcolor= "rgba(0,0,0,0)",
    width = 800,
)
figure_3.update_layout(layout_3)
figure_3.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')
figure_3.show()

## Quantidade de Pessoas por Hora

In [323]:
# Day labels present in the window, most recent first; position 1 is the day before
# the latest one (raises IndexError when fewer than two days are present).
days_recent_first = week_now.sort_values(by='hora', ascending=False)['Dia'].unique()
# Restrict to bairro_graph as well: the chart title names that area, so the bars
# must not mix in other areas if BAIRROS_FOR_STUDY ever holds more than one.
week_now_graph = week_now.loc[
    (week_now['bairro'] == bairro_graph)
    & (week_now['Dia'] == days_recent_first[1])
]

In [324]:
# Every row of week_now_graph carries the same 'Dia' label, so max() just extracts it.
day_labels = week_now_graph['Dia']
DIA = max(day_labels)

In [328]:
# Bar chart of people counted per hour for the selected day.
hourly_bars = go.Bar(
    x = week_now_graph['horario_legend'],
    y = week_now_graph['pessoas_contadas']
)
layout_4 = go.Layout(
    title = f'{bairro_graph} {DIA}: Média de Pessoas por Hora',
    title_x = 0.5,
    yaxis_title = "Pessoas Contadas",
    plot_bgcolor= "rgba(0,0,0,0)",
    width = 800,
)

figure_4 = go.Figure()
figure_4.add_trace(hourly_bars)
figure_4.update_layout(layout_4)
figure_4.update_yaxes(showgrid=True, gridwidth=1, gridcolor='black')