<h1>Características dos Dados</h1>

<h2>Importação das bibliotecas</h2>

In [1]:
%matplotlib widget

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import mplcursors
from IPython.display import display, clear_output
from ipywidgets import widgets, VBox, HBox, Output
from widgets import create_slider, create_buttons, create_radio_buttons, create_dropdowns, create_labels
from scipy.stats import pearsonr

import seaborn as sns

<h2>Criação dos Widgets</h2>

In [2]:
# Common sizing for the four period dropdowns.
layout = widgets.Layout(width='100px')

# Option values offered by the selectors: '01'..'12' and '2019'..'2024'.
_month_choices = [f'{month:02d}' for month in range(1, 13)]
_year_choices = [str(year) for year in range(2019, 2025)]

# Each dropdown receives its own copy of the option list.
start_month, end_month = (
    create_dropdowns(list(_month_choices), '', layout=layout) for _ in range(2)
)
start_year, end_year = (
    create_dropdowns(list(_year_choices), '', layout=layout) for _ in range(2)
)

# Static captions shown next to the selectors.
label = create_labels(
    'Dados disponíveis: 01/2019 à 08/2024',
    layout=widgets.Layout(width='250px'),
)
label1 = create_labels('Início do período', layout=widgets.Layout(width='100px'))
label2 = create_labels('Fim do período', layout=widgets.Layout(width='100px'))

button = create_buttons('Selecionar período')

# Output areas: validation messages, charts, and one spare area.
notification_output = Output()
chart_output = Output()
pie_chart_output = Output()

In [3]:
def millions(x, _):
    """Tick formatter: return ``x`` scaled to millions with one decimal place.

    The second argument (the tick position supplied by ``FuncFormatter``)
    is ignored.
    """
    scaled = x * 1e-6
    return format(scaled, '.1f')

def plot_data(start_date_dt, end_date_dt):
    """Plot total trips and unique people per month for the chosen period.

    Loads ``monthly_sums.csv``, keeps the rows whose month lies between
    ``start_date_dt`` and ``end_date_dt`` (both inclusive) and draws the two
    series into ``chart_output``, replacing its previous contents.
    """
    with chart_output:
        chart_output.clear_output(wait=True)

        monthly = pd.read_csv('monthly_sums.csv')
        monthly['date'] = pd.to_datetime(monthly['Mês'], format='%Y %m')
        within_period = (monthly['date'] >= start_date_dt) & (monthly['date'] <= end_date_dt)
        filtered_df = monthly[within_period]
        dates = filtered_df['date']

        plt.ion()
        _, ax = plt.subplots(figsize=(20, 12))

        # (column, legend label, colour) for each plotted series.
        series = (
            ('total_viagens', 'Total de Viagens', 'blue'),
            ('total_pessoas', 'Total de Pessoas Únicas', '#99CCFF'),
        )
        for column, legend_label, colour in series:
            ax.plot(dates, filtered_df[column], label=legend_label, color=colour)

        # Markers drawn on the data points; the hover cursor below is
        # attached to these scatter artists, not to the lines.
        point_markers = [
            ax.scatter(dates, filtered_df[column], color=colour, marker='o', s=50)
            for column, _label, colour in series
        ]

        ax.set_title('Total de Viagens/Pessoas')
        ax.set_xlabel('Data')
        ax.set_ylabel('Total')

        # One tick per month present in the filtered data.
        month_ticks = dates.dt.to_period('M').unique().to_timestamp()
        ax.set_xticks(month_ticks)
        ax.set_xticklabels([tick.strftime('%b %Y') for tick in month_ticks], rotation=45)

        ax.grid()
        ax.legend()
        plt.tight_layout()

        def _annotate(sel):
            """Fill the hover annotation with the month and both totals."""
            idx = sel.index
            sel.annotation.set_text(
                f"Data: {filtered_df['date'].iloc[idx].strftime('%b %Y')}\n"
                f"Total Viagens: {filtered_df['total_viagens'].iloc[idx]}\n"
                f"Total Pessoas Únicas: {filtered_df['total_pessoas'].iloc[idx]}"
            )

        cursor = mplcursors.cursor(point_markers, highlight=True)
        cursor.connect("add", _annotate)

        plt.show()

In [4]:
def plot_type_data(start_date_dt, end_date_dt):
    """Show trips split by type (leisure vs. utilitarian) for the period.

    Appends three figures to ``chart_output``: a monthly line chart, a
    Pearson correlation heatmap (total, leisure, utilitarian) and a pie chart
    with the share of each type summed over the period. Data comes from
    ``monthly_sums.csv`` filtered to ``[start_date_dt, end_date_dt]``.
    """
    with chart_output:
        monthly = pd.read_csv('monthly_sums.csv')
        monthly['date'] = pd.to_datetime(monthly['Mês'], format='%Y %m')
        within_period = (monthly['date'] >= start_date_dt) & (monthly['date'] <= end_date_dt)
        filtered_df = monthly[within_period]
        dates = filtered_df['date']

        # --- Line chart -----------------------------------------------------
        plt.ion()
        plt.figure(figsize=(20, 12))
        ax = plt.subplot(1, 1, 1)

        leisure = filtered_df['total_viagens_lazer']
        commute = filtered_df['total_viagens_commute']
        ax.plot(dates, leisure, label='Lazer', color='purple')
        ax.plot(dates, commute, label='Utilitárias', color='pink')

        scatter_lazer = ax.scatter(dates, leisure, color='purple', s=50, marker='o')
        scatter_commute = ax.scatter(dates, commute, color='pink', s=50, marker='o')

        ax.set_title('Total de Viagens por tipo de viagens')
        ax.set_xlabel('Data')
        ax.set_ylabel('Total de Viagens (em milhões)')

        # Tick values are rendered in millions to match the axis label.
        ax.yaxis.set_major_formatter(FuncFormatter(millions))

        month_ticks = dates.dt.to_period('M').unique().to_timestamp()
        ax.set_xticks(month_ticks)
        ax.set_xticklabels([tick.strftime('%b %Y') for tick in month_ticks], rotation=45)

        ax.grid()
        ax.legend()

        def _annotate(sel):
            """Fill the hover annotation with the month and the hovered series' value."""
            idx = sel.index
            header = f"Data: {filtered_df['date'].iloc[idx].strftime('%b %Y')}\n"
            if sel.artist == scatter_lazer:
                detail = f"Total de Viagens de Lazer: {filtered_df['total_viagens_lazer'].iloc[idx]}"
            else:
                detail = f"Total de Viagens Utilitárias: {filtered_df['total_viagens_commute'].iloc[idx]}"
            sel.annotation.set_text(header + detail)

        cursor = mplcursors.cursor([scatter_lazer, scatter_commute], highlight=True)
        cursor.connect("add", _annotate)

        plt.tight_layout()
        plt.show()

        # --- Correlation heatmap --------------------------------------------
        corr_columns = ['total_viagens', 'total_viagens_lazer', 'total_viagens_commute']
        correlation_matrix = filtered_df[corr_columns].corr(method='pearson')
        labels = ['Total', 'Lazer', 'Utilitárias']

        plt.figure(figsize=(10, 8))
        plt.xticks(rotation=0, ha='center', fontsize=11)
        plt.yticks(rotation=0, ha='right', fontsize=11)
        heatmap_options = dict(
            annot=True,
            cmap="Blues",
            center=0,
            fmt=".2f",
            cbar_kws={'label': 'Correlação de Pearson'},
        )
        sns.heatmap(correlation_matrix, xticklabels=labels, yticklabels=labels, **heatmap_options)
        plt.show()

        # --- Pie chart: share of each type over the whole period -------------
        plt.figure(figsize=(8, 8))
        type_labels = ['Lazer', 'Utilitárias']
        types = [
            filtered_df[column].sum()
            for column in ('total_viagens_lazer', 'total_viagens_commute')
        ]

        # Percentages are drawn on the wedges; names go in the legend instead.
        wedges, texts, autotexts = plt.pie(
            types,
            labels=None,
            autopct='%1.2f%%',
            startangle=90,
            colors=['purple', 'pink'],
            pctdistance=1.05,
        )

        plt.title('Distribuição de Viagens por Tipo (Total no Período)')

        for pct_text in autotexts:
            pct_text.set_fontsize(12)
            pct_text.set_color('black')

        plt.legend(wedges, type_labels, title='Tipo', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

        plt.axis('equal')  # keep the pie circular
        plt.tight_layout()
        plt.show()

In [5]:
def plot_gender_data(start_date_dt, end_date_dt):
    """Show unique people split by gender for the selected period.

    Appends three figures to ``chart_output``: a monthly line chart, a
    Pearson correlation heatmap (total and each gender) and a pie chart with
    the share of each gender summed over the period. Data comes from
    ``monthly_sums.csv`` filtered to ``[start_date_dt, end_date_dt]``
    (both bounds inclusive).
    """
    with chart_output:
        df = pd.read_csv('monthly_sums.csv')
        df['date'] = pd.to_datetime(df['Mês'], format='%Y %m')
        filtered_df = df[(df['date'] >= start_date_dt) & (df['date'] <= end_date_dt)]

        # (column, legend label, colour, tooltip caption) for each gender.
        # Keeping everything in one table guarantees the legend, the hover
        # cursor and the tooltip text all cover the same three series.
        gender_series = (
            ('total_pessoas_masculino', 'Masculino', 'blue',
             'Total de Pessoas do Gênero Masculino'),
            ('total_pessoas_feminino', 'Feminino', 'red',
             'Total de Pessoas do Gênero Feminino'),
            ('total_pessoas_nao_especificado', 'Não Especificado', 'black',
             'Total de Pessoas de Gênero Não Especificado'),
        )

        # --- Line chart -----------------------------------------------------
        plt.figure(figsize=(20, 12))
        ax = plt.subplot(1, 1, 1)
        for column, legend_label, colour, _caption in gender_series:
            ax.plot(filtered_df['date'], filtered_df[column], label=legend_label, color=colour)

        # One scatter per series; remembered with its column and caption so
        # the hover callback can tell which series a selected point belongs to.
        scatter_info = [
            (ax.scatter(filtered_df['date'], filtered_df[column], color=colour, s=50, marker='o'),
             column, caption)
            for column, _legend_label, colour, caption in gender_series
        ]

        ax.set_title('Total de Pessoas Únicas por Gênero')
        ax.set_xlabel('Data')
        ax.set_ylabel('Total de Pessoas Únicas (em milhões)')

        # Tick values are rendered in millions to match the axis label.
        ax.yaxis.set_major_formatter(FuncFormatter(millions))

        unique_dates = filtered_df['date'].dt.to_period('M').unique().to_timestamp()
        ax.set_xticks(unique_dates)
        ax.set_xticklabels([date.strftime('%b %Y') for date in unique_dates], rotation=45)

        ax.grid()
        ax.legend()

        def _annotate(sel):
            """Fill the hover annotation with the month and the hovered series' value."""
            for artist, column, caption in scatter_info:
                if sel.artist is artist:
                    sel.annotation.set_text(
                        f"Data: {filtered_df['date'].iloc[sel.index].strftime('%b %Y')}\n"
                        f"{caption}: {filtered_df[column].iloc[sel.index]}"
                    )
                    return

        # All three scatters are registered, including the unspecified one.
        cursor = mplcursors.cursor([artist for artist, _column, _caption in scatter_info], highlight=True)
        cursor.connect("add", _annotate)

        plt.tight_layout()
        plt.show()

        # --- Correlation heatmap --------------------------------------------
        data = filtered_df[['total_pessoas', 'total_pessoas_masculino', 'total_pessoas_feminino', 'total_pessoas_nao_especificado']]
        correlation_matrix = data.corr(method='pearson')

        labels = ['Total', 'Masculino', 'Feminino', 'Não Especificado']

        plt.figure(figsize=(10, 8))
        plt.xticks(rotation=0, ha='center', fontsize=11)
        plt.yticks(rotation=0, ha='right', fontsize=11)
        sns.heatmap(correlation_matrix,
                    annot=True,
                    cmap="Blues",
                    center=0,
                    fmt=".2f",
                    cbar_kws={'label': 'Pearson Correlation'},
                    xticklabels=labels,
                    yticklabels=labels)
        plt.show()

        # --- Pie chart: share of each gender over the whole period -----------
        gender_labels = ['Masculino', 'Feminino', 'Não Especificado']

        plt.figure(figsize=(8, 8))
        genders = [
            filtered_df['total_pessoas_masculino'].sum(),
            filtered_df['total_pessoas_feminino'].sum(),
            filtered_df['total_pessoas_nao_especificado'].sum(),
        ]

        # Percentages are drawn on the wedges; names go in the legend instead.
        wedges, texts, autotexts = plt.pie(
            genders,
            labels=None,
            autopct='%1.2f%%',
            startangle=90,
            colors=['blue', 'red', 'black'],
            pctdistance=1.05
        )

        plt.title('Distribuição de Pessoas por Gênero (Total no Período)')

        for autotext in autotexts:
            autotext.set_fontsize(12)
            autotext.set_color('black')

        plt.legend(wedges, gender_labels, title='Gênero', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

        plt.axis('equal')  # keep the pie circular
        plt.tight_layout()
        plt.show()

In [6]:
def plot_age_data(start_date_dt, end_date_dt):
    """Show unique people split by age band for the selected period.

    Appends three figures to ``chart_output``: a monthly line chart, a
    Pearson correlation heatmap (total and each age band) and a pie chart
    with the share of each band summed over the period. Data comes from
    ``monthly_sums.csv`` filtered to ``[start_date_dt, end_date_dt]``
    (both bounds inclusive).
    """
    with chart_output:
        df = pd.read_csv('monthly_sums.csv')
        df['date'] = pd.to_datetime(df['Mês'], format='%Y %m')
        filtered_df = df[(df['date'] >= start_date_dt) & (df['date'] <= end_date_dt)]

        # (column, legend label, colour, tooltip caption) for each age band.
        age_series = (
            ('total_pessoas_18_34', '18-34', 'red', 'Total de Pessoas entre 18-34 anos'),
            ('total_pessoas_35_54', '35-54', 'orange', 'Total de Pessoas entre 35-54 anos'),
            ('total_pessoas_55_64', '55-64', 'yellow', 'Total de Pessoas entre 55-64 anos'),
            ('total_pessoas_o65', '65+', 'green', 'Total de Pessoas com mais de 65 anos'),
        )

        # --- Line chart -----------------------------------------------------
        plt.ion()
        plt.figure(figsize=(20, 12))
        ax = plt.subplot(1, 1, 1)
        for column, legend_label, colour, _caption in age_series:
            ax.plot(filtered_df['date'], filtered_df[column], label=legend_label, color=colour)

        # One scatter per band, remembered with its column and caption so the
        # hover callback can tell which band a selected point belongs to.
        scatter_info = [
            (ax.scatter(filtered_df['date'], filtered_df[column], color=colour, s=50, marker='o'),
             column, caption)
            for column, _legend_label, colour, caption in age_series
        ]

        ax.set_title('Total de Pessoas Únicas por Idade')
        ax.set_xlabel('Data')
        ax.set_ylabel('Total de Pessoas Únicas (em milhões)')

        # The axis label announces millions, so render the ticks in millions,
        # as the gender and trip-type charts do.
        ax.yaxis.set_major_formatter(FuncFormatter(millions))

        unique_dates = filtered_df['date'].dt.to_period('M').unique().to_timestamp()
        ax.set_xticks(unique_dates)
        ax.set_xticklabels([date.strftime('%b %Y') for date in unique_dates], rotation=45)

        ax.grid()
        ax.legend()

        def _annotate(sel):
            """Fill the hover annotation with the month and the hovered band's value."""
            for artist, column, caption in scatter_info:
                if sel.artist is artist:
                    sel.annotation.set_text(
                        f"Data: {filtered_df['date'].iloc[sel.index].strftime('%b %Y')}\n"
                        f"{caption}: {filtered_df[column].iloc[sel.index]}"
                    )
                    return

        cursor = mplcursors.cursor([artist for artist, _column, _caption in scatter_info], highlight=True)
        cursor.connect("add", _annotate)

        plt.tight_layout()
        plt.show()

        # --- Correlation heatmap --------------------------------------------
        data = filtered_df[['total_pessoas', 'total_pessoas_18_34', 'total_pessoas_35_54', 'total_pessoas_55_64', 'total_pessoas_o65']]
        correlation_matrix = data.corr(method='pearson')

        labels = ['Total', '18-34', '35-54', '55-64', '65+']

        plt.figure(figsize=(10, 8))
        plt.xticks(rotation=0, ha='center', fontsize=12)
        plt.yticks(rotation=0, ha='right', fontsize=12)
        sns.heatmap(correlation_matrix,
                    annot=True,
                    cmap="Blues",
                    center=0,
                    fmt=".2f",
                    cbar_kws={'label': 'Pearson Correlation'},
                    xticklabels=labels,
                    yticklabels=labels)
        plt.show()

        # --- Pie chart: share of each age band over the whole period ---------
        plt.figure(figsize=(8, 8))
        ages = [filtered_df[column].sum() for column, _label, _colour, _caption in age_series]
        age_labels = ['18-34', '35-54', '55-64', '65+']

        # Percentages are drawn on the wedges; names go in the legend instead.
        wedges, texts, autotexts = plt.pie(
            ages,
            labels=None,
            autopct='%1.2f%%',
            startangle=90,
            colors=['red', 'orange', 'yellow', 'green'],
            pctdistance=1.05
        )

        plt.title('Distribuição de Pessoas por Idade (Total no Período)')

        for autotext in autotexts:
            autotext.set_fontsize(12)
            autotext.set_color('black')

        plt.legend(wedges, age_labels, title='Idade', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

        plt.axis('equal')  # keep the pie circular
        plt.tight_layout()
        plt.show()

In [7]:
def plot_activities_data(start_date_dt, end_date_dt):
    """Show activities split by type (leisure vs. utilitarian) for the period.

    Appends three figures to ``chart_output``: a monthly line chart, a
    Pearson correlation heatmap (``count``, leisure, utilitarian) and a pie
    chart with the share of each type summed over the period. Data comes from
    ``monthly_sums_hex_o.csv`` filtered to ``[start_date_dt, end_date_dt]``.
    """
    with chart_output:
        activities = pd.read_csv('monthly_sums_hex_o.csv')
        activities['date'] = pd.to_datetime(activities['Mês'], format='%Y %m')
        within_period = (activities['date'] >= start_date_dt) & (activities['date'] <= end_date_dt)
        filtered_df = activities[within_period]
        dates = filtered_df['date']

        # --- Line chart -----------------------------------------------------
        plt.ion()
        plt.figure(figsize=(20, 12))
        ax = plt.subplot(1, 1, 1)

        leisure = filtered_df['leisure_count']
        commute = filtered_df['commute_count']
        ax.plot(dates, leisure, label='Lazer', color='purple')
        ax.plot(dates, commute, label='Utilitárias', color='pink')

        scatter_lazer = ax.scatter(dates, leisure, color='purple', s=50, marker='o')
        scatter_commute = ax.scatter(dates, commute, color='pink', s=50, marker='o')

        ax.set_title('Total de atividade por tipo')
        ax.set_xlabel('Data')
        ax.set_ylabel('Total de Atividades')

        # Note: unlike the trip charts, the y axis keeps matplotlib's default
        # tick formatting here (no millions formatter).

        month_ticks = dates.dt.to_period('M').unique().to_timestamp()
        ax.set_xticks(month_ticks)
        ax.set_xticklabels([tick.strftime('%b %Y') for tick in month_ticks], rotation=45)

        ax.grid()
        ax.legend(fontsize=20)

        def _annotate(sel):
            """Fill the hover annotation with the month and the hovered series' value."""
            idx = sel.index
            header = f"Data: {filtered_df['date'].iloc[idx].strftime('%b %Y')}\n"
            if sel.artist == scatter_lazer:
                detail = f"Total de Viagens de Lazer: {filtered_df['leisure_count'].iloc[idx]}"
            else:
                detail = f"Total de Viagens Utilitárias: {filtered_df['commute_count'].iloc[idx]}"
            sel.annotation.set_text(header + detail)

        cursor = mplcursors.cursor([scatter_lazer, scatter_commute], highlight=True)
        cursor.connect("add", _annotate)

        plt.tight_layout()
        plt.show()

        # --- Correlation heatmap --------------------------------------------
        correlation_matrix = filtered_df[['count', 'leisure_count', 'commute_count']].corr(method='pearson')
        labels = ['Total', 'Lazer', 'Utilitárias']

        plt.figure(figsize=(10, 8))
        plt.xticks(rotation=0, ha='center', fontsize=11)
        plt.yticks(rotation=0, ha='right', fontsize=11)
        heatmap_options = dict(
            annot=True,
            cmap="Blues",
            center=0,
            fmt=".2f",
            cbar_kws={'label': 'Correlação de Pearson'},
        )
        sns.heatmap(correlation_matrix, xticklabels=labels, yticklabels=labels, **heatmap_options)
        plt.show()

        # --- Pie chart: share of each type over the whole period -------------
        plt.figure(figsize=(8, 8))
        type_labels = ['Lazer', 'Utilitárias']
        types = [filtered_df[column].sum() for column in ('leisure_count', 'commute_count')]

        # Percentages are drawn on the wedges; names go in the legend instead.
        wedges, texts, autotexts = plt.pie(
            types,
            labels=None,
            autopct='%1.2f%%',
            startangle=90,
            colors=['purple', 'pink'],
            pctdistance=1.05,
        )

        for pct_text in autotexts:
            pct_text.set_fontsize(12)
            pct_text.set_color('black')

        plt.legend(wedges, type_labels, title='Tipo', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

        plt.axis('equal')  # keep the pie circular
        plt.tight_layout()
        plt.show()

In [8]:
def plot_period_data(start_date_dt, end_date_dt):
    """Show activities split by time-of-day period for the selected months.

    Appends three figures to ``chart_output``: a monthly line chart, a
    Pearson correlation heatmap (``count`` and each time-of-day period) and a
    pie chart with the share of each period summed over the selection. Data
    comes from ``monthly_sums_hex_o.csv`` filtered to
    ``[start_date_dt, end_date_dt]`` (both bounds inclusive).
    """
    with chart_output:
        dfo = pd.read_csv('monthly_sums_hex_o.csv')
        dfo['date'] = pd.to_datetime(dfo['Mês'], format='%Y %m')
        filtered_df = dfo[(dfo['date'] >= start_date_dt) & (dfo['date'] <= end_date_dt)]

        # (column, time-range label, colour) for each time-of-day period.
        period_series = (
            ('morning_count', '05:00-10:00', 'red'),
            ('midday_count', '10:00-15:00', 'orange'),
            ('evening_count', '15:00-20:00', 'yellow'),
            ('overnight_count', '20:00-05:00', 'green'),
        )

        # --- Line chart -----------------------------------------------------
        plt.ion()
        plt.figure(figsize=(20, 12))
        ax = plt.subplot(1, 1, 1)
        for column, time_range, colour in period_series:
            ax.plot(filtered_df['date'], filtered_df[column], label=time_range, color=colour)

        # One scatter per period, remembered with its column and time range so
        # the hover callback can tell which period a selected point belongs to.
        scatter_info = [
            (ax.scatter(filtered_df['date'], filtered_df[column], color=colour, s=50, marker='o'),
             column, time_range)
            for column, time_range, colour in period_series
        ]

        ax.set_title('Atividades por período do dia')
        ax.set_xlabel('Data')
        ax.set_ylabel('Atividades')

        unique_dates = filtered_df['date'].dt.to_period('M').unique().to_timestamp()
        ax.set_xticks(unique_dates)
        ax.set_xticklabels([date.strftime('%b %Y') for date in unique_dates], rotation=45)

        ax.grid()
        ax.legend(fontsize=20)

        def _annotate(sel):
            """Fill the hover annotation with the month and the hovered period's value."""
            for artist, column, time_range in scatter_info:
                if sel.artist is artist:
                    sel.annotation.set_text(
                        f"Data: {filtered_df['date'].iloc[sel.index].strftime('%b %Y')}\n"
                        f"Total de Atividades entre {time_range}: {filtered_df[column].iloc[sel.index]}"
                    )
                    return

        cursor = mplcursors.cursor([artist for artist, _column, _time_range in scatter_info], highlight=True)
        cursor.connect("add", _annotate)

        plt.tight_layout()
        plt.show()

        # --- Correlation heatmap --------------------------------------------
        data = filtered_df[['count', 'morning_count', 'midday_count', 'evening_count', 'overnight_count']]
        correlation_matrix = data.corr(method='pearson')

        labels = ['Total', '05:00-10:00', '10:00-15:00', '15:00-20:00', '20:00-05:00']

        plt.figure(figsize=(10, 8))
        plt.xticks(rotation=0, ha='center', fontsize=12)
        plt.yticks(rotation=0, ha='right', fontsize=12)
        sns.heatmap(correlation_matrix,
                    annot=True,
                    cmap="Blues",
                    center=0,
                    fmt=".2f",
                    cbar_kws={'label': 'Pearson Correlation'},
                    xticklabels=labels,
                    yticklabels=labels)
        plt.show()

        # --- Pie chart: share of each period over the whole selection --------
        plt.figure(figsize=(8, 8))
        period_totals = [filtered_df[column].sum() for column, _time_range, _colour in period_series]
        period_labels = ['05:00-10:00', '10:00-15:00', '15:00-20:00', '20:00-05:00']

        # Percentages are drawn on the wedges; names go in the legend instead.
        # This figure intentionally carries no title.
        wedges, texts, autotexts = plt.pie(
            period_totals,
            labels=None,
            autopct='%1.2f%%',
            startangle=90,
            colors=['red', 'orange', 'yellow', 'green'],
            pctdistance=1.05
        )

        for autotext in autotexts:
            autotext.set_fontsize(12)
            autotext.set_color('black')

        # The wedges are time-of-day periods, so the legend is titled
        # accordingly (it previously said 'Idade', copied from the age chart).
        plt.legend(wedges, period_labels, title='Período', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

        plt.axis('equal')  # keep the pie circular
        plt.tight_layout()
        plt.show()

In [9]:
def test(start_date_dt, end_date_dt):
    """Show the Pearson correlation between monthly trips and activities.

    Reads ``total_viagens`` from ``monthly_sums.csv`` and ``count`` from
    ``monthly_sums_hex_o.csv``, keeps the months within
    ``[start_date_dt, end_date_dt]`` (both bounds inclusive) and appends a
    2x2 correlation heatmap to ``chart_output``.
    """
    with chart_output:
        dfo = pd.read_csv('monthly_sums_hex_o.csv')
        dfo['date'] = pd.to_datetime(dfo['Mês'], format='%Y %m')
        filtered_df = dfo[(dfo['date'] >= start_date_dt) & (dfo['date'] <= end_date_dt)]

        df = pd.read_csv('monthly_sums.csv')
        df['date'] = pd.to_datetime(df['Mês'], format='%Y %m')
        filtered_df2 = df[(df['date'] >= start_date_dt) & (df['date'] <= end_date_dt)]

        plt.ion()

        # Pair the two series by month rather than by row position: the two
        # files are read independently, so equal row indices are not
        # guaranteed to refer to the same month.
        # NOTE(review): assumes each file has one row per month — confirm.
        paired = pd.merge(
            filtered_df2[['date', 'total_viagens']],
            filtered_df[['date', 'count']],
            on='date',
            how='inner',
        )
        data = paired[['total_viagens', 'count']]
        correlation_matrix = data.corr(method='pearson')

        labels = ['Viagens', 'Atividades']

        plt.figure(figsize=(10, 8))
        plt.xticks(rotation=0, ha='center', fontsize=11)
        plt.yticks(rotation=0, ha='right', fontsize=11)
        sns.heatmap(correlation_matrix,
                    annot=True,
                    cmap="Blues",
                    center=0,
                    fmt=".2f",
                    cbar_kws={'label': 'Correlação de Pearson'},
                    xticklabels=labels,
                    yticklabels=labels)
        plt.show()

In [10]:
def on_button_click(b):
    """Validate the selected period and redraw every chart for it.

    Builds the start and end dates (first day of the chosen months) from the
    dropdown values. If the end is after 08/2024 or before the start, an
    error is printed into ``notification_output`` and nothing is redrawn.
    Otherwise all plotting functions are run for the period.

    Parameters
    ----------
    b : the object passed by the button's ``on_click`` callback (unused).
    """
    # Drop messages from earlier clicks so a stale error is never shown next
    # to the result of a later, valid selection.
    notification_output.clear_output()

    start_date_dt = pd.to_datetime(f"{start_year.value}-{start_month.value}-01")
    end_date_dt = pd.to_datetime(f"{end_year.value}-{end_month.value}-01")

    limit_2024 = pd.to_datetime('2024-08-01')
    if end_date_dt > limit_2024:
        with notification_output:
            print("Erro: Os dados estão disponíveis apenas até 08/2024.")
        return

    if end_date_dt < start_date_dt:
        with notification_output:
            print("Erro: Período inválido. Verifique início e fim do período.")
        return

    # The controls and ``chart_output`` are already on screen from the initial
    # display call, so they are not re-displayed here. Re-rendering the cell
    # from the callback would drop ``chart_output`` and hide the charts.

    # Release the figures of the previous selection; otherwise every click
    # leaves another full set of open figures behind.
    plt.close('all')

    # plot_data clears chart_output; the remaining functions append to it.
    plot_data(start_date_dt, end_date_dt)
    plot_type_data(start_date_dt, end_date_dt)
    plot_gender_data(start_date_dt, end_date_dt)
    plot_age_data(start_date_dt, end_date_dt)
    plot_activities_data(start_date_dt, end_date_dt)
    plot_period_data(start_date_dt, end_date_dt)
    test(start_date_dt, end_date_dt)
    
# Lay out the period selectors as two rows, then show the controls followed
# by the message area and the chart area.
start_row = HBox([label1, start_month, start_year])
end_row = HBox([label2, end_month, end_year])
display(label, start_row, end_row, button, notification_output, chart_output)

# Redraw the charts whenever the button is pressed.
button.on_click(on_button_click)

Label(value='Dados disponíveis: 01/2019 à 08/2024', layout=Layout(width='250px'))

HBox(children=(Label(value='Início do período', layout=Layout(width='100px')), Dropdown(layout=Layout(width='1…

HBox(children=(Label(value='Fim do período', layout=Layout(width='100px')), Dropdown(layout=Layout(width='100p…

Button(description='Selecionar período', style=ButtonStyle())

Output()

Output()