In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
def load_data(columns=None, data_dir='Datasets_Vodafone', n_parts=3):
    """Load the Facebook PCU analysis dataset from its pickled parts.

    The dataset is stored as ``Facebook_PCU_Analysis_part_<k>.pkl`` files
    (k = 1..n_parts) inside ``data_dir``; the parts are read in order and
    concatenated row-wise, keeping each part's original index.

    Parameters
    ----------
    columns : str or iterable of str, optional
        Columns to keep from every part. They must exist in the pickled
        files. ``None`` (default) keeps all columns.
    data_dir : str, optional
        Directory that holds the part files.
    n_parts : int, optional
        Number of part files to read.

    Returns
    -------
    pandas.DataFrame
        The concatenated data. When ``post_id`` is present it is converted
        to ``str``; when ``post_date`` is present it is converted to
        datetime and a ``post_weekday`` column with the Portuguese weekday
        name is added.

    Raises
    ------
    ValueError
        If ``n_parts`` is smaller than 1.
    """
    if n_parts < 1:
        raise ValueError(f'n_parts must be at least 1, got {n_parts}')

    # Normalise the selection to a list so that part[columns] always
    # returns a DataFrame (a bare string would return a Series).
    if isinstance(columns, str):
        columns = [columns]
    elif columns is not None:
        columns = list(columns)

    parts = []
    for part_number in range(1, n_parts + 1):
        # NOTE: unpickling can execute arbitrary code; only load files
        # that come from a trusted source.
        part = pd.read_pickle(f'{data_dir}/Facebook_PCU_Analysis_part_{part_number}.pkl')
        if columns is not None:
            # Select per part so unused columns are dropped before concatenating.
            part = part[columns]
        parts.append(part)

    data = pd.concat(parts)

    # The conversions below only apply to columns that were actually loaded,
    # so a column subset without them no longer raises KeyError.
    if 'post_id' in data.columns:
        data['post_id'] = data['post_id'].astype(str)

    if 'post_date' in data.columns:
        data['post_date'] = pd.to_datetime(data['post_date'])

        # dt.weekday: 0 = Monday ... 6 = Sunday, mapped to Portuguese names.
        weekday_names = {
            0: 'Segunda-feira',
            1: 'Terça-feira',
            2: 'Quarta-feira',
            3: 'Quinta-feira',
            4: 'Sexta-feira',
            5: 'Sábado',
            6: 'Domingo',
        }
        data['post_weekday'] = data['post_date'].dt.weekday.map(weekday_names)

    return data

In [3]:
# Load the dataset with all columns (load_data is called without a column selection)
Facebook_PCU_Analysis = load_data()

In [4]:
# Preview only the first rows instead of emitting the whole frame as cell output.
Facebook_PCU_Analysis.head()

In [8]:
# Monthly totals of shares, reactions and comments for every post:
# rows are bucketed by post_id and by the month of post_date, then summed.
# NOTE(review): the earlier description called the post_comments figure a
# "count", but it is summed like the other two columns — confirm the intent.
# NOTE(review): the 'M' frequency alias is deprecated in pandas >= 2.2 in
# favour of 'ME'; switch once the pandas version in use is confirmed.
month_bucket = pd.Grouper(key='post_date', freq='M')
posts_per_month = (
    Facebook_PCU_Analysis
    .groupby(['post_id', month_bucket])
    .agg(
        post_shares=('post_shares', 'sum'),
        post_reactions=('post_reactions', 'sum'),
        post_comments=('post_comments', 'sum'),
    )
    .reset_index()
)

In [9]:
# Preview only the first rows of the monthly aggregation.
posts_per_month.head()

In [12]:
# Prepare the three-level hierarchy (Total -> predicted genre -> weekday)
# as parallel lists of labels, parents and values for an icicle chart.

# Keep only rows whose predicted genre is one of the three listed categories.
# Boolean indexing already returns a new frame, so the full dataset does not
# need to be copied first.
df = Facebook_PCU_Analysis[
    Facebook_PCU_Analysis['user_predicted_genre'].isin(['Masculino', 'Feminino', 'Indeterminado'])
]

# Number of rows for every (genre, weekday) pair.
grouped_df = df.groupby(['user_predicted_genre', 'post_weekday']).size().reset_index(name='Total de Utilizadores')

# Share of each (genre, weekday) pair in the overall row count, in percent.
total_users = grouped_df['Total de Utilizadores'].sum()
grouped_df['Percentagem'] = (grouped_df['Total de Utilizadores'] / total_users) * 100

# Genre-level nodes: computed once and reused by every list below.
genres = grouped_df['user_predicted_genre'].unique().tolist()
genre_totals = [
    grouped_df.loc[grouped_df['user_predicted_genre'] == genre, 'Total de Utilizadores'].sum()
    for genre in genres
]

# Leaf-level nodes, one per (genre, weekday) row. Plain column iteration is
# used instead of DataFrame.apply(axis=1).tolist() so that the cell also works
# when no rows survive the filter.
leaf_genres = grouped_df['user_predicted_genre'].tolist()
leaf_weekdays = grouped_df['post_weekday'].tolist()
leaf_percentages = grouped_df['Percentagem'].tolist()

# Node lists: root first, then the genre nodes, then the genre/weekday leaves.
labels = ['Total'] + genres + [
    f"{genre} - {weekday}" for genre, weekday in zip(leaf_genres, leaf_weekdays)
]
parents = [''] + ['Total'] * len(genres) + leaf_genres
values = [total_users] + genre_totals + grouped_df['Total de Utilizadores'].tolist()

# Same nodes as `labels`, with the percentage share appended to each name.
# NOTE(review): `parents` refers to genre nodes by their plain names from
# `labels`; the genre entries here carry a percent suffix and will not match
# them. If the chart (not visible here) displays these labels, it presumably
# needs `labels` passed as the node ids — confirm.
labels_with_percent = (
    ['Total']
    + [
        f"{genre} ({(genre_total / total_users) * 100:.2f}%)"
        for genre, genre_total in zip(genres, genre_totals)
    ]
    + [
        f"{genre} - {weekday} ({percentage:.2f}%)"
        for genre, weekday, percentage in zip(leaf_genres, leaf_weekdays, leaf_percentages)
    ]
)


In [13]:
# Inspect the node labels that carry the percentage suffix
labels_with_percent

In [14]:
# NOTE(review): this cell shows labels_with_percent a second time, exactly like the cell before it — likely redundant
labels_with_percent

In [15]:
# Inspect the parent name of every node ('' for the root, 'Total' for genre nodes, the genre for leaves)
parents

In [16]:
# Inspect the row counts attached to every node (overall total, per-genre totals, then per genre/weekday)
values

In [20]:
# Each node needs exactly one label, one parent and one value, so the three
# lists must have the same length; fail loudly if they ever drift apart.
assert len(labels_with_percent) == len(parents) == len(values), "icicle node lists have different lengths"
len(labels_with_percent)