In [36]:
# ================================
# Cloudwalk Monitoring Challenge
# Notebook final – análise de anomalias
# Autor: Luiz Roberto
# ================================

# 1️⃣ Importar bibliotecas
import pandas as pd

# 2️⃣ Read the CSV files
# Replace with the correct paths if necessary
# NOTE(review): the recorded output of this cell shows an 'Unnamed: 0' column,
# which suggests the files carry a saved index column that is being read as
# data (and later written to the reports) — consider index_col=0; confirm.
df1 = pd.read_csv('../data/checkout_1.csv')
df2 = pd.read_csv('../data/checkout_2.csv')

# Preview the first rows of each frame as loaded, before any derived columns
print("Checkout 1 - Dados brutos:")
display(df1.head())
print("Checkout 2 - Dados brutos:")
display(df2.head())

# 3️⃣ Compute percentage variations
def calcular_variacoes(df):
    """Return a new DataFrame with three percentage-variation columns added.

    The input frame is NOT modified; a new frame is returned, so re-running a
    cell that calls this function never changes a frame displayed earlier.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'today', 'yesterday', 'avg_last_week' and
        'avg_last_month'.

    Returns
    -------
    pandas.DataFrame
        All columns of ``df`` plus 'var_today_yesterday', 'var_today_avg_week'
        and 'var_today_avg_month', each computed as
        ``(today - baseline) / baseline * 100``.

    Notes
    -----
    A baseline of exactly 0 is replaced by 1 in the denominator to avoid a
    division by zero. In that case the result is ``today * 100`` rather than a
    true percentage (e.g. today=1 against a baseline of 0 yields 100.0).
    """
    hoje = df['today']
    variacoes = {}
    for coluna_saida, coluna_base in (
        ('var_today_yesterday', 'yesterday'),
        ('var_today_avg_week', 'avg_last_week'),
        ('var_today_avg_month', 'avg_last_month'),
    ):
        base = df[coluna_base]
        # Only the denominator gets the 0 -> 1 substitution; the numerator
        # still uses the real baseline value.
        denominador = base.replace(0, 1)
        variacoes[coluna_saida] = ((hoje - base) / denominador) * 100
    # assign() works on a copy, leaving the caller's frame untouched.
    return df.assign(**variacoes)

# Rebind each name to the frame returned by calcular_variacoes
df1 = calcular_variacoes(df1)
df2 = calcular_variacoes(df2)

# Check the result
print("Checkout 1 - Após cálculo de variações:")
display(df1.head())
print("Checkout 2 - Após cálculo de variações:")
display(df2.head())

# 4️⃣ Build the short explanation column
def gerar_explicacao(row):
    """Classify one row by its variation against the weekly average.

    Reads ``row['var_today_avg_week']`` and returns one of three labels:
    a drop label when the value is -50 or lower, an increase label when it is
    50 or higher, and 'normal' otherwise. Both thresholds are inclusive, so a
    value of exactly -50 or 50 is labelled as a drop/increase.
    """
    variacao_semanal = row['var_today_avg_week']
    if variacao_semanal <= -50:
        return 'queda acima de 50% comparado à média semanal'
    if variacao_semanal >= 50:
        return 'aumento acima de 50% comparado à média semanal'
    # Anything strictly between the two thresholds (or not comparable) is normal.
    return 'normal'

# Label every row; axis=1 makes apply pass one row at a time to gerar_explicacao
df1['explicacao'] = df1.apply(gerar_explicacao, axis=1)
df2['explicacao'] = df2.apply(gerar_explicacao, axis=1)

# Check the explanations
print("Checkout 1 - Após gerar explicações:")
display(df1.head())
print("Checkout 2 - Após gerar explicações:")
display(df2.head())

# 5️⃣ Keep only the anomalies
# (rows whose 'explicacao' label is anything other than 'normal')
anomalias_df1 = df1[df1['explicacao'] != 'normal']
anomalias_df2 = df2[df2['explicacao'] != 'normal']

# The filtered frames are displayed in full (no .head())
print("Checkout 1 - Anomalias detectadas:")
display(anomalias_df1)

print("Checkout 2 - Anomalias detectadas:")
display(anomalias_df2)

# 6️⃣ Save the final reports to the reports directory
# index=False leaves the DataFrame index out of the written CSV files
anomalias_df1.to_csv("../reports/checkout_1_report.csv", index=False)
anomalias_df2.to_csv("../reports/checkout_2_report.csv", index=False)

print("✅ Relatórios finais de anomalias gerados com sucesso!")


Checkout 1 - Dados brutos:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month
0,00h,9,12,11,6.42,4.85
1,01h,3,5,1,1.85,1.92
2,02h,1,0,0,0.28,0.82
3,03h,1,0,0,0.42,0.46
4,04h,0,0,1,0.42,0.21


Checkout 2 - Dados brutos:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month
0,00h,6,9,5,5.0,4.92
1,01h,3,3,2,2.0,1.92
2,02h,3,1,2,0.42,0.75
3,03h,0,1,1,0.42,0.46
4,04h,0,0,0,0.14,0.21


Checkout 1 - Após cálculo de variações:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month,var_today_yesterday,var_today_avg_week,var_today_avg_month
0,00h,9,12,11,6.42,4.85,-25.0,40.186916,85.56701
1,01h,3,5,1,1.85,1.92,-40.0,62.162162,56.25
2,02h,1,0,0,0.28,0.82,100.0,257.142857,21.95122
3,03h,1,0,0,0.42,0.46,100.0,138.095238,117.391304
4,04h,0,0,1,0.42,0.21,0.0,-100.0,-100.0


Checkout 2 - Após cálculo de variações:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month,var_today_yesterday,var_today_avg_week,var_today_avg_month
0,00h,6,9,5,5.0,4.92,-33.333333,20.0,21.95122
1,01h,3,3,2,2.0,1.92,0.0,50.0,56.25
2,02h,3,1,2,0.42,0.75,200.0,614.285714,300.0
3,03h,0,1,1,0.42,0.46,-100.0,-100.0,-100.0
4,04h,0,0,0,0.14,0.21,0.0,-100.0,-100.0


Checkout 1 - Após gerar explicações:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month,var_today_yesterday,var_today_avg_week,var_today_avg_month,explicacao
0,00h,9,12,11,6.42,4.85,-25.0,40.186916,85.56701,normal
1,01h,3,5,1,1.85,1.92,-40.0,62.162162,56.25,aumento acima de 50% comparado à média semanal
2,02h,1,0,0,0.28,0.82,100.0,257.142857,21.95122,aumento acima de 50% comparado à média semanal
3,03h,1,0,0,0.42,0.46,100.0,138.095238,117.391304,aumento acima de 50% comparado à média semanal
4,04h,0,0,1,0.42,0.21,0.0,-100.0,-100.0,queda acima de 50% comparado à média semanal


Checkout 2 - Após gerar explicações:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month,var_today_yesterday,var_today_avg_week,var_today_avg_month,explicacao
0,00h,6,9,5,5.0,4.92,-33.333333,20.0,21.95122,normal
1,01h,3,3,2,2.0,1.92,0.0,50.0,56.25,aumento acima de 50% comparado à média semanal
2,02h,3,1,2,0.42,0.75,200.0,614.285714,300.0,aumento acima de 50% comparado à média semanal
3,03h,0,1,1,0.42,0.46,-100.0,-100.0,-100.0,queda acima de 50% comparado à média semanal
4,04h,0,0,0,0.14,0.21,0.0,-100.0,-100.0,queda acima de 50% comparado à média semanal


Checkout 1 - Anomalias detectadas:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month,var_today_yesterday,var_today_avg_week,var_today_avg_month,explicacao
1,01h,3,5,1,1.85,1.92,-40.0,62.162162,56.25,aumento acima de 50% comparado à média semanal
2,02h,1,0,0,0.28,0.82,100.0,257.142857,21.95122,aumento acima de 50% comparado à média semanal
3,03h,1,0,0,0.42,0.46,100.0,138.095238,117.391304,aumento acima de 50% comparado à média semanal
4,04h,0,0,1,0.42,0.21,0.0,-100.0,-100.0,queda acima de 50% comparado à média semanal
6,06h,1,1,5,2.85,2.28,0.0,-64.912281,-56.140351,queda acima de 50% comparado à média semanal
7,07h,2,3,9,5.57,5.21,-33.333333,-64.093357,-61.612284,queda acima de 50% comparado à média semanal
8,08h,0,1,18,8.71,10.42,-100.0,-100.0,-100.0,queda acima de 50% comparado à média semanal
9,09h,2,9,30,20.0,19.07,-77.777778,-90.0,-89.512323,queda acima de 50% comparado à média semanal
10,10h,55,51,45,29.42,28.35,7.843137,86.947655,94.003527,aumento acima de 50% comparado à média semanal
12,12h,51,39,39,27.57,25.42,30.769231,84.983678,100.629426,aumento acima de 50% comparado à média semanal


Checkout 2 - Anomalias detectadas:


Unnamed: 0,time,today,yesterday,same_day_last_week,avg_last_week,avg_last_month,var_today_yesterday,var_today_avg_week,var_today_avg_month,explicacao
1,01h,3,3,2,2.0,1.92,0.0,50.0,56.25,aumento acima de 50% comparado à média semanal
2,02h,3,1,2,0.42,0.75,200.0,614.285714,300.0,aumento acima de 50% comparado à média semanal
3,03h,0,1,1,0.42,0.46,-100.0,-100.0,-100.0,queda acima de 50% comparado à média semanal
4,04h,0,0,0,0.14,0.21,0.0,-100.0,-100.0,queda acima de 50% comparado à média semanal
5,05h,2,1,1,0.71,0.71,100.0,181.690141,181.690141,aumento acima de 50% comparado à média semanal
6,06h,3,1,2,1.42,2.1,200.0,111.267606,42.857143,aumento acima de 50% comparado à média semanal
7,07h,10,2,9,3.0,5.03,400.0,233.333333,98.807157,aumento acima de 50% comparado à média semanal
8,08h,25,0,12,3.71,9.82,2500.0,573.854447,154.582485,aumento acima de 50% comparado à média semanal
9,09h,36,2,27,10.14,17.64,1700.0,255.029586,104.081633,aumento acima de 50% comparado à média semanal
10,10h,43,55,42,26.14,28.57,-21.818182,64.498852,50.507525,aumento acima de 50% comparado à média semanal


✅ Relatórios finais de anomalias gerados com sucesso!


In [33]:
import os
# Exploratory check: show the kernel's current working directory, which is
# what the relative '../data' and '../reports' paths are resolved against.
os.getcwd()


'c:\\Users\\lurobert\\Documents\\cloudwalk-monitoring-challenge-luiz\\notebooks'

In [34]:
import os
# Exploratory check: list the entries of the working directory's parent.
os.listdir('..')


['.git', 'data', 'notebooks', 'README.md', 'reports', 'requirements.txt']

In [35]:
# Exploratory check: list the contents of '../data'.
# NOTE: this cell has no import of its own; it relies on `os` having been
# imported by a previously executed cell.
os.listdir('../data')

['checkout_1.csv',
 'checkout_2.csv',
 'transactions_1.csv',
 'transactions_2.csv']