# Relatório Estatístico
Este notebook realiza análise estatística dos dados processados e salvos no banco de dados MySQL.

In [None]:
import os

from sqlalchemy import create_engine
import pandas as pd

# Connection URL for the MySQL database. When the ETLBCB_DATABASE_URL
# environment variable is set it takes precedence, so real credentials do not
# have to be written into the notebook; the literal below is only the
# fallback used when the variable is absent.
DATABASE_URL = os.environ.get(
    "ETLBCB_DATABASE_URL",
    "mysql+pymysql://root:1234@localhost:3306/etlbcb",
)

engine = create_engine(DATABASE_URL)
# Load every row and column of the table into a DataFrame for the cells below.
df = pd.read_sql("SELECT * FROM meiosdepagamentostri", con=engine)

In [None]:
# Convert the 'data' column to pandas datetime values, replacing the column on df.
df['data'] = pd.to_datetime(df['data'])

In [None]:
# Value columns to rescale: each one present in df is multiplied by one million
# (presumably the table stores amounts in millions — TODO confirm the unit).
# NOTE: not idempotent — re-running this cell scales the columns again.
monetary_cols = [
    'valorPix',
    'valorCartaoCredito',
    'valorCartaoDebito',
    'valorCartaoPrePago',
]
# Columns missing from df are skipped instead of raising KeyError.
for name in [c for c in monetary_cols if c in df.columns]:
    df[name] = df[name].mul(1_000_000)

In [None]:
# Count columns to rescale: each one present in df is multiplied by one thousand
# (presumably the table stores quantities in thousands — TODO confirm the unit).
# NOTE: not idempotent — re-running this cell scales the columns again.
quant_cols = [
    'quantidadePix',
    'quantidadeCartaoCredito',
    'quantidadeCartaoDebito',
    'quantidadeCartaoPrePago',
]
# Columns missing from df are skipped instead of raising KeyError.
for name in [c for c in quant_cols if c in df.columns]:
    df[name] = df[name].mul(1_000)

In [None]:
# Central-tendency measures over the numeric columns of df. Each statistic is
# wrapped in a lambda so its heading is printed before the value is computed.
central_tendency = [
    ("Média:", lambda: df.mean(numeric_only=True)),
    ("\nMediana:", lambda: df.median(numeric_only=True)),
    # Only the first row of the mode() result is printed.
    ("\nModa:", lambda: df.mode(numeric_only=True).iloc[0]),
]
for heading, compute in central_tendency:
    print(heading)
    print(compute())

In [None]:
# Dispersion measures over the numeric columns of df. Each statistic is wrapped
# in a lambda so its heading is printed before the value is computed.
dispersion = [
    ("Variância:", lambda: df.var(numeric_only=True)),
    ("\nDesvio Padrão:", lambda: df.std(numeric_only=True)),
    # Range of each column: maximum minus minimum.
    ("\nAmplitude:", lambda: df.max(numeric_only=True) - df.min(numeric_only=True)),
]
for heading, compute in dispersion:
    print(heading)
    print(compute())

In [None]:
# Summary statistics for df; kept as the cell's last expression so the notebook displays the result.
df.describe()

In [None]:
# Print the first quartile, third quartile and interquartile range of every
# monetary column that exists in df.
for name in monetary_cols:
    if name not in df.columns:
        continue  # column absent from this table; nothing to report
    series = df[name]
    lower, upper = series.quantile(0.25), series.quantile(0.75)
    print(f"{name} - Q1: {lower:,.2f}, Q3: {upper:,.2f}, IQR: {upper - lower:,.2f}")

In [None]:
import numpy as np

# Mean of valorPix weighted by quantidadePix; skipped when either column is
# missing from df.
if all(name in df.columns for name in ('valorPix', 'quantidadePix')):
    pix_values, pix_weights = df['valorPix'], df['quantidadePix']
    media_ponderada = np.average(pix_values, weights=pix_weights)
    print(f"Média ponderada valorPix: {media_ponderada:,.2f}")

In [None]:
def outliers(col, *, frame=None, factor=1.5):
    """Return the rows whose ``col`` value lies outside the IQR fences.

    A row is selected when its value is below ``Q1 - factor * IQR`` or above
    ``Q3 + factor * IQR``, where Q1 and Q3 are the 0.25 and 0.75 quantiles of
    the column and ``IQR = Q3 - Q1``.

    Args:
        col: Name of the column to inspect.
        frame: DataFrame to analyse. When omitted, the notebook-level ``df``
            is used, so existing ``outliers(col)`` calls keep working.
        factor: Multiplier applied to the IQR when building the fences. The
            default of 1.5 is the value that was previously hard-coded.

    Returns:
        The rows of the analysed DataFrame flagged as outliers (an empty
        DataFrame when there are none).

    Raises:
        KeyError: If ``col`` is not a column of the analysed DataFrame.
    """
    # Fall back to the global only when no explicit frame is supplied.
    data = df if frame is None else frame
    values = data[col]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_fence = q1 - factor * iqr
    upper_fence = q3 + factor * iqr
    return data[(values < lower_fence) | (values > upper_fence)]

# Rows of df whose valorPix is flagged by outliers().
outliers_pix = outliers('valorPix')
# Show only the 'data' and 'valorPix' columns of those rows as the cell output.
outliers_pix[['data', 'valorPix']]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" style to the figures drawn below.
sns.set(style="whitegrid")

# Figure 1: histogram of valorPix (30 bins) with a KDE curve overlaid.
sns.histplot(df['valorPix'], kde=True, bins=30)
plt.title("Histograma - valorPix")
plt.xlabel("Valor")
plt.ylabel("Frequência")
plt.tight_layout()
plt.show()

# Figure 2: box plot of the columns listed in monetary_cols.
# NOTE(review): the title says "Cartões", but monetary_cols as defined earlier
# in this notebook also contains 'valorPix' — confirm the intended title/columns.
sns.boxplot(data=df[monetary_cols])
plt.title("Boxplot - Cartões")
plt.tight_layout()
plt.show()

# Figure 3: valorPix over time. Rows are sorted by the 'data' column first so
# the points are plotted in ascending 'data' order.
df_sorted = df.sort_values('data')
plt.plot(df_sorted['data'], df_sorted['valorPix'], marker='o')
plt.title("Série Temporal - valorPix")
plt.xlabel("Data")
plt.ylabel("Valor")
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()