# Análise de Retenção por Cohort - Olist E-Commerce

**Autor:** André Bomfim  
**Data:** Novembro 2024  
**Objetivo:** Analisar retenção de clientes ao longo do tempo usando cohort analysis para identificar padrões de churn e oportunidades de fidelização

---

## 📋 Índice

1. [Setup e Configuração](#1-setup)
2. [Cohort Base e Matriz de Retenção](#2-matriz)
3. [Curvas de Retenção](#3-curvas)
4. [Análise de Churn](#4-churn)
5. [Retenção por Estado](#5-estado)
6. [Benchmark e Curva Média](#6-benchmark)
7. [Métricas Agregadas por Cohort](#7-metricas)
8. [Insights e Conclusões](#8-insights)

## 1. Setup e Configuração

In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from datetime import datetime, timedelta
import warnings
from dotenv import load_dotenv
import os

warnings.filterwarnings('ignore')

# Configura√ß√£o de visualiza√ß√£o
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Configura√ß√£o do pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.2f}'.format)

# Carregar vari√°veis de ambiente
load_dotenv()

# Configura√ß√£o BigQuery
PROJECT_ID = os.getenv('GCP_PROJECT_ID')
DATASET_ID = os.getenv('GCP_DATASET_ID', 'olist_ecommerce')

# Cliente BigQuery
client = bigquery.Client(project=PROJECT_ID)

# Criar diret√≥rio para imagens
os.makedirs('../docs/images', exist_ok=True)

print(f"Setup completo - Projeto: {PROJECT_ID}, Dataset: {DATASET_ID}")

In [None]:
def query_bigquery(query: str) -> pd.DataFrame:
    """Run a SQL statement through the module-level BigQuery ``client``.

    Args:
        query: SQL text to submit.

    Returns:
        The result of calling ``to_dataframe()`` on the submitted query job.
    """
    job = client.query(query)
    return job.to_dataframe()

def save_plot(fig, filename):
    """Write ``fig`` to ``../docs/images/<filename>`` without ever raising.

    Any exception raised while saving is caught and reported with a printed
    message, so a failed export does not stop the notebook.

    Args:
        fig: Object exposing ``savefig(path, dpi=..., bbox_inches=...)``.
        filename: File name (with extension) inside ``../docs/images``.
    """
    try:
        target = f'../docs/images/{filename}'
        fig.savefig(target, bbox_inches='tight', dpi=300)
        print(f"‚úì Gr√°fico salvo: docs/images/{filename}")
    except Exception as exc:
        print(f"‚ö†Ô∏è Erro ao salvar gr√°fico: {exc}")

## 2. Cohort Base e Matriz de Retenção

In [None]:
# Retention-matrix query.
#   cohort_base       - month of each customer's first delivered order
#   customer_activity - every delivered order joined to payments, tagged with
#                       its month offset from the customer's cohort month
#   retention_data    - distinct active customers and summed payment value per
#                       (cohort month, offset), offsets 0..12 only
#   cohort_sizes      - number of distinct customers per cohort month
# The final SELECT keeps cohorts with at least 50 customers and computes
# retention_rate_pct = active_customers / cohort_size * 100.
query_retention_matrix = f"""
WITH cohort_base AS (
    -- Primeira compra de cada cliente
    SELECT 
        c.customer_unique_id,
        DATE_TRUNC(MIN(o.order_purchase_timestamp), MONTH) as cohort_month
    FROM `{PROJECT_ID}.{DATASET_ID}.orders` o
    JOIN `{PROJECT_ID}.{DATASET_ID}.customers` c ON o.customer_id = c.customer_id
    WHERE o.order_status = 'delivered'
    GROUP BY c.customer_unique_id
),
customer_activity AS (
    -- Todas compras com cohort_month
    SELECT 
        cb.customer_unique_id,
        cb.cohort_month,
        DATE_TRUNC(o.order_purchase_timestamp, MONTH) as activity_month,
        DATE_DIFF(DATE_TRUNC(o.order_purchase_timestamp, MONTH), cb.cohort_month, MONTH) as months_since_first_purchase,
        p.payment_value
    FROM `{PROJECT_ID}.{DATASET_ID}.orders` o
    JOIN `{PROJECT_ID}.{DATASET_ID}.customers` c ON o.customer_id = c.customer_id
    JOIN cohort_base cb ON c.customer_unique_id = cb.customer_unique_id
    JOIN `{PROJECT_ID}.{DATASET_ID}.payments` p ON o.order_id = p.order_id
    WHERE o.order_status = 'delivered'
),
retention_data AS (
    -- Matriz de reten√ß√£o
    SELECT
        cohort_month,
        months_since_first_purchase,
        COUNT(DISTINCT customer_unique_id) as active_customers,
        SUM(payment_value) as revenue
    FROM customer_activity
    WHERE months_since_first_purchase BETWEEN 0 AND 12
    GROUP BY cohort_month, months_since_first_purchase
),
cohort_sizes AS (
    -- Tamanho inicial dos cohorts
    SELECT
        cohort_month,
        COUNT(DISTINCT customer_unique_id) as cohort_size
    FROM cohort_base
    GROUP BY cohort_month
)
SELECT
    rd.cohort_month,
    rd.months_since_first_purchase,
    rd.active_customers,
    rd.revenue,
    cs.cohort_size,
    (rd.active_customers / cs.cohort_size) * 100 as retention_rate_pct
FROM retention_data rd
JOIN cohort_sizes cs ON rd.cohort_month = cs.cohort_month
WHERE cs.cohort_size >= 50  -- Filtrar cohorts pequenos
ORDER BY rd.cohort_month, rd.months_since_first_purchase
"""

# Run the query and report row count, number of cohorts and cohort date range.
df_retention = query_bigquery(query_retention_matrix)
print(f"‚úì {len(df_retention):,} registros de reten√ß√£o carregados")
print(f"‚úì Cohorts analisados: {df_retention['cohort_month'].nunique()}")
print(f"‚úì Per√≠odo: {df_retention['cohort_month'].min()} a {df_retention['cohort_month'].max()}")

In [None]:
# Build the retention matrix: one row per cohort month, one column per
# month offset, values = retention_rate_pct.
df_pivot = df_retention.pivot_table(
    index='cohort_month',
    columns='months_since_first_purchase',
    values='retention_rate_pct',
    aggfunc='first'
)

# Order cohorts chronologically
df_pivot = df_pivot.sort_index()

# A missing cell means one of two things:
#   * the month had already elapsed but nobody from the cohort bought -> 0%
#   * the month had not happened yet when the data ends -> unknown
# Filling both with 0 makes recent cohorts look like they churned completely
# and pulls the averages below down. Only the first case is set to 0; cells
# past the last observed activity month stay NaN so they are left out of the
# means (and drawn blank in a heatmap).
present = df_pivot.notna().to_numpy()
if present.any():
    # year*12 + month works for both datetime.date and pandas Timestamp labels
    cohort_ordinals = np.array([c.year * 12 + c.month for c in df_pivot.index])
    month_offsets = np.array([int(col) for col in df_pivot.columns])
    activity_ordinals = cohort_ordinals[:, None] + month_offsets[None, :]
    last_observed = activity_ordinals[present].max()
    df_pivot = df_pivot.fillna(0).where(activity_ordinals <= last_observed)

# Rename columns to M0, M1, ...
df_pivot.columns = [f'M{int(col)}' for col in df_pivot.columns]

print("MATRIZ DE RETEN√á√ÉO (Primeiras 5 cohorts):")
print(df_pivot.head())

# Basic statistics (NaN cells, i.e. months not yet observable, are skipped)
print(f"\nESTAT√çSTICAS GERAIS:")
print(f"  ‚Ä¢ Cohorts analisados: {len(df_pivot)}")
print(f"  ‚Ä¢ Reten√ß√£o M1 m√©dia: {df_pivot['M1'].mean():.2f}%")
print(f"  ‚Ä¢ Reten√ß√£o M3 m√©dia: {df_pivot['M3'].mean():.2f}%")
print(f"  ‚Ä¢ Reten√ß√£o M6 m√©dia: {df_pivot['M6'].mean():.2f}%")

In [None]:
# Retention-matrix heatmap, saved as cohort_retention_matrix.png
plt.close('all')
fig, ax = plt.subplots(figsize=(16, 10))

# Collect the heatmap styling in one place, then draw onto the axes
heatmap_options = dict(
    annot=True,
    fmt='.1f',
    annot_kws={'size': 9},
    cmap='RdYlGn',
    center=5,
    linewidths=0.5,
    cbar_kws={'label': 'Taxa de Reten√ß√£o (%)'},
)
sns.heatmap(df_pivot, ax=ax, **heatmap_options)

# Title and axis labels
ax.set_title(
    'Matriz de Reten√ß√£o - Cohorts por M√™s (Reten√ß√£o %)',
    fontsize=16,
    fontweight='bold',
    pad=20,
)
ax.set_xlabel('Meses desde Primeira Compra', fontsize=12)
ax.set_ylabel('Cohort (M√™s Primeira Compra)', fontsize=12)

plt.tight_layout()
save_plot(fig, 'cohort_retention_matrix.png')
plt.show()

## 3. Curvas de Retenção

In [None]:
# Work on a copy of the retention rows for the curve analysis
df_curves = df_retention.copy()

# Per month offset: mean, standard deviation, median and the 25th/75th
# percentiles of retention across cohorts, rounded to two decimals.
rate_by_month = df_curves.groupby('months_since_first_purchase')['retention_rate_pct']
retention_aggregated = (
    rate_by_month
    .agg(
        mean='mean',
        std='std',
        median='median',
        p25=lambda rates: rates.quantile(0.25),
        p75=lambda rates: rates.quantile(0.75),
    )
    .round(2)
    .reset_index()
)

print(" CURVA M√âDIA DE RETEN√á√ÉO:")
print(retention_aggregated.head(8))

In [None]:
# Retention curves: 2x2 panel figure, saved as cohort_retention_curves.png
plt.close('all')
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Panel 1: individual curves for the last 6 cohorts (rows) of df_pivot
recent_cohorts = df_pivot.tail(6)
for cohort in recent_cohorts.index:
    axes[0, 0].plot(
        range(len(recent_cohorts.columns)),
        recent_cohorts.loc[cohort].values,
        marker='o',
        linewidth=2,
        alpha=0.7,
        label=cohort.strftime('%Y-%m')
    )
axes[0, 0].set_xlabel('Meses desde Primeira Compra')
axes[0, 0].set_ylabel('Taxa de Reten√ß√£o (%)')
axes[0, 0].set_title('Curvas de Reten√ß√£o - √öltimos 6 Cohorts')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Panel 2: mean curve with a P25-P75 band (an interquartile band, not a
# statistical confidence interval)
months = retention_aggregated['months_since_first_purchase']
mean_retention = retention_aggregated['mean']
p25_retention = retention_aggregated['p25']
p75_retention = retention_aggregated['p75']

axes[0, 1].plot(months, mean_retention, linewidth=3, marker='o', label='M√©dia', color='blue')
axes[0, 1].fill_between(months, p25_retention, p75_retention, alpha=0.3, color='blue', label='P25-P75')
axes[0, 1].set_xlabel('Meses desde Primeira Compra')
axes[0, 1].set_ylabel('Taxa de Reten√ß√£o (%)')
axes[0, 1].set_title('Curva M√©dia de Reten√ß√£o com Intervalo P25-P75')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Panel 3: mean retention at M1, M3 and M6, taken from df_pivot columns
key_months = ['M1', 'M3', 'M6']
key_retention = [df_pivot['M1'].mean(), df_pivot['M3'].mean(), df_pivot['M6'].mean()]

bars = axes[1, 0].bar(key_months, key_retention, color=['red', 'orange', 'green'], alpha=0.7)
axes[1, 0].set_ylabel('Taxa de Reten√ß√£o (%)')
axes[1, 0].set_title('Reten√ß√£o em Momentos Chave (M1, M3, M6)')
axes[1, 0].grid(True, alpha=0.3, axis='y')

# Write the value above each bar
for bar, value in zip(bars, key_retention):
    axes[1, 0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, 
                   f'{value:.2f}%', ha='center', va='bottom', fontweight='bold')

# Panel 4: incremental churn = drop in mean retention between consecutive
# rows of retention_aggregated (previous row minus current row)
churn_incremental = []
for i in range(1, len(mean_retention)):
    churn = mean_retention.iloc[i-1] - mean_retention.iloc[i]
    churn_incremental.append(churn)

axes[1, 1].bar(range(1, len(mean_retention)), churn_incremental, 
               color='red', alpha=0.7, edgecolor='black')
axes[1, 1].set_xlabel('M√™s (Churn M√™s N-1 ‚Üí M√™s N)')
axes[1, 1].set_ylabel('Churn Incremental (% pontos)')
axes[1, 1].set_title('Churn Incremental por M√™s')
axes[1, 1].grid(True, alpha=0.3, axis='y')

plt.tight_layout()
save_plot(fig, 'cohort_retention_curves.png')
plt.show()

## 4. Análise de Churn

In [None]:
# Churn table derived from the retention rows already in memory: per month
# offset, the mean and standard deviation of retention plus the number of
# distinct cohorts contributing.
df_churn = (
    df_retention
    .groupby('months_since_first_purchase')
    .agg(
        avg_retention_rate=('retention_rate_pct', 'mean'),
        std_retention_rate=('retention_rate_pct', 'std'),
        cohort_count=('cohort_month', 'nunique'),
    )
    .reset_index()
)

# Churn is the complement of retention
df_churn['avg_churn_rate'] = 100 - df_churn['avg_retention_rate']

print(" AN√ÅLISE DE CHURN:")
print(df_churn.head(8))

In [None]:
# Churn analysis: two stacked panels, saved as cohort_churn_analysis.png
plt.close('all')
fig, axes = plt.subplots(2, 1, figsize=(14, 10))

# Panel 1: average churn rate per month offset
months = df_churn['months_since_first_purchase']
churn_rates = df_churn['avg_churn_rate']

axes[0].plot(months, churn_rates, linewidth=3, marker='o', color='red', label='Churn Rate')
# The band uses the retention standard deviation; churn is 100 - retention,
# so both have the same spread.
axes[0].fill_between(months, 
                    churn_rates - df_churn['std_retention_rate'], 
                    churn_rates + df_churn['std_retention_rate'], 
                    alpha=0.3, color='red', label='¬±1 Std Dev')

axes[0].set_xlabel('Meses desde Primeira Compra')
axes[0].set_ylabel('Taxa de Churn Acumulada (%)')
axes[0].set_title('Evolu√ß√£o do Churn Rate ao Longo do Tempo')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Highlight the change between the first two rows of df_churn.
# NOTE(review): assumes row 0 is month 0 and row 1 is month 1 - confirm the
# month offsets are contiguous from 0.
critical_churn = churn_rates.iloc[1] - churn_rates.iloc[0]
axes[0].annotate(f'Churn Cr√≠tico M0‚ÜíM1: {critical_churn:.1f}%',
                xy=(0.5, churn_rates.iloc[1]), xytext=(2, churn_rates.iloc[1] + 10),
                arrowprops=dict(arrowstyle='->', color='darkred'),
                fontweight='bold', color='darkred')

# Panel 2: incremental churn = change in churn rate between consecutive rows
incremental_churn = []
for i in range(1, len(churn_rates)):
    incremental = churn_rates.iloc[i] - churn_rates.iloc[i-1]
    incremental_churn.append(incremental)

# Bar colours by size of the increment: > 5 red, > 2 orange, otherwise yellow
positions = range(1, len(churn_rates))
colors = ['red' if churn > 5 else 'orange' if churn > 2 else 'yellow' for churn in incremental_churn]

bars = axes[1].bar(positions, incremental_churn, color=colors, alpha=0.7, edgecolor='black')
axes[1].set_xlabel('M√™s (Churn M√™s N-1 ‚Üí M√™s N)')
axes[1].set_ylabel('Churn Incremental (% pontos)')
axes[1].set_title('Churn Incremental por M√™s (Waterfall)')
axes[1].set_xticks(positions)
axes[1].set_xticklabels([f'M{i-1}‚ÜíM{i}' for i in positions])
axes[1].grid(True, alpha=0.3, axis='y')

# Write the value above each bar
for bar, value in zip(bars, incremental_churn):
    axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1, 
                f'{value:.1f}%', ha='center', va='bottom', fontsize=9, fontweight='bold')

plt.tight_layout()
save_plot(fig, 'cohort_churn_analysis.png')
plt.show()

## 5. Retenção por Estado

In [None]:
# State-level retention query (M1 and M3 retention per customer state).
#
# cohort_base must hold exactly one row per customer. Grouping by
# (customer_unique_id, customer_state) produced one row per state for any
# customer who ordered from more than one state, so that customer was counted
# in several states with a different cohort month in each. The state is now
# taken from the customer's earliest delivered order and the grouping is by
# customer only.
query_retention_state = f"""
WITH cohort_base AS (
    -- Primeira compra de cada cliente com estado
    SELECT 
        c.customer_unique_id,
        -- State of the earliest delivered order (one row per customer)
        ARRAY_AGG(c.customer_state IGNORE NULLS ORDER BY o.order_purchase_timestamp LIMIT 1)[SAFE_OFFSET(0)] as customer_state,
        DATE_TRUNC(MIN(o.order_purchase_timestamp), MONTH) as cohort_month
    FROM `{PROJECT_ID}.{DATASET_ID}.orders` o
    JOIN `{PROJECT_ID}.{DATASET_ID}.customers` c ON o.customer_id = c.customer_id
    WHERE o.order_status = 'delivered'
    GROUP BY c.customer_unique_id
),
all_purchases AS (
    -- Todas as compras dos clientes
    SELECT 
        c.customer_unique_id,
        DATE_TRUNC(o.order_purchase_timestamp, MONTH) as purchase_month
    FROM `{PROJECT_ID}.{DATASET_ID}.orders` o
    JOIN `{PROJECT_ID}.{DATASET_ID}.customers` c ON o.customer_id = c.customer_id
    WHERE o.order_status = 'delivered'
)
SELECT
    cb.customer_state,
    COUNT(DISTINCT cb.customer_unique_id) as total_customers,
    COUNT(DISTINCT CASE 
        WHEN DATE_DIFF(ap.purchase_month, cb.cohort_month, MONTH) = 1 
        THEN cb.customer_unique_id 
    END) * 100.0 / COUNT(DISTINCT cb.customer_unique_id) as m1_retention_pct,
    COUNT(DISTINCT CASE 
        WHEN DATE_DIFF(ap.purchase_month, cb.cohort_month, MONTH) = 3 
        THEN cb.customer_unique_id 
    END) * 100.0 / COUNT(DISTINCT cb.customer_unique_id) as m3_retention_pct
FROM cohort_base cb
LEFT JOIN all_purchases ap ON cb.customer_unique_id = ap.customer_unique_id
GROUP BY cb.customer_state
HAVING COUNT(DISTINCT cb.customer_unique_id) >= 100  -- Estados com volume significativo
ORDER BY m1_retention_pct DESC
"""

# Rows come back ordered by M1 retention, highest first; later cells rely on
# that order (head() = top states, iloc[0] = best state).
df_state_retention = query_bigquery(query_retention_state)
print(f"‚úì Reten√ß√£o calculada para {len(df_state_retention)} estados")
print("\n TOP 5 ESTADOS - RETEN√á√ÉO M1:")
print(df_state_retention.head())

In [None]:
# Horizontal bar chart of M1 retention by state, saved as
# cohort_retention_by_state.png
plt.close('all')
fig, ax = plt.subplots(figsize=(12, 8))

# First 10 rows of the state table, re-sorted ascending so the highest value
# ends up at the top of the horizontal chart
top_states = df_state_retention.head(10).sort_values('m1_retention_pct', ascending=True)
m1_values = top_states['m1_retention_pct']
customer_counts = top_states['total_customers']

# One bar per state
bar_positions = np.arange(len(top_states))
bars = ax.barh(bar_positions, m1_values, color='green', alpha=0.7, edgecolor='black')

ax.set_yticks(bar_positions)
ax.set_yticklabels(top_states['customer_state'])
ax.set_xlabel('Taxa de Reten√ß√£o M1 (%)')
ax.set_title('Top 10 Estados - Reten√ß√£o no Primeiro M√™s (M1)', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')

# Label each bar with its retention and customer count
for bar, retention, total in zip(bars, m1_values, customer_counts):
    bar_label = f'{retention:.1f}% ({total:,} clientes)'
    ax.text(
        bar.get_width() + 0.1,
        bar.get_y() + bar.get_height()/2,
        bar_label,
        va='center',
        ha='left',
        fontsize=10,
    )

plt.tight_layout()
save_plot(fig, 'cohort_retention_by_state.png')
plt.show()

## 6. Benchmark e Curva Média

In [None]:
# Benchmark table: per month offset, the mean, standard deviation, min, max
# and median of retention across cohorts, rounded to two decimals.
benchmark_stats = ['mean', 'std', 'min', 'max', 'median']
benchmark_data = (
    df_retention
    .groupby('months_since_first_purchase')['retention_rate_pct']
    .agg(benchmark_stats)
    .round(2)
    .reset_index()
)

print(" BENCHMARK DE RETEN√á√ÉO:")
print(benchmark_data.head(8))

In [None]:
# Benchmark figure: mean retention curve with spread bands, saved as
# cohort_retention_benchmark.png
plt.close('all')
fig, ax = plt.subplots(figsize=(14, 8))

months = benchmark_data['months_since_first_purchase']
mean_retention = benchmark_data['mean']
std_retention = benchmark_data['std']

# Mean curve
ax.plot(months, mean_retention, linewidth=4, marker='o', 
        label='Reten√ß√£o M√©dia', color='blue', markersize=8)

# Band of +/- 1 standard deviation around the mean
ax.fill_between(months, 
               mean_retention - std_retention, 
               mean_retention + std_retention, 
               alpha=0.3, color='blue', label='¬±1 Desvio Padr√£o')

# Min-max envelope
ax.fill_between(months, 
               benchmark_data['min'], 
               benchmark_data['max'], 
               alpha=0.1, color='gray', label='Range Min-Max')

ax.set_xlabel('Meses desde Primeira Compra', fontsize=12)
ax.set_ylabel('Taxa de Reten√ß√£o (%)', fontsize=12)
ax.set_title('Benchmark de Reten√ß√£o - Curva M√©dia com Intervalos', 
             fontsize=16, fontweight='bold', pad=20)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)

# Annotate key months. The value is looked up by month offset, not by row
# position, so a gap in the offsets cannot attach the wrong value to a label.
mean_by_month = benchmark_data.set_index('months_since_first_purchase')['mean']
key_months = [1, 3, 6, 12]
for month in key_months:
    if month in mean_by_month.index:
        retention = mean_by_month.loc[month]
        ax.annotate(f'M{month}: {retention:.1f}%', 
                   xy=(month, retention), 
                   xytext=(month+0.5, retention+2),
                   arrowprops=dict(arrowstyle='->', color='red'),
                   fontweight='bold', color='red')

plt.tight_layout()
save_plot(fig, 'cohort_retention_benchmark.png')
plt.show()

## 7. Métricas Agregadas por Cohort

In [None]:
# Per-cohort summary metrics derived from the retention rows already loaded.
#
# A (cohort, month offset) pair with no row in df_retention is ambiguous:
#   * the month had elapsed but no customer from the cohort bought -> 0%
#   * the month lies after the last month covered by the data -> unknown
# Recording the second case as 0 understates average M1/M3/M6 retention and
# makes the newest cohorts look like the worst ones. Unobservable months are
# therefore stored as NaN, which pandas skips in mean(), idxmax() and idxmin().


def _month_ordinal(value):
    """Return year * 12 + month for a date-like value (date or Timestamp)."""
    return value.year * 12 + value.month


# Latest calendar month for which df_retention holds an activity row
last_observed_ordinal = (
    df_retention['cohort_month'].map(_month_ordinal)
    + df_retention['months_since_first_purchase'].astype(int)
).max()


def _retention_at(cohort, cohort_rows, month):
    """Return the cohort's retention at `month`, NaN if not yet observable."""
    if _month_ordinal(cohort) + month > last_observed_ordinal:
        return np.nan
    rates = cohort_rows.loc[
        cohort_rows['months_since_first_purchase'] == month, 'retention_rate_pct'
    ]
    return rates.max() if not rates.empty else 0


cohort_metrics_data = []
# One pass over the groups instead of re-filtering the frame per cohort;
# sort=False keeps the cohorts in their order of appearance.
for cohort, cohort_data in df_retention.groupby('cohort_month', sort=False):
    cohort_size = cohort_data['cohort_size'].max()
    total_revenue = cohort_data['revenue'].sum()

    metrics = {
        'cohort_month': cohort,
        'cohort_size': cohort_size,
        'm1_retention': _retention_at(cohort, cohort_data, 1),
        'm3_retention': _retention_at(cohort, cohort_data, 3),
        'm6_retention': _retention_at(cohort, cohort_data, 6),
        'total_revenue': total_revenue,
        'revenue_per_customer': total_revenue / cohort_size
    }
    # Weighted score (50% M1, 30% M3, 20% M6); NaN when any component is not
    # yet observable, so incomplete cohorts are not ranked.
    metrics['cohort_quality_score'] = (metrics['m1_retention'] * 0.5 + metrics['m3_retention'] * 0.3 + metrics['m6_retention'] * 0.2)
    cohort_metrics_data.append(metrics)

df_cohort_metrics = pd.DataFrame(cohort_metrics_data)

print(" M√âTRICAS AGRAGADAS POR COHORT:")
print(df_cohort_metrics.round(2))

# Statistical summary
print(f"\n SUM√ÅRIO ESTAT√çSTICO:")
print(f"  ‚Ä¢ Cohorts analisados: {len(df_cohort_metrics)}")
print(f"  ‚Ä¢ Reten√ß√£o M1 m√©dia: {df_cohort_metrics['m1_retention'].mean():.2f}%")
print(f"  ‚Ä¢ Reten√ß√£o M3 m√©dia: {df_cohort_metrics['m3_retention'].mean():.2f}%")
print(f"  ‚Ä¢ Reten√ß√£o M6 m√©dia: {df_cohort_metrics['m6_retention'].mean():.2f}%")
print(f"  ‚Ä¢ Revenue por cliente m√©dio: R$ {df_cohort_metrics['revenue_per_customer'].mean():.2f}")
print(f"  ‚Ä¢ Cohort quality score m√©dio: {df_cohort_metrics['cohort_quality_score'].mean():.2f}")

## 8. Insights e Conclusões

In [None]:
# Final executive summary: prints headline metrics and recommendations.
# Reads df_cohort_metrics, df_state_retention and benchmark_data from
# earlier cells.
print("\n" + "="*80)
print(" RESUMO EXECUTIVO - AN√ÅLISE DE RETEN√á√ÉO POR COHORT")
print("="*80)

# Headline metrics: average retention across cohorts at M1, M3 and M6
m1_retention_avg = df_cohort_metrics['m1_retention'].mean()
m3_retention_avg = df_cohort_metrics['m3_retention'].mean()
m6_retention_avg = df_cohort_metrics['m6_retention'].mean()
critical_churn_m0_m1 = 100 - m1_retention_avg

# Best and worst cohort by quality score
best_cohort = df_cohort_metrics.loc[df_cohort_metrics['cohort_quality_score'].idxmax()]
worst_cohort = df_cohort_metrics.loc[df_cohort_metrics['cohort_quality_score'].idxmin()]

# Best state: first row of the state table, which the query orders by M1
# retention descending
best_state = df_state_retention.iloc[0]

print(f"\n M√âTRICAS CHAVE DE RETEN√á√ÉO:")
print(f"   ‚Ä¢ Reten√ß√£o M0 ‚Üí M1: {m1_retention_avg:.2f}% (Churn: {critical_churn_m0_m1:.2f}%)")
print(f"   ‚Ä¢ Reten√ß√£o M0 ‚Üí M3: {m3_retention_avg:.2f}%")
print(f"   ‚Ä¢ Reten√ß√£o M0 ‚Üí M6: {m6_retention_avg:.2f}%")
print(f"   ‚Ä¢ Plateau de reten√ß√£o: ~{benchmark_data[benchmark_data['months_since_first_purchase'] >= 6]['mean'].mean():.2f}% (M6+)")

print(f"\n PERFORMANCE POR COHORT:")
print(f"   ‚Ä¢ Melhor cohort: {best_cohort['cohort_month'].strftime('%Y-%m')} (Score: {best_cohort['cohort_quality_score']:.2f})")
print(f"   ‚Ä¢ Pior cohort: {worst_cohort['cohort_month'].strftime('%Y-%m')} (Score: {worst_cohort['cohort_quality_score']:.2f})")
print(f"   ‚Ä¢ Varia√ß√£o qualidade: {(best_cohort['cohort_quality_score'] - worst_cohort['cohort_quality_score']):.2f} pontos")

print(f"\n PERFORMANCE GEOGR√ÅFICA:")
print(f"   ‚Ä¢ Melhor estado: {best_state['customer_state']} (M1: {best_state['m1_retention_pct']:.2f}%)")
# The '+' format flag prints the real sign; a hard-coded '+' would render a
# negative difference as '+-x.xx'.
print(f"   ‚Ä¢ Diferen√ßa vs m√©dia: {(best_state['m1_retention_pct'] - m1_retention_avg):+.2f}%")
print(f"   ‚Ä¢ Estados analisados: {len(df_state_retention)} com volume significativo")

print(f"\n PADR√ïES DE CHURN:")
print(f"   ‚Ä¢ Churn cr√≠tico M0‚ÜíM1: {critical_churn_m0_m1:.2f}% (Janela de Oportunidade)")
print(f"   ‚Ä¢ Estabiliza√ß√£o: M3+ (reten√ß√£o estabiliza em ~{m3_retention_avg:.2f}%)")
print(f"   ‚Ä¢ Clientes fidelizados: {m6_retention_avg:.2f}% permanecem ativos em M6")

print(f"\n" + "="*80)
print(" RECOMENDA√á√ïES ESTRAT√âGICAS")
print("="*80)

print(f"\n PRIORIDADE 1: REDUZIR CHURN CR√çTICO (M0‚ÜíM1)")
print(f"   ‚Ä¢ Campanha win-back: D+7 e D+30 ap√≥s primeira compra")
print(f"   ‚Ä¢ Incentivo segunda compra: Cupom 15-20% v√°lido 30 dias")
print(f"   ‚Ä¢ Meta: Aumentar reten√ß√£o M1 para {m1_retention_avg + 2:.2f}% (+2%)")

print(f"\n PRIORIDADE 2: MARKETING AUTOMATIZADO")
print(f"   ‚Ä¢ Email marketing: D+7 (check-in), D+15 (oferta), D+30 (win-back)")
print(f"   ‚Ä¢ Segmenta√ß√£o: Foco em estados com alta reten√ß√£o")
print(f"   ‚Ä¢ Personaliza√ß√£o: Baseado em comportamento de compra")

print(f"\n PRIORIDADE 3: PROGRAMA FIDELIDADE")
print(f"   ‚Ä¢ Iniciar em M2: Clientes que superaram churn cr√≠tico")
print(f"   ‚Ä¢ Benef√≠cios: Frete gr√°tis, cashback, atendimento priorit√°rio")
print(f"   ‚Ä¢ Meta: Aumentar reten√ß√£o M6 para {m6_retention_avg + 1:.2f}% (+1%)")

print(f"\n PRIORIDADE 4: AN√ÅLISE CONT√çNUA")
print(f"   ‚Ä¢ Monitorar cohorts mensalmente")
print(f"   ‚Ä¢ A/B testing de estrat√©gias de reten√ß√£o")
print(f"   ‚Ä¢ Correlation analysis: NPS vs Reten√ß√£o")

print(f"\n" + "="*80)
print(" AN√ÅLISE DE RETEN√á√ÉO CONCLU√çDA")
print("="*80)