# üìö Instalaci√≥n e Importaci√≥n de Librer√≠as

In [20]:
# Importar librer√≠as
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Librer√≠as para an√°lisis estad√≠stico
from scipy import stats
from scipy.stats import ttest_ind, f_oneway, chi2_contingency

# Librer√≠as para machine learning (segmentaci√≥n)
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Configuraci√≥n de estilo
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (14, 8)
plt.rcParams['font.size'] = 12

# Configuraci√≥n para pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.2f}'.format)

print("‚úÖ Librer√≠as importadas correctamente")

‚úÖ Librer√≠as importadas correctamente


# üìÇ Carga Y Exploraci√≥n Inicial del Dataset

In [21]:
# Cargar dataset desde GitHub
dataset_url = "https://raw.githubusercontent.com/ChurnGuard/ChurnInsight/refs/heads/main/data-science/data/dataset_analyst_by_customer.csv"

try:
    df = pd.read_csv(dataset_url)
    print("‚úÖ Dataset cargado desde GitHub")
except:
    from google.colab import files

    # Si falla, abrir un cuadro para subir archivos
    uploaded = files.upload()

    for nombre_archivo in uploaded.keys():
        df = pd.read_csv(nombre_archivo)

    print("‚úÖ Dataset cargado desde archivo local")

‚úÖ Dataset cargado desde GitHub


In [22]:
# Vista Previa de los Datos
print("\nüîç PRIMERAS 5 FILAS:")
display(df.head())

print("\nüìã INFORMACI√ìN DE COLUMNAS:")
print(df.info())

print("\nüìà ESTAD√çSTICAS DESCRIPTIVAS:")
display(df.describe())

# Verificar valores √∫nicos por columna categ√≥rica
print("\nüéØ VALORES √öNICOS EN COLUMNAS CATEG√ìRICAS:")
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    print(f"{col}: {df[col].nunique()} valores √∫nicos")
    if df[col].nunique() < 20:
        print(f"  ‚Üí {df[col].unique()}")

# Mostrar informaci√≥n b√°sica del dataset
print("\nüìä INFORMACI√ìN DEL DATASET:")
print(f"‚Ä¢ Filas: {df.shape[0]}")
print(f"‚Ä¢ Columnas: {df.shape[1]}")
print(f"‚Ä¢ Variables demogr√°ficas: {[col for col in df.columns if col in ['age', 'gender', 'income_bracket', 'education_level', 'occupation', 'marital_status']]}")
print(f"‚Ä¢ Variables de comportamiento: {[col for col in df.columns if 'sales' in col.lower() or 'purchase' in col.lower() or 'frequency' in col.lower()]}")
print(f"‚Ä¢ Variables de churn/promociones: {[col for col in df.columns if col in ['churn', 'promo_flag', 'promotion_type', 'loyalty_program']]}")


üîç PRIMERAS 5 FILAS:


Unnamed: 0,customer_id,age,membership_years,number_of_children,quantity,unit_price,avg_purchase_value,purchase_frequency,avg_discount_used,online_purchases,in_store_purchases,total_sales,total_transactions,total_items_purchased,promotion_effectiveness,days_since_last_purchase,gender,income_bracket,marital_status,education_level,occupation,product_category,promotion_type,transaction_date,last_purchase_date,loyalty_program,churn,promo_flag
0,C1000,33,1,2,3.12,54.51,167.76,72.0,0.31,173,57,12078.86,3023,4184,0.54,924,Other,Medium,Divorced,Master's,Retired,Electronics,no_promotion,2023-06-23,2023-06-23,1,0,1
1,C1001,52,8,3,2.76,50.96,144.7,9.0,0.22,137,70,10418.14,3671,7395,0.62,916,Female,Medium,Divorced,Master's,Unemployed,Home,no_promotion,2023-07-01,2023-07-01,1,0,1
2,C1002,67,10,4,2.97,47.2,143.7,7.2,0.35,170,54,10346.25,3023,1867,0.65,915,Female,High,Divorced,Bachelor's,Student,Home,BOGO,2023-07-02,2023-07-02,1,1,1
3,C1003,32,4,3,3.32,51.35,173.64,18.0,0.3,183,64,12502.3,2303,2178,0.6,918,Female,Medium,Single,Bachelor's,Employed,Books,no_promotion,2023-06-29,2023-06-29,1,0,1
4,C1004,34,10,4,3.26,47.94,157.12,7.2,0.34,160,76,11312.85,2087,1455,0.64,923,Male,High,Single,Bachelor's,Self-Employed,Electronics,Discount,2023-06-24,2023-06-24,1,0,1



üìã INFORMACI√ìN DE COLUMNAS:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 28 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   customer_id               5000 non-null   object 
 1   age                       5000 non-null   int64  
 2   membership_years          5000 non-null   int64  
 3   number_of_children        5000 non-null   int64  
 4   quantity                  5000 non-null   float64
 5   unit_price                5000 non-null   float64
 6   avg_purchase_value        5000 non-null   float64
 7   purchase_frequency        5000 non-null   float64
 8   avg_discount_used         5000 non-null   float64
 9   online_purchases          5000 non-null   int64  
 10  in_store_purchases        5000 non-null   int64  
 11  total_sales               5000 non-null   float64
 12  total_transactions        5000 non-null   int64  
 13  total_items_purchased     5000 

Unnamed: 0,age,membership_years,number_of_children,quantity,unit_price,avg_purchase_value,purchase_frequency,avg_discount_used,online_purchases,in_store_purchases,total_sales,total_transactions,total_items_purchased,promotion_effectiveness,days_since_last_purchase,loyalty_program,churn,promo_flag
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,44.27,7.68,2.11,2.57,50.15,129.02,17.99,2.09,187.67,114.09,9226.58,2094.7,4103.72,0.96,432.99,0.53,0.28,1.0
std,14.93,4.44,1.42,0.19,2.4,11.91,19.59,4.12,167.33,170.39,859.66,969.65,1888.4,0.12,106.02,0.5,0.45,0.0
min,18.0,1.0,0.0,2.1,40.32,96.17,4.73,0.0,0.0,0.0,6889.13,355.0,710.0,0.42,367.0,0.0,0.0,1.0
25%,32.0,4.0,1.0,2.46,48.52,121.25,6.45,0.1,71.0,0.0,8664.4,1296.0,2376.0,1.0,369.0,0.0,0.0,1.0
50%,45.0,8.0,2.0,2.54,50.18,127.74,9.0,0.21,144.0,0.0,9123.35,2088.0,4189.0,1.0,374.0,1.0,0.0,1.0
75%,57.0,11.0,3.0,2.64,51.75,134.61,18.0,0.3,284.0,144.0,9632.51,2982.0,5616.0,1.0,490.0,1.0,1.0,1.0
max,70.0,15.0,4.0,3.56,58.02,190.01,72.0,14.91,720.0,720.0,13680.65,3672.0,7488.0,1.0,927.0,1.0,1.0,1.0



üéØ VALORES √öNICOS EN COLUMNAS CATEG√ìRICAS:
customer_id: 5000 valores √∫nicos
gender: 3 valores √∫nicos
  ‚Üí ['Other' 'Female' 'Male']
income_bracket: 3 valores √∫nicos
  ‚Üí ['Medium' 'High' 'Low']
marital_status: 3 valores √∫nicos
  ‚Üí ['Divorced' 'Single' 'Married']
education_level: 4 valores √∫nicos
  ‚Üí ["Master's" "Bachelor's" 'High School' 'PhD']
occupation: 5 valores √∫nicos
  ‚Üí ['Retired' 'Unemployed' 'Student' 'Employed' 'Self-Employed']
product_category: 9 valores √∫nicos
  ‚Üí ['Electronics' 'Home' 'Books' 'Beauty' 'Clothing' 'Sports' 'Home Goods'
 'Groceries' 'Toys']
promotion_type: 6 valores √∫nicos
  ‚Üí ['no_promotion' 'BOGO' 'Discount' 'Seasonal Discount' '20% Off'
 'Buy One Get One Free']
transaction_date: 378 valores √∫nicos
last_purchase_date: 378 valores √∫nicos

üìä INFORMACI√ìN DEL DATASET:
‚Ä¢ Filas: 5000
‚Ä¢ Columnas: 28
‚Ä¢ Variables demogr√°ficas: ['age', 'gender', 'income_bracket', 'marital_status', 'education_level', 'occupation']
‚Ä¢ Variables de

# üë• Issue 1: An√°lisis de Perfil de Clientes por Demograf√≠a

## ‚öô Configuraci√≥n inicial

In [23]:
# Crear copia del dataframe para an√°lisis demogr√°fico
df_demo = df.copy()

# Definir variables demogr√°ficas
demographic_vars = ['age', 'gender', 'income_bracket', 'education_level', 'occupation', 'marital_status']
behavioral_vars = ['total_sales', 'avg_purchase_value', 'purchase_frequency', 'total_transactions', 'total_items_purchased']

# Crear grupos de edad
df_demo['age_group'] = pd.cut(df_demo['age'],
                              bins=[18, 25, 35, 50, 65, 100],
                              labels=['18-25', '26-35', '36-50', '51-65', '65+'])

# Ordenar variables categ√≥ricas
if 'income_bracket' in df_demo.columns:
    income_order = ['Low', 'Medium', 'High']
    df_demo['income_bracket'] = pd.Categorical(df_demo['income_bracket'],
                                               categories=income_order, ordered=True)

if 'education_level' in df_demo.columns:
    edu_order = ['High School', 'Bachelor\'s', 'Master\'s', 'PhD']
    df_demo['education_level_ordered'] = pd.Categorical(df_demo['education_level'],
                                                       categories=edu_order, ordered=True)

## üë™ An√°lisis Descriptivo Demogr√°fico

In [24]:
print("="*80)
print("ISSUE 1: AN√ÅLISIS DE PERFIL DE CLIENTES POR DEMOGRAF√çA")
print("="*80)
print("\nüìä DISTRIBUCI√ìN DEMOGR√ÅFICA:")

# Distribuci√≥n por g√©nero
if 'gender' in df_demo.columns:
    print(f"\nüë• DISTRIBUCI√ìN POR G√âNERO:")
    gender_dist = df_demo['gender'].value_counts(normalize=True) * 100
    display(gender_dist.round(2))

# Distribuci√≥n por grupo de edad
print(f"\nüìÖ DISTRIBUCI√ìN POR GRUPO DE EDAD:")
age_dist = df_demo['age_group'].value_counts(normalize=True) * 100
display(age_dist.round(2))

# Distribuci√≥n por nivel de ingresos
if 'income_bracket' in df_demo.columns:
    print(f"\nüí∞ DISTRIBUCI√ìN POR NIVEL DE INGRESOS:")
    income_dist = df_demo['income_bracket'].value_counts(normalize=True) * 100
    display(income_dist.round(2))

ISSUE 1: AN√ÅLISIS DE PERFIL DE CLIENTES POR DEMOGRAF√çA

üìä DISTRIBUCI√ìN DEMOGR√ÅFICA:

üë• DISTRIBUCI√ìN POR G√âNERO:


Unnamed: 0_level_0,proportion
gender,Unnamed: 1_level_1
Male,34.0
Other,33.28
Female,32.72



üìÖ DISTRIBUCI√ìN POR GRUPO DE EDAD:


Unnamed: 0_level_0,proportion
age_group,Unnamed: 1_level_1
36-50,30.36
51-65,26.87
26-35,18.79
18-25,13.49
65+,10.49



üí∞ DISTRIBUCI√ìN POR NIVEL DE INGRESOS:


Unnamed: 0_level_0,proportion
income_bracket,Unnamed: 1_level_1
High,38.72
Low,31.68
Medium,29.6


## üìâ An√°lisis de Comportamiento por Variables Demogr√°ficas

In [25]:
# Funci√≥n para an√°lisis por segmento demogr√°fico
def analyze_demographic_segment(df, segment_var, metric_var='total_sales'):
    """
    Analiza una m√©trica por segmento demogr√°fico
    """
    analysis = df.groupby(segment_var).agg({
        metric_var: ['count', 'mean', 'median', 'std', 'min', 'max'],
        'customer_id': 'count'
    }).round(2)

    # Calcular percentiles
    percentiles = df.groupby(segment_var)[metric_var].agg(
        Q1=lambda x: x.quantile(0.25),
        Q3=lambda x: x.quantile(0.75)
    ).round(2)

    analysis['Q1'] = percentiles['Q1']
    analysis['Q3'] = percentiles['Q3']

    return analysis

# An√°lisis por grupo de edad
print("\nüìà COMPORTAMIENTO POR GRUPO DE EDAD:")
age_analysis = df_demo.groupby('age_group').agg({
    'total_sales': ['mean', 'median', 'std'],
    'avg_purchase_value': 'mean',
    'purchase_frequency': 'mean',
    'total_transactions': 'mean',
    'customer_id': 'count'
}).round(2)

display(age_analysis)


# An√°lisis por nivel de ingresos
if 'income_bracket' in df_demo.columns:
    print("\nüìà COMPORTAMIENTO POR NIVEL DE INGRESOS:")
    income_analysis = df_demo.groupby('income_bracket').agg({
        'total_sales': ['mean', 'median', 'std'],
        'avg_purchase_value': 'mean',
        'purchase_frequency': 'mean',
        'customer_id': 'count'
    }).round(2)

    display(income_analysis)

# An√°lisis por nivel educativo
if 'education_level' in df_demo.columns:
    print("\nüìà COMPORTAMIENTO POR NIVEL EDUCATIVO:")
    edu_analysis = df_demo.groupby('education_level').agg({
        'total_sales': ['mean', 'median'],
        'avg_purchase_value': 'mean',
        'age': 'mean',
        'customer_id': 'count'
    }).sort_values(('total_sales', 'mean'), ascending=False).round(2)

    display(edu_analysis)


üìà COMPORTAMIENTO POR GRUPO DE EDAD:


Unnamed: 0_level_0,total_sales,total_sales,total_sales,avg_purchase_value,purchase_frequency,total_transactions,customer_id
Unnamed: 0_level_1,mean,median,std,mean,mean,mean,count
age_group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
18-25,9220.17,9163.79,857.54,128.87,22.02,2095.82,665
26-35,9241.56,9131.57,855.23,129.23,15.76,2183.45,926
36-50,9224.21,9123.72,854.18,128.97,13.64,2084.33,1496
51-65,9225.43,9097.65,874.04,129.01,20.11,2069.38,1324
65+,9221.25,9100.1,858.94,129.0,23.48,1995.14,517



üìà COMPORTAMIENTO POR NIVEL DE INGRESOS:


Unnamed: 0_level_0,total_sales,total_sales,total_sales,avg_purchase_value,purchase_frequency,customer_id
Unnamed: 0_level_1,mean,median,std,mean,mean,count
income_bracket,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Low,9220.41,9106.76,863.67,128.93,17.57,1584
Medium,9206.31,9110.44,858.02,128.75,20.9,1480
High,9247.13,9141.3,857.63,129.29,16.1,1936



üìà COMPORTAMIENTO POR NIVEL EDUCATIVO:


Unnamed: 0_level_0,total_sales,total_sales,avg_purchase_value,age,customer_id
Unnamed: 0_level_1,mean,median,mean,mean,count
education_level,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
PhD,9233.08,9139.54,129.12,45.84,1273
Master's,9229.23,9137.91,129.06,44.92,1342
Bachelor's,9228.7,9116.02,129.02,43.91,1283
High School,9213.37,9095.5,128.84,42.07,1102


## üìä Visualizaciones

### Distribuci√≥n de edades

In [26]:
fig1 = px.histogram(df_demo, x='age', nbins=30,
                    title='Distribuci√≥n de Edades de Clientes',
                    labels={'age': 'Edad', 'count': 'N√∫mero de Clientes'},
                    color_discrete_sequence=['#636EFA'])
fig1.update_layout(bargap=0.1)
fig1.show()

### Ventas promedio por grupo de edad

In [27]:
age_sales = df_demo.groupby('age_group')['total_sales'].mean().reset_index()
fig2 = px.bar(age_sales, x='age_group', y='total_sales',
              title='Ventas Promedio por Grupo de Edad',
              labels={'age_group': 'Grupo de Edad', 'total_sales': 'Ventas Promedio ($)'},
              color='total_sales', color_continuous_scale='Emrld')
fig2.show()

### Diagrama de dispersi√≥n: Edad vs Ventas

In [28]:
fig3 = px.scatter(df_demo.sample(min(500, len(df_demo)), random_state=42),
                  x='age', y='total_sales',
                  color='income_bracket' if 'income_bracket' in df_demo.columns else None,
                  size='avg_purchase_value',
                  hover_data=['gender', 'education_level'],
                  title='Relaci√≥n entre Edad, Ventas e Ingresos',
                  labels={'age': 'Edad', 'total_sales': 'Ventas Totales ($)'})
fig3.show()

### Heatmap: Edad vs Ingresos

In [29]:
if all(col in df_demo.columns for col in ['age_group', 'income_bracket']):
    pivot_table = df_demo.pivot_table(values='total_sales',
                                      index='age_group',
                                      columns='income_bracket',
                                      aggfunc='mean')

    fig4 = px.imshow(pivot_table,
                    labels=dict(x="Nivel de Ingresos", y="Grupo de Edad", color="Ventas Promedio ($)"),
                    x=pivot_table.columns,
                    y=pivot_table.index,
                    title='Ventas Promedio: Edad vs Ingresos',
                    color_continuous_scale='Emrld',
                    aspect="auto")
    fig4.update_xaxes(side="top")
    fig4.show()

### Boxplot por ocupaci√≥n

In [30]:
if 'occupation' in df_demo.columns:
    top_occupations = df_demo['occupation'].value_counts().nlargest(6).index
    df_top_occ = df_demo[df_demo['occupation'].isin(top_occupations)]

    fig5 = px.box(df_top_occ, x='occupation', y='total_sales',
                  title='Distribuci√≥n de Ventas por Ocupaci√≥n',
                  labels={'occupation': 'Ocupaci√≥n', 'total_sales': 'Ventas Totales ($)'},
                  color='occupation')
    fig5.update_layout(xaxis_tickangle=-45)
    fig5.show()

## üî¨ An√°lisis Estad√≠stico

In [31]:
print("\nüî¨ AN√ÅLISIS ESTAD√çSTICO:")

# ANOVA: Diferencias en ventas por nivel de ingresos
if 'income_bracket' in df_demo.columns:
    print("\nüìä PRUEBA ANOVA - Ventas por Nivel de Ingresos:")
    income_groups = [df_demo[df_demo['income_bracket'] == level]['total_sales']
                    for level in income_order if level in df_demo['income_bracket'].unique()]

    if len(income_groups) >= 2:
        f_stat, p_value = f_oneway(*income_groups)
        print(f"  F-statistic: {f_stat:.3f}")
        print(f"  p-value: {p_value:.4f}")

        if p_value < 0.05:
            print("  ‚úÖ CONCLUSI√ìN: Hay diferencias significativas en ventas entre niveles de ingresos (p < 0.05)")
            # Prueba post-hoc (Tukey)
            from statsmodels.stats.multicomp import pairwise_tukeyhsd
            tukey = pairwise_tukeyhsd(endog=df_demo['total_sales'],
                                     groups=df_demo['income_bracket'],
                                     alpha=0.05)
            print("\n  üìã RESULTADOS TUKEY (post-hoc):")
            print(tukey)
        else:
            print("  ‚ùå CONCLUSI√ìN: No hay diferencias significativas en ventas entre niveles de ingresos")

# Correlaci√≥n edad-ventas
print(f"\nüìä CORRELACI√ìN EDAD-VENTAS:")
corr_age_sales = df_demo['age'].corr(df_demo['total_sales'])
print(f"  Coeficiente de correlaci√≥n: {corr_age_sales:.3f}")

if abs(corr_age_sales) > 0.3:
    strength = "fuerte" if abs(corr_age_sales) > 0.5 else "moderada"
    direction = "positiva" if corr_age_sales > 0 else "negativa"
    print(f"  ‚úÖ Existe una correlaci√≥n {direction} {strength} entre edad y ventas")
elif abs(corr_age_sales) > 0.1:
    print(f"  ‚ö†Ô∏è Existe una correlaci√≥n d√©bil entre edad y ventas")
else:
    print(f"  ‚ùå No existe correlaci√≥n significativa entre edad y ventas")


üî¨ AN√ÅLISIS ESTAD√çSTICO:

üìä PRUEBA ANOVA - Ventas por Nivel de Ingresos:
  F-statistic: 1.005
  p-value: 0.3660
  ‚ùå CONCLUSI√ìN: No hay diferencias significativas en ventas entre niveles de ingresos

üìä CORRELACI√ìN EDAD-VENTAS:
  Coeficiente de correlaci√≥n: -0.001
  ‚ùå No existe correlaci√≥n significativa entre edad y ventas


## üí° Insights

In [32]:
# Calcular insights
insights_demo = []

### Grupo de edad m√°s valioso

In [33]:
top_age_group = df_demo.groupby('age_group')['total_sales'].mean().idxmax()
top_age_sales = df_demo.groupby('age_group')['total_sales'].mean().max()
insights_demo.append(f"üéØ **Grupo de edad m√°s valioso**: {top_age_group} (${top_age_sales:,.0f} de ventas promedio)")

### Nivel de ingresos vs frecuencia de compra

In [34]:
if 'income_bracket' in df_demo.columns:
    top_income_freq = df_demo.groupby('income_bracket')['purchase_frequency'].mean().idxmax()
    insights_demo.append(f"üí∞ **Mayor frecuencia de compra**: Nivel de ingresos '{top_income_freq}'")

### Educaci√≥n vs valor de compra

In [35]:
if 'education_level' in df_demo.columns:
    top_edu_value = df_demo.groupby('education_level')['avg_purchase_value'].mean().idxmax()
    insights_demo.append(f"üéì **Mayor valor de compra**: Nivel educativo '{top_edu_value}'")

### Ocupaci√≥n m√°s rentable

In [36]:
if 'occupation' in df_demo.columns:
    top_occupation = df_demo.groupby('occupation')['total_sales'].mean().idxmax()
    insights_demo.append(f"üíº **Ocupaci√≥n m√°s rentable**: '{top_occupation}'")

### G√©nero vs comportamiento

In [37]:
if 'gender' in df_demo.columns:
    gender_analysis = df_demo.groupby('gender')['total_sales'].mean()
    if len(gender_analysis) > 1:
        max_gender = gender_analysis.idxmax()
        min_gender = gender_analysis.idxmin()
        ratio = gender_analysis[max_gender] / gender_analysis[min_gender]
        insights_demo.append(f"üë• **Diferencia por g√©nero**: '{max_gender}' gasta {ratio:.1f}x m√°s que '{min_gender}'")

### Resumen de Insights

In [38]:
print("\n" + "="*80)
print("üí° INSIGHTS ")
print("="*80)
print("\nüìå **INSIGHTS CLAVE:**")
for i, insight in enumerate(insights_demo, 1):
    print(f"{i}. {insight}")


üí° INSIGHTS 

üìå **INSIGHTS CLAVE:**
1. üéØ **Grupo de edad m√°s valioso**: 26-35 ($9,242 de ventas promedio)
2. üí∞ **Mayor frecuencia de compra**: Nivel de ingresos 'Medium'
3. üéì **Mayor valor de compra**: Nivel educativo 'PhD'
4. üíº **Ocupaci√≥n m√°s rentable**: 'Employed'
5. üë• **Diferencia por g√©nero**: 'Other' gasta 1.0x m√°s que 'Male'
