In [None]:
import pandas as pd

# Load the dataset from the Google Sheets CSV export URL.
url = "https://docs.google.com/spreadsheets/d/1eiA7CPzctNmmWStDR1YZTV2ou0L32nef/export?format=csv"
df = pd.read_csv(url)

# Text normalisation: cast each column to str, trim surrounding whitespace and
# title-case it, so the literal comparisons below ("Very Satisfied", "Online",
# "Yes") match regardless of the original casing/spacing.
for column in ("Customer_Satisfaction", "Purchase_Location", "Repeat_Customer"):
    df[column] = df[column].astype(str).str.strip().str.title()

# P(Very Satisfied): rows rated "Very Satisfied" over all rows.
very_satisfied_rows = df.loc[df["Customer_Satisfaction"] == "Very Satisfied"]
prob_very_satisfied = len(very_satisfied_rows) / len(df)

# P(Repeat | Online): restrict to online purchases first, then take the share
# of those rows whose Repeat_Customer value is "Yes".
online_customers = df.loc[df["Purchase_Location"] == "Online"]
online_repeat_rows = online_customers.loc[online_customers["Repeat_Customer"] == "Yes"]
prob_repeat_given_online = len(online_repeat_rows) / len(online_customers)

# Show the results as a clean one-column table (last expression of the cell).
probabilities = {
    "P(Very Satisfied Customer)": prob_very_satisfied,
    "P(Repeat Customer | Purchased Online)": prob_repeat_given_online,
}
pd.Series(probabilities).to_frame(name="Probability")

Unnamed: 0,Probability
P(Very Satisfied Customer),0.2
P(Repeat Customer | Purchased Online),0.5


In [None]:
# Normalise the columns this cell needs: cast to str, trim whitespace,
# title-case, so comparisons against "Yes" / "Satisfied" / "Very Satisfied" match.
for column in ("Recommend_to_Friend", "Repeat_Customer", "Customer_Satisfaction"):
    df[column] = df[column].astype(str).str.strip().str.title()

# Question 3: P(Recommend ∩ Repeat)
# Rows where both flags are "Yes", divided by the total number of rows.
recommends = df["Recommend_to_Friend"] == "Yes"
repeats = df["Repeat_Customer"] == "Yes"
prob_recommend_and_repeat = len(df[recommends & repeats]) / len(df)

# Question 4: P(Recommend | Satisfied or Very Satisfied)
# Condition on the satisfied rows first, then measure the share that recommends.
satisfied_subset = df[df["Customer_Satisfaction"].isin(["Satisfied", "Very Satisfied"])]
satisfied_recommenders = satisfied_subset[satisfied_subset["Recommend_to_Friend"] == "Yes"]
prob_recommend_given_satisfaction = len(satisfied_recommenders) / len(satisfied_subset)

# Results as a one-column table (last expression of the cell).
probabilities = {
    "P(Recommend ∩ Repeat)": prob_recommend_and_repeat,
    "P(Recommend | Satisfied or Very Satisfied)": prob_recommend_given_satisfaction,
}
pd.Series(probabilities).to_frame(name="Probability")

Unnamed: 0,Probability
P(Recommend ∩ Repeat),0.258333
P(Recommend | Satisfied or Very Satisfied),0.571429


In [None]:
# Normalise the text columns in case an earlier cell has not done so yet
# (cast to str, trim whitespace, title-case).
df["Recommend_to_Friend"] = df["Recommend_to_Friend"].astype(str).str.strip().str.title()
df["Customer_Satisfaction"] = df["Customer_Satisfaction"].astype(str).str.strip().str.title()

# Boolean masks for the two events, built once and reused below.
recommends = df["Recommend_to_Friend"] == "Yes"
is_satisfied = df["Customer_Satisfaction"].isin(["Satisfied", "Very Satisfied"])

# Marginal and joint probabilities as row-count ratios over the whole frame.
prob_recommend = len(df[recommends]) / len(df)
prob_satisfied = len(df[is_satisfied]) / len(df)
prob_both = len(df[recommends & is_satisfied]) / len(df)

# Independence check: P(A ∩ B) == P(A) * P(B).
# The probabilities are binary floats, so the product can differ from the
# directly computed ratio in the last bits even when the underlying counts are
# exactly independent. Compare within a small absolute tolerance instead of
# using exact `==`. This only checks the sample proportions; it is not a
# statistical significance test.
INDEPENDENCE_TOLERANCE = 1e-9
is_independent = abs(prob_both - prob_recommend * prob_satisfied) <= INDEPENDENCE_TOLERANCE

# Show as a table (last expression of the cell).
pd.Series({
    "P(Recommend)": prob_recommend,
    "P(Satisfied)": prob_satisfied,
    "P(Both)": prob_both,
    "Are Independent?": is_independent
}).to_frame(name="Result")

Unnamed: 0,Result
P(Recommend),0.55
P(Satisfied),0.35
P(Both),0.2
Are Independent?,False
