In [None]:
from pathlib import Path
import pandas as pd


In [None]:
# Directory that holds the exported launch data; created (with parents) if missing.
output_dir = Path("data", "complete-launch")
output_dir.mkdir(parents=True, exist_ok=True)
output_dir


In [None]:
# Import relevant tables
users_df = pd.read_csv(output_dir / "users.csv")
chatroom_df = pd.read_csv(output_dir / "chatrooms.csv")
messages_df = pd.read_csv(output_dir / "messages.csv")

# Add limit_reached column to users_df from chatroom_df.
# validate="many_to_one" makes pandas raise a MergeError if a chatroom id appears
# more than once in chatrooms.csv. Without it, a repeated id would silently
# duplicate user rows in the left merge and inflate every count computed below.
users_df = users_df.merge(
    chatroom_df[["id", "limit_reached"]].rename(columns={"id": "chatroom_id"}),
    on="chatroom_id",
    how="left",
    validate="many_to_one",
)

# Load both post-survey exports, skipping the first two data rows and discarding
# rows that have no ResponseId.
# NOTE(review): presumably the two skipped rows are extra header rows written by
# the survey export tool -- confirm against the raw files.
post_survey_chat_df = (
    pd.read_csv(output_dir / "post_survey_chat.csv")
    .iloc[2:]
    .dropna(subset=["ResponseId"])
)
post_survey_no_chat_df = (
    pd.read_csv(output_dir / "post_survey_no_chat.csv")
    .iloc[2:]
    .dropna(subset=["ResponseId"])
)

# Respondent IDs seen in each post-survey
chat_ids = set(post_survey_chat_df["RESPONDENT_ID"].tolist())
no_chat_ids = set(post_survey_no_chat_df["RESPONDENT_ID"].tolist())


def _post_survey_kind(user_id):
    """Return "chat" or "no_chat" for the survey containing ``user_id``, else NaN.

    The chat survey takes precedence when an ID appears in both sets.
    """
    if user_id in chat_ids:
        return "chat"
    if user_id in no_chat_ids:
        return "no_chat"
    return float("nan")


# Record on each user which post-survey (if any) they took
users_df["post_survey"] = users_df["response_id"].apply(_post_survey_kind)

# Stack the two post-surveys into a single frame (chat rows first)
post_survey_df = pd.concat([post_survey_chat_df, post_survey_no_chat_df])

# (Disabled) Add difficulties_what column to users_df from the combined post_survey_df
# users_df = users_df.merge(
#     post_survey_df[["RESPONDENT_ID", "difficulties_what"]].rename(
#         columns={"RESPONDENT_ID": "response_id"}
#     ),
#     on="response_id",
#     how="left",
# )


In [None]:
# Split users by stated position
support_users = users_df.loc[users_df["position"].eq("SUPPORT")]
oppose_users = users_df.loc[users_df["position"].eq("OPPOSE")]

total_users = users_df.shape[0]
support_count, oppose_count = support_users.shape[0], oppose_users.shape[0]

# Sanity check: the two positions together must account for every user
assert support_count + oppose_count == total_users
support_count, oppose_count, total_users


In [None]:
# Users with a non-null treatment are considered matched; the rest are unmatched
matched_users = users_df.loc[users_df["treatment"].notna()]
unmatched_users = users_df.loc[users_df["treatment"].isna()]

matched_count, unmatched_count = matched_users.shape[0], unmatched_users.shape[0]

matched_count, unmatched_count


In [None]:
# Break the unmatched users down by position
unmatched_support = unmatched_users.loc[unmatched_users["position"].eq("SUPPORT")]
unmatched_oppose = unmatched_users.loc[unmatched_users["position"].eq("OPPOSE")]

unmatched_support_count, unmatched_oppose_count = (
    unmatched_support.shape[0],
    unmatched_oppose.shape[0],
)

unmatched_support_count, unmatched_oppose_count


In [None]:
# Split matched users by whether the "view" column is filled in
matched_users_with_view = matched_users.dropna(subset=["view"])
matched_users_no_view = matched_users.loc[matched_users["view"].isna()]

matched_users_with_view_count = matched_users_with_view.shape[0]
matched_users_no_view_count = matched_users_no_view.shape[0]

matched_users_with_view_count, matched_users_no_view_count


In [None]:
# Get users who joined a chatroom at least once
matched_users_joined_chatroom = matched_users_with_view[
    matched_users_with_view["started_chat_time"].notnull()
]
# Get users who never joined a chatroom
matched_users_no_chatroom = matched_users_with_view[
    matched_users_with_view["started_chat_time"].isnull()
]
# Get users who joined a chatroom but whose partner never joined: a joiner
# qualifies when their chatroom_id also appears among the users who never joined.
# A single isin() lookup replaces re-filtering the whole frame once per user.
# dropna() keeps a missing chatroom_id from ever counting as a match, because
# isin() would otherwise treat NaN as equal to NaN.
matched_users_partner_no_chatroom = matched_users_joined_chatroom[
    matched_users_joined_chatroom["chatroom_id"].isin(
        matched_users_no_chatroom["chatroom_id"].dropna()
    )
]

matched_users_joined_chatroom_count = len(matched_users_joined_chatroom)
matched_users_no_chatroom_count = len(matched_users_no_chatroom)
matched_users_partner_no_chatroom_count = len(matched_users_partner_no_chatroom)

(
    matched_users_joined_chatroom_count,
    matched_users_no_chatroom_count,
    matched_users_partner_no_chatroom_count,
)


In [None]:
# Get users who joined a chatroom and whose partner joined (NOTE: this is not currently
# used in the diagram)
# A joiner has a partner when more than one joiner shares their chatroom_id, i.e.
# the id is duplicated within matched_users_joined_chatroom. This replaces
# re-filtering the whole frame once per user. The notna() guard is needed because
# duplicated() would otherwise treat rows with a missing chatroom_id as duplicates
# of each other.
matched_users_joined_chatroom_with_partner = matched_users_joined_chatroom[
    matched_users_joined_chatroom["chatroom_id"].notna()
    & matched_users_joined_chatroom["chatroom_id"].duplicated(keep=False)
]

matched_users_joined_chatroom_with_partner_count = len(
    matched_users_joined_chatroom_with_partner
)

matched_users_joined_chatroom_with_partner_count


In [None]:
# Users who never joined a chatroom but somehow provided a leave reason.
# This should be empty
matched_users_no_chatroom_with_leave_reason = matched_users_no_chatroom.dropna(
    subset=["leave_reason"]
)
# Users whose partner never joined a chatroom, split by whether they gave a leave
# reason. The "with reason" group should also be empty
# TODO: See what these responses were
matched_users_partner_no_chatroom_with_leave_reason = (
    matched_users_partner_no_chatroom.dropna(subset=["leave_reason"])
)
matched_users_partner_no_chatroom_without_leave_reason = (
    matched_users_partner_no_chatroom.loc[
        matched_users_partner_no_chatroom["leave_reason"].isna()
    ]
)
# Users whose partner joined a chatroom but who left early with a reason
matched_users_partner_joined_chatroom_with_leave_reason = (
    matched_users_joined_chatroom_with_partner.dropna(subset=["leave_reason"])
)

# Chatroom IDs (as ints) of the users above who left early with a reason
joined_chatroom_with_leave_reason_chatroom_ids = set(
    matched_users_partner_joined_chatroom_with_leave_reason["chatroom_id"]
    .dropna()
    .astype(int)
    .tolist()
)
# Joined with a partner, gave no leave reason, and the chatroom limit was not reached
matched_users_partner_joined_chatroom_without_leave_reason = (
    matched_users_joined_chatroom_with_partner.loc[
        matched_users_joined_chatroom_with_partner["leave_reason"].isna()
        & matched_users_joined_chatroom_with_partner["limit_reached"].eq(False)
    ]
)
# ... of those, the ones whose chatroom ID is in the "left with reason" set
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason = matched_users_partner_joined_chatroom_without_leave_reason.loc[
    matched_users_partner_joined_chatroom_without_leave_reason["chatroom_id"].isin(
        joined_chatroom_with_leave_reason_chatroom_ids
    )
]
# ... and the ones whose chatroom ID is not in that set
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason = matched_users_partner_joined_chatroom_without_leave_reason.loc[
    ~matched_users_partner_joined_chatroom_without_leave_reason["chatroom_id"].isin(
        joined_chatroom_with_leave_reason_chatroom_ids
    )
]

# Every matched user who left with a reason, used to verify the split above is
# exhaustive
matched_users_with_leave_reason = matched_users.dropna(subset=["leave_reason"])


# Row counts for each leave-reason subset
matched_users_no_chatroom_with_leave_reason_count = (
    matched_users_no_chatroom_with_leave_reason.shape[0]
)
matched_users_partner_no_chatroom_with_leave_reason_count = (
    matched_users_partner_no_chatroom_with_leave_reason.shape[0]
)
matched_users_partner_no_chatroom_without_leave_reason_count = (
    matched_users_partner_no_chatroom_without_leave_reason.shape[0]
)
matched_users_partner_joined_chatroom_with_leave_reason_count = (
    matched_users_partner_joined_chatroom_with_leave_reason.shape[0]
)
matched_users_partner_joined_chatroom_without_leave_reason_count = (
    matched_users_partner_joined_chatroom_without_leave_reason.shape[0]
)
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_count = (
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason.shape[0]
)
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_count = (
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason.shape[0]
)
matched_users_with_leave_reason_count = matched_users_with_leave_reason.shape[0]

# Display all counts. The final two entries are a consistency check: the sum of the
# two "with leave reason" groups should equal the overall number of matched users
# with a leave reason. If they differ, there's a bad assumption here somewhere.
# Maybe it was Rick.
(
    matched_users_no_chatroom_with_leave_reason_count,
    matched_users_partner_no_chatroom_with_leave_reason_count,
    matched_users_partner_no_chatroom_without_leave_reason_count,
    matched_users_partner_joined_chatroom_with_leave_reason_count,
    matched_users_partner_joined_chatroom_without_leave_reason_count,
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_count,
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_count,
    (
        matched_users_partner_no_chatroom_with_leave_reason_count
        + matched_users_partner_joined_chatroom_with_leave_reason_count
    ),
    matched_users_with_leave_reason_count,
)


In [None]:
# Users who joined a chatroom whose limit_reached flag equals 1
matched_users_joined_chatroom_reached_limit = matched_users_joined_chatroom.loc[
    matched_users_joined_chatroom["limit_reached"].eq(1)
]

matched_users_joined_chatroom_reached_limit_count = (
    matched_users_joined_chatroom_reached_limit.shape[0]
)

matched_users_joined_chatroom_reached_limit_count


In [None]:
def _took_post_survey(users, survey):
    """Return the rows of ``users`` whose ``post_survey`` column equals ``survey``."""
    return users[users["post_survey"] == survey]


# Users who finished conversations and ended up in post surveys
matched_users_joined_chatroom_reached_limit_post_survey = _took_post_survey(
    matched_users_joined_chatroom_reached_limit, "chat"
)
# Users who left conversation early without leave reason where partner left with reason
# and ended up in post surveys
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_post_survey = _took_post_survey(
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason,
    "chat",
)
# Users who left conversation early without leave reason where partner left without
# reason and ended up in post surveys
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_post_survey = pd.concat(
    [
        _took_post_survey(
            matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason,
            "chat",
        ),
        # TODO: This might be a little fraught
        _took_post_survey(
            matched_users_partner_no_chatroom_without_leave_reason, "chat"
        ),
    ]
)
# Users who left conversation early with leave reason and ended up in post
# surveys
matched_users_joined_chatroom_with_leave_reason_post_survey = pd.concat(
    [
        _took_post_survey(
            matched_users_partner_joined_chatroom_with_leave_reason, "chat"
        ),
        _took_post_survey(matched_users_partner_no_chatroom_with_leave_reason, "chat"),
    ]
)
# Users who never joined a chatroom and ended up in post surveys
matched_users_no_chatroom_no_chat_post_survey = _took_post_survey(
    matched_users_no_chatroom, "no_chat"
)
matched_users_no_chatroom_chat_post_survey = _took_post_survey(
    matched_users_no_chatroom, "chat"
)
# Users who never provided a view and ended up in post surveys
matched_users_no_view_post_survey = _took_post_survey(matched_users_no_view, "no_chat")
# Unmatched supporters who ended up in post surveys
unmatched_support_post_survey = _took_post_survey(unmatched_support, "no_chat")
# Unmatched opponents who ended up in post surveys
unmatched_opponent_post_survey = _took_post_survey(unmatched_oppose, "no_chat")
# Unmatched users who ended up in CHAT post surveys (bug)
unmatched_post_survey_chat = _took_post_survey(unmatched_users, "chat")


# Row counts for each post-survey subset
matched_users_joined_chatroom_reached_limit_post_survey_count = (
    matched_users_joined_chatroom_reached_limit_post_survey.shape[0]
)
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_post_survey_count = (
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_post_survey.shape[0]
)
matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_post_survey_count = (
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_post_survey.shape[0]
)
matched_users_joined_chatroom_with_leave_reason_post_survey_count = (
    matched_users_joined_chatroom_with_leave_reason_post_survey.shape[0]
)
matched_users_no_chatroom_no_chat_post_survey_count = (
    matched_users_no_chatroom_no_chat_post_survey.shape[0]
)
matched_users_no_chatroom_chat_post_survey_count = (
    matched_users_no_chatroom_chat_post_survey.shape[0]
)
matched_users_no_view_post_survey_count = matched_users_no_view_post_survey.shape[0]
unmatched_support_post_survey_count = unmatched_support_post_survey.shape[0]
unmatched_opponent_post_survey_count = unmatched_opponent_post_survey.shape[0]
unmatched_post_survey_chat_count = unmatched_post_survey_chat.shape[0]

# Display every count, in the same order they were computed
(
    matched_users_joined_chatroom_reached_limit_post_survey_count,
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_post_survey_count,
    matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_post_survey_count,
    matched_users_joined_chatroom_with_leave_reason_post_survey_count,
    matched_users_no_chatroom_no_chat_post_survey_count,
    matched_users_no_chatroom_chat_post_survey_count,
    matched_users_no_view_post_survey_count,
    unmatched_support_post_survey_count,
    unmatched_opponent_post_survey_count,
    unmatched_post_survey_chat_count,
)


In [None]:
# Sankey node labels, grouped by diagram layer. A node's position in this list is
# the integer index the Sankey trace uses to refer to it.
node_labels = [
    # layer 1
    "Total users",
    # layer 2
    "Matched",
    "Didn't match",
    # layer 3
    "Unmatched Supporter",
    "Unmatched Opponent",
    "Provided view",
    "Didn't provide view",
    # layer 4
    "Joined chatroom",
    "Never chatted",
    # layer 5
    "Partner never joined chatroom",
    "Joined chatroom with partner",
    # layer 6
    "Left with reason",
    # TODO: Actually add cases for this—should be has start time, no end time, no leave
    #  reason
    "Left without reason",
    "Partner left with reason",
    "Finished conversation",
    # layer 7
    "No-chat post-survey",
    "Chat post-survey",
    # No link below currently targets this node
    "Didn't take post-survey",
]

# Look up a node's index by its label so links can be written by name instead of
# by hand-maintained integers.
sankey_node_index = {label: position for position, label in enumerate(node_labels)}
assert len(sankey_node_index) == len(node_labels), "node labels must be unique"

# One (source label, target label, value) row per link. Keeping the three fields
# together in a single row stops sources, targets and values from drifting apart.
sankey_links = [
    ("Total users", "Matched", matched_count),
    ("Total users", "Didn't match", unmatched_count),
    ("Didn't match", "Unmatched Supporter", unmatched_support_count),
    ("Didn't match", "Unmatched Opponent", unmatched_oppose_count),
    ("Matched", "Provided view", matched_users_with_view_count),
    # (END CONDITION)
    ("Matched", "Didn't provide view", matched_users_no_view_count),
    ("Provided view", "Joined chatroom", matched_users_joined_chatroom_count),
    # (END CONDITION)
    ("Provided view", "Never chatted", matched_users_no_chatroom_count),
    # (END CONDITION)
    (
        "Joined chatroom",
        "Partner never joined chatroom",
        matched_users_partner_no_chatroom_count,
    ),
    # (END CONDITION)
    (
        "Partner never joined chatroom",
        "Left with reason",
        matched_users_partner_no_chatroom_with_leave_reason_count,
    ),
    # left without reason + partner left without reason (END CONDITION)
    (
        "Partner never joined chatroom",
        "Left without reason",
        matched_users_partner_no_chatroom_without_leave_reason_count,
    ),
    (
        "Joined chatroom",
        "Joined chatroom with partner",
        matched_users_joined_chatroom_with_partner_count,
    ),
    # (END CONDITION)
    (
        "Joined chatroom with partner",
        "Left with reason",
        matched_users_partner_joined_chatroom_with_leave_reason_count,
    ),
    # (END CONDITION)
    (
        "Joined chatroom with partner",
        "Finished conversation",
        matched_users_joined_chatroom_reached_limit_count,
    ),
    # left without reason + partner left with reason (END CONDITION)
    (
        "Joined chatroom with partner",
        "Partner left with reason",
        matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_count,
    ),
    # left without reason + partner left without reason (END CONDITION)
    (
        "Joined chatroom with partner",
        "Left without reason",
        matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_count,
    ),
    (
        "Left with reason",
        "Chat post-survey",
        matched_users_joined_chatroom_with_leave_reason_post_survey_count,
    ),
    # left without reason + partner left with reason
    (
        "Partner left with reason",
        "Chat post-survey",
        matched_users_partner_joined_chatroom_without_leave_reason_partner_left_with_reason_post_survey_count,
    ),
    # left without reason + partner left without reason
    (
        "Left without reason",
        "Chat post-survey",
        matched_users_partner_joined_chatroom_without_leave_reason_partner_left_without_reason_post_survey_count,
    ),
    (
        "Finished conversation",
        "Chat post-survey",
        matched_users_joined_chatroom_reached_limit_post_survey_count,
    ),
    (
        "Never chatted",
        "No-chat post-survey",
        matched_users_no_chatroom_no_chat_post_survey_count,
    ),
    # (THIS SHOULDN'T HAPPEN)
    (
        "Never chatted",
        "Chat post-survey",
        matched_users_no_chatroom_chat_post_survey_count,
    ),
    (
        "Didn't provide view",
        "No-chat post-survey",
        matched_users_no_view_post_survey_count,
    ),
    (
        "Unmatched Supporter",
        "No-chat post-survey",
        unmatched_support_post_survey_count,
    ),
    (
        "Unmatched Opponent",
        "No-chat post-survey",
        unmatched_opponent_post_survey_count,
    ),
    # (BUG) unmatched users -> chat post-survey
    ("Didn't match", "Chat post-survey", unmatched_post_survey_chat_count),
]

# Parallel lists in the shape the Sankey trace expects
link_sources = [sankey_node_index[source] for source, _, _ in sankey_links]
link_targets = [sankey_node_index[target] for _, target, _ in sankey_links]
link_values = [value for _, _, value in sankey_links]


In [None]:
import plotly.graph_objects as go

# Build the Sankey trace from the node labels and link lists
sankey_trace = go.Sankey(
    # Format everything as integers
    valueformat=".0f",
    # Define nodes
    node={
        "pad": 15,
        "thickness": 15,
        "line": {"color": "black", "width": 0.5},
        "label": node_labels,
    },
    # Add links
    link={
        "source": link_sources,
        "target": link_targets,
        "value": link_values,
    },
)

attrition_figure = go.Figure(data=[sankey_trace])

# update_layout returns the figure, so leaving it as the last expression renders it
attrition_figure.update_layout(
    title_text="Depolarizing Chatroom Launch Attrition", font_size=12
)


In [None]:
# Print each link as "<source label> -> <target label>: <value>"
for link_number, source_index in enumerate(link_sources):
    source_label = node_labels[source_index]
    target_label = node_labels[link_targets[link_number]]
    print(f"{source_label} -> {target_label}: {link_values[link_number]}")
