## Hatemongers ride on echo chambers to escalate hate speech diffusion

In [None]:
# Imports

import pandas as pd
import numpy as np
import davidson_model
import fountana_model
import waseem_model

from tqdm import tqdm
import seaborn as sns
sns.set()

from collections import Counter
import pickle
from copy import deepcopy
from dateutil import parser

from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt
tqdm.pandas()

In [None]:
from ekphrasis.classes.preprocessor import TextPreProcessor
from ekphrasis.classes.tokenizer import SocialTokenizer
from ekphrasis.dicts.emoticons import emoticons

# ekphrasis pipeline used to turn raw post text into the token list that is
# joined into `content_ek`.
text_processor = TextPreProcessor(
    # 'url' was listed twice in the original list; one entry is enough.
    normalize=['url', 'email', 'percent', 'money', 'phone',
        'time', 'date', 'number'],
    annotate={"hashtag", "elongated", "repeated",
        'emphasis', 'censored'},
    fix_html=True,
    segmenter="twitter",
    corrector="twitter",
    unpack_hashtags=True,
    unpack_contractions=True,
    spell_correct_elong=False,
    # Tokens are lower-cased by the tokenizer itself.
    tokenizer=SocialTokenizer(lowercase=True).tokenize,
    dicts=[emoticons]
)

### Dataframe specifications

Our code works for any social media platform, provided the data is modelled as a dataframe with the columns specified below, where each row of the dataframe corresponds to one post:

1. `id` -> ID of the post
2. `content` -> Original content of the post
3. `content_ek` -> The preprocessed textual content for the post
4. `created_at` -> Time of posting of the post
5. `parent` -> ID of the parent post if this post is a retweet/comment/reblog of another post, else None
6. `account_id` -> ID of the user who posted this post

In [None]:
# Preprocess every post: run `content` through the ekphrasis pipeline and join
# the returned tokens with single spaces.
# NOTE(review): `df` is not created in the visible cells; it has to be loaded
# beforehand following the dataframe specification.
df['content_ek'] = df.content.progress_apply(lambda x: " ".join(text_processor.pre_process_doc(x)))

In [None]:
# Score the preprocessed posts with the Davidson model, 1000 posts per call.
# The result is a list of per-batch outputs (flattened in a later cell).
_davidson_texts = df.content_ek.values
davidson_scores = [
    davidson_model.score_set(_davidson_texts[start:start + 1000], only_hate=True)
    for start in tqdm(range(0, len(df), 1000))
]

In [None]:
# Score the preprocessed posts with the Waseem model, 1000 posts per call.
# The result is a list of per-batch outputs (flattened in a later cell).
_waseem_texts = df.content_ek.values
waseem_scores = [
    waseem_model.score_set(_waseem_texts[start:start + 1000], only_hate=True)
    for start in tqdm(range(0, len(df), 1000))
]

In [None]:
# Score the preprocessed posts with the Founta model, 500 posts per call
# (half the batch size used for the other two models).
_founta_texts = df.content_ek.values
founta_scores = [
    fountana_model.score_set(_founta_texts[start:start + 500], only_hate=True)
    for start in tqdm(range(0, len(df), 500))
]

In [None]:
# Flatten the per-batch model outputs into one score per post, in post order.
davidson_score_list = [score for batch in davidson_scores for score in batch]
waseem_score_list = [score for batch in waseem_scores for score in batch]
founta_score_list = [score for batch in founta_scores for score in batch]

In [None]:
# Attach the flattened per-post scores of each model as new columns.
# Each list must have exactly len(df) entries for the assignment to succeed.
df['hate_dson'] = davidson_score_list
df['hate_waseem'] = waseem_score_list
df['hate_founta'] = founta_score_list

In [None]:
# Working copy of the data without the columns that the analysis below never reads.
_unused_columns = [
    'h_score_davidson', 'h_type', 'mentions', 'tags', 'emojis',
    'in_reply_to_id', 'in_reply_to_account_id', 'replies_count', 'reblogs_count',
]
df_dropped = df.drop(columns=_unused_columns)

In [None]:
# Binarisation cut-offs, indexed 0/1/2 for the Davidson/Waseem/Founta scores
# (that is how the following cell indexes this list).
thresh = [0.5, 0.5, 0.5]

In [None]:
# Per-model binary hate flag: 1 when the model score reaches its threshold.
df_dropped['d_bin'] = (np.asarray(davidson_score_list) >= thresh[0]).astype('int')
df_dropped['w_bin'] = (np.asarray(waseem_score_list) >= thresh[1]).astype('int')
df_dropped['f_bin'] = (np.asarray(founta_score_list) >= thresh[2]).astype('int')

In [None]:
# Number of models (0-3) that flag the post as hateful.
df_dropped['cumm_hate'] = df_dropped[['d_bin', 'w_bin', 'f_bin']].sum(axis=1)

In [None]:
# Merge "all three models agree" (3) into class 2, leaving the classes 0, 1 and 2.
df_dropped['cumm_hate'] = df_dropped['cumm_hate'].replace(3, 2)

In [None]:
# Histogram of the collapsed model-vote classes.
df_dropped.cumm_hate.plot.hist()

In [None]:
# Unweighted mean of the three model scores for each post.
df_dropped['avg_score'] = (df_dropped.hate_dson + df_dropped.hate_waseem + df_dropped.hate_founta)/3

In [None]:
# Compare the score distributions of the three models and their mean.
df_dropped.boxplot(column = ['hate_dson', 'hate_waseem', 'hate_founta', 'avg_score'])

In [None]:
# Binary hate flag used by the cascade analysis: Davidson score strictly above 0.4.
# NOTE(review): this cut-off differs from the 0.5 stored in `thresh` — confirm it is intentional.
df_dropped['hate'] = np.array(df_dropped.hate_dson > 0.4 ,dtype='uint')

In [None]:
# Positions of posts with a Davidson score below 0.3 (np.where returns a tuple of index arrays).
no_hate = np.where(df_dropped.hate_dson < 0.3)

In [None]:
# Positions of posts with a Davidson score in [0.3, 0.6).
mid_hate = np.where((df_dropped.hate_dson >= 0.3) & (df_dropped.hate_dson < 0.6))

In [None]:
# Positions of posts with a Davidson score of 0.6 or more.
high_hate = np.where(df_dropped.hate_dson >= 0.6)

In [None]:
# Three-level label per post (0/1/2), filled in by the next three cells; starts at 0.
hate_preds = np.zeros(len(df_dropped))

In [None]:
# Low band -> 0 (already the initial value of the array; kept for symmetry).
hate_preds[no_hate] = 0

In [None]:
# Middle band -> 1.
hate_preds[mid_hate] = 1

In [None]:
# High band -> 2.
hate_preds[high_hate] = 2

In [None]:
# Number of posts per three-level label (keys are the float labels 0.0/1.0/2.0).
hate_dict = dict(Counter(list(hate_preds)))

In [None]:
# Distribution plot of the collapsed model-vote classes.
sns.displot(df_dropped[['cumm_hate']], x='cumm_hate')

In [None]:
# Kernel density estimate of the model-vote classes.
# `shade=` is deprecated in seaborn in favour of `fill=`, which this notebook
# already uses in its joint plots.
sns.kdeplot(df_dropped['cumm_hate'], fill=True)

### Extracting cascades

In [None]:
import networkx as nx

In [None]:
# Undirected graph over posts; edges (added below) link a post to its parent post.
casc_graph = nx.Graph()

In [None]:
# Add one node per post, keyed by post id, carrying the scores computed above.
# itertuples() replaces iteration over `df_dropped.iloc`, which builds a Series
# for every row and gives tqdm no length to report progress against.
# NOTE: the node attribute `parent` is True when the post has NO parent, i.e.
# it marks cascade roots; the cascade-extraction cell reads it with that meaning.
for row in tqdm(df_dropped.itertuples(index=False), total=len(df_dropped)):
    # Three-level Davidson class with the same cut-offs as the `cumm_hate_2` column.
    if row.hate_dson < 0.2:
        dson_class = 0
    elif row.hate_dson < 0.5:
        dson_class = 1
    else:
        dson_class = 2
    casc_graph.add_node(
        row.id,
        hate_score=[row.hate_dson, row.hate_waseem, row.hate_founta, row.avg_score],
        hate_bin=[row.d_bin, row.w_bin, row.f_bin],
        cumm_hate=row.cumm_hate,
        hate=row.hate,
        time=parser.parse(row.created_at),
        parent=pd.isna(float(row.parent)),
        cumm_hate_2=dson_class,
    )

In [None]:
# Connect every post that has a parent to that parent.
# itertuples() replaces the slow per-row `.iloc` iteration and lets tqdm show a total.
# A parent id that is not itself a post in the data becomes an attribute-less node.
for row in tqdm(df_dropped.itertuples(index=False), total=len(df_dropped)):
    if not pd.isna(float(row.parent)):
        casc_graph.add_edge(row.id, row.parent)

In [None]:
# A cascade is a connected component of the post graph with at least 3 posts.
cascs = [i for i in nx.connected_components(casc_graph) if len(i) >= 3]

In [None]:
# Subgraph view of `casc_graph` for each cascade (node attributes are shared with the parent graph).
cascs_sg = [casc_graph.subgraph(i) for i in cascs]

In [None]:
# Cascade subgraphs

# Map each cascade's source post to the cascade subgraph.
# The source is the first node whose `parent` attribute is truthy (that flag is
# True for posts without a parent); cascades with no such node are left out.
CAS = {}
for subgraph in cascs_sg:
    source = next(
        (n for n in subgraph.nodes if subgraph.nodes[n].get('parent', False)),
        None,
    )
    if source is not None:
        CAS[source] = subgraph

In [None]:
# Collect the binary `hate` flag of every non-source post in a cascade, grouped
# by the three-level Davidson class (`cumm_hate_2`) of the cascade's source post.
# Posts without a `hate` attribute are counted as 1.
hate_scores_0 = []
hate_scores_1 = []
hate_scores_2 = []
_post_buckets = {0: hate_scores_0, 1: hate_scores_1}

for source, cascade in CAS.items():
    source_hate = cascade.nodes[source].get('cumm_hate_2', -1)
    if source_hate == -1:
        continue
    # Anything that is neither 0 nor 1 goes to the "high" bucket.
    bucket = _post_buckets.get(source_hate, hate_scores_2)
    for node in cascade.nodes:
        if node != source:
            bucket.append(cascade.nodes[node].get('hate', 1))

In [None]:
def _split_fractions(scores):
    """Return (non-hate fraction, hate fraction) of the 0/1 flags in *scores*.

    Both fractions are 0 when *scores* has no 0/1 entries, instead of raising
    ZeroDivisionError as the inline divisions did.
    """
    non_hate_n = scores.count(0)
    hate_n = scores.count(1)
    total = non_hate_n + hate_n
    if total == 0:
        return 0, 0
    return non_hate_n / total, hate_n / total


# One row per source-post class: [class, fraction of non-hate replies, fraction of hate replies].
interaction_fracs = [
    [source_class, *_split_fractions(scores)]
    for source_class, scores in enumerate((hate_scores_0, hate_scores_1, hate_scores_2))
]

In [None]:
# Table of reply fractions per source-post class. The original cell passed an
# undefined name `arr`; the rows computed in the previous cell are `interaction_fracs`.
plotting_df = pd.DataFrame(interaction_fracs, columns=['source', 'non-hate', 'hate'])

#### Plotting fraction of interactions grouped by type of source post

In [None]:
# Grouped bar chart: share of non-hate vs hate replies for each source-post class.
X = ['low', 'medium', 'high']
non_hate = plotting_df['non-hate'].values
hate = plotting_df['hate'].values
X_axis = np.arange(len(X))

# Two bars per class, centred on the tick and shifted by +/- 0.2.
for _shift, _heights, _name in ((-0.2, non_hate, 'non-hate'), (0.2, hate, 'hate')):
    plt.bar(X_axis + _shift, _heights, 0.4, label=_name)

plt.xticks(X_axis, X)
plt.xlabel("Source type")
plt.ylabel("Fraction of interactions")
plt.title("Hate attracted by type of source")
plt.legend()
plt.show()

In [None]:
# Bar chart of a normalised "total" next to the hate fraction per source class.
# NOTE(review): 80740 is a hard-coded constant, presumably a dataset-specific
# count — confirm and derive it from the data instead.
# NOTE(review): `hate` is not defined in this cell; it is the array left over
# from the previous plotting cell.
X = ['low', 'medium', 'high']
total = plotting_df['non-hate'].values/80740 + plotting_df['hate'].values/80740
  
X_axis = np.arange(len(X))
  
# The first bar series has no label, so only 'hate' shows up in the legend.
plt.bar(X_axis - 0.2, total, 0.4)
plt.bar(X_axis + 0.2, hate, 0.4, label = 'hate')
  
plt.xticks(X_axis, X)
plt.xlabel("Source type")
plt.ylabel("Fraction of interactions")
plt.legend()
plt.show()

In [None]:
def _davidson_class(score):
    """Map a Davidson score to 0 (< 0.2), 1 (< 0.5) or 2 (everything else)."""
    if score < 0.2:
        return 0
    if score < 0.5:
        return 1
    return 2


# Three-level hate class per post derived from the Davidson score alone.
df_dropped['cumm_hate_2'] = df_dropped['hate_dson'].progress_apply(_davidson_class)

In [None]:
# Post id -> id of the account that wrote it.
# Built straight from the two columns instead of iterating `df_dropped.iloc`
# row by row; for a repeated post id the last row still wins.
post_author = dict(zip(df_dropped['id'], df_dropped['account_id']))

In [None]:
# Account id -> list of the three-level hate classes of all its posts, in row order.
# Appending in place avoids re-copying the author's whole list for every post
# (`get(...) + [x]` was quadratic per author), and zipping the two columns
# avoids the slow per-row `.iloc` iteration.
author_hate = {}

for account_id, hate_class in tqdm(
    zip(df_dropped['account_id'], df_dropped['cumm_hate_2']),
    total=len(df_dropped),
):
    author_hate.setdefault(account_id, []).append(hate_class)

In [None]:
# Undirected user-interaction graph: nodes are account ids, edges are added below.
G = nx.Graph()

In [None]:
# One node per author. The original loop looked up post_author[post id] for
# every row, which yields exactly the values of `post_author`; adding them in
# one call avoids the slow per-row `.iloc` iteration.
G.add_nodes_from(post_author.values())

In [None]:
# Link the author of each reply/reblog to the author of its parent post.
# Membership is tested with `in`: the previous `post_author.get(parent, False)`
# also skipped parents whose author id is falsy (e.g. 0).
for row in tqdm(df_dropped.itertuples(index=False), total=len(df_dropped)):
    if pd.isna(float(row.parent)) or row.parent not in post_author:
        continue
    G.add_edge(post_author[row.id], post_author[row.parent])

In [None]:
# Drop self-loops (an author interacting with their own post) before the core decomposition.
G.remove_edges_from(nx.selfloop_edges(G))

In [None]:
# Keep only the largest connected component of the user graph.
G = G.subgraph(max(nx.connected_components(G), key=len))

In [None]:
# Account id -> core number within the largest component.
author_core = nx.core_number(G)

In [None]:
# Classify every author by how many of their posts fall in hate class 1 or 2:
#   4 or more -> 2, 2 or 3 -> 1, fewer -> 0.
author_hate_classify = {}
for a, post_classes in author_hate.items():
    hateful = sum(1 for post_class in post_classes if post_class == 1 or post_class == 2)
    if hateful >= 4:
        author_class = 2
    elif hateful >= 2:
        author_class = 1
    else:
        author_class = 0
    author_hate_classify[a] = author_class

In [None]:
# Core number -> [count of class-0 users, class-1 users, class-2 users].
core_hate = {}
for a, c in author_core.items():
    core_hate.setdefault(c, [0, 0, 0])[author_hate_classify[a]] += 1

In [None]:
def plot_hate_core_plots():
    """Plot absolute user counts per hate class across groups of k-cores.

    Reads the module-level ``core_hate`` mapping (core number ->
    ``[non-hate, medium-hate, high-hate]`` user counts) and draws one triple of
    bars for every group of ``combinedCount`` consecutive core numbers,
    starting at core 1. Returns nothing; the figure is shown with ``plt.show()``.
    """
    sns.set_style("white")
    sns.set_style("ticks")
    plt.figure(figsize=(10, 6), dpi=80)
    combinedCount = 3
    # Group up to the largest core number present. The previous bound,
    # len(core_hate) - 1, could leave out the highest core(s).
    max_core = max(core_hate, default=0)
    non_hate = []
    med_hate = []
    high_hate = []
    for c in range(1, max_core + 1, combinedCount):
        counts = [0, 0, 0]
        for k in range(combinedCount):
            for hate_class, n_users in enumerate(core_hate.get(c + k, [0, 0, 0])):
                counts[hate_class] += n_users
        non_hate.append(counts[0])
        med_hate.append(counts[1])
        high_hate.append(counts[2])
    n_groups = len(non_hate)
    X_axis = np.arange(n_groups)
    width = 0.3
    plt.bar(X_axis - width, non_hate, width, label = 'Non hate')
    plt.bar(X_axis, med_hate, width, label='Medium hate')
    plt.bar(X_axis + width, high_hate, width, label = 'High hate')
    # despine() only touches existing axes, so it has to run after plotting.
    sns.despine(top=True, right=True)
    plt.xticks(X_axis, range(1, n_groups + 1))
    plt.xlabel("Core")
    # This variant plots raw counts, so the axis must not say "Fraction".
    plt.ylabel("Number of users in core")
    plt.title("Hateful users in core decomposition of reddit network")
    plt.legend()
    plt.show()

In [None]:
plot_hate_core_plots()

#### Plotting core decomposition and distribution of type of users

In [None]:
def plot_hate_core_plots():
    """Plot the share of each user hate class across groups of k-cores.

    Reads the module-level ``core_hate`` mapping (core number ->
    ``[non-hate, medium-hate, high-hate]`` user counts). Cores are grouped in
    runs of ``combinedCount`` consecutive core numbers starting at core 1, and
    each bar is the class count divided by all users in that group. Returns
    nothing; the figure is shown with ``plt.show()``.
    """
    sns.set_style("white")
    sns.set_style("ticks")
    plt.figure(figsize=(10, 6), dpi=80)
    combinedCount = 3
    # Group up to the largest core number present. The previous bound,
    # len(core_hate) - 1, could leave out the highest core(s).
    max_core = max(core_hate, default=0)
    non_hate = []
    med_hate = []
    high_hate = []
    for c in range(1, max_core + 1, combinedCount):
        counts = [0, 0, 0]
        for k in range(combinedCount):
            for hate_class, n_users in enumerate(core_hate.get(c + k, [0, 0, 0])):
                counts[hate_class] += n_users
        total = sum(counts)
        # A group with no users gets zero-height bars instead of a ZeroDivisionError.
        for series, n_users in zip((non_hate, med_hate, high_hate), counts):
            series.append(n_users / total if total else 0)
    n_groups = len(non_hate)
    X_axis = np.arange(n_groups)
    width = 0.3
    plt.bar(X_axis - width, non_hate, width, label = 'Non hate')
    plt.bar(X_axis, med_hate, width, label='Medium hate')
    plt.bar(X_axis + width, high_hate, width, label = 'High hate')
    # despine() only touches existing axes, so it has to run after plotting.
    sns.despine(top=True, right=True)
    plt.xticks(X_axis, range(1, n_groups + 1))
    plt.xlabel("Core")
    plt.ylabel("Fraction of users in core")
    plt.title("Hateful users in core decomposition of reddit network")
    plt.legend()
    plt.show()

In [None]:
plot_hate_core_plots()

In [None]:
# Collect the binary `hate` flag of every non-source post in a cascade, grouped
# by the hate class of the user who wrote the cascade's source post.
# Posts without a `hate` attribute are counted as 1.
hate_auth_0 = []
hate_auth_1 = []
hate_auth_2 = []
_user_buckets = {0: hate_auth_0, 1: hate_auth_1}

for source, cascade in CAS.items():
    source_hate = author_hate_classify.get(post_author[source], -1)
    if source_hate == -1:
        continue
    # Anything that is neither 0 nor 1 goes to the "high" bucket.
    bucket = _user_buckets.get(source_hate, hate_auth_2)
    for node in cascade.nodes:
        if node != source:
            bucket.append(cascade.nodes[node].get('hate', 1))

In [None]:
def _split_fractions(scores):
    """Return (non-hate fraction, hate fraction) of the 0/1 flags in *scores*.

    Both fractions are 0 when *scores* has no 0/1 entries, instead of raising
    ZeroDivisionError as the inline divisions did.
    """
    non_hate_n = scores.count(0)
    hate_n = scores.count(1)
    total = non_hate_n + hate_n
    if total == 0:
        return 0, 0
    return non_hate_n / total, hate_n / total


# One row per source-user class: [class, fraction of non-hate replies, fraction of hate replies].
user_int_fracs = [
    [source_class, *_split_fractions(scores)]
    for source_class, scores in enumerate((hate_auth_0, hate_auth_1, hate_auth_2))
]

In [None]:
# Table of reply fractions per source-user class. The original cell passed an
# undefined name `arr`; the rows computed in the previous cell are `user_int_fracs`.
plotting_df = pd.DataFrame(user_int_fracs, columns=['source', 'non-hate', 'hate'])

#### Plotting fraction of interactions grouped by type of source user

In [None]:
# Grouped bar chart: share of non-hate vs hate replies for each source-user class.
X = ['low', 'medium', 'high']
non_hate = plotting_df['non-hate'].values
hate = plotting_df['hate'].values
X_axis = np.arange(len(X))

# Two bars per class, centred on the tick and shifted by +/- 0.2.
for _shift, _heights, _name in ((-0.2, non_hate, 'non-hate'), (0.2, hate, 'hate')):
    plt.bar(X_axis + _shift, _heights, 0.4, label=_name)

plt.xticks(X_axis, X)
plt.xlabel("Source user type")
plt.ylabel("Fraction of interactions")
plt.title("Hate attracted by type of source user")
plt.legend()
plt.show()

In [None]:
# Source post -> [set of posts reachable from it in the post graph, size of that set].
parents_dict = {}
for source in tqdm(CAS):
    reachable = nx.descendants(casc_graph, source)
    parents_dict[source] = [reachable, len(reachable)]

In [None]:
# Size of every cascade (number of posts reachable from its source).
cascade_sizes = [entry[1] for entry in parents_dict.values()]

In [None]:
# Keep only cascades with at least `min_size` reachable posts (deep-copied so
# later edits cannot touch `parents_dict`).
min_size = 20
filtered_dict = {
    source: deepcopy(entry)
    for source, entry in parents_dict.items()
    if entry[1] >= min_size
}

# First surviving cascade; raises IndexError when nothing passes the filter.
key = list(filtered_dict)[0]

In [None]:
from scipy.signal import savgol_filter
from scipy.special import softmax

def get_timeseries_for_cascade(p, window_days=2):
    """Return the time-ordered posts of cascade *p* within a window after the source.

    Reads the module-level ``filtered_dict`` (source post -> [descendants, size])
    and ``casc_graph`` (for each post's ``time`` attribute).

    Args:
        p: Id of the cascade's source post; must be a key of ``filtered_dict``.
        window_days: Only posts strictly earlier than the source post's time
            plus this many days are kept. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        A pair ``(ts, ids)``: ``ts`` is an object array with one ``[time, id]``
        row per kept post (source included) sorted by time, and ``ids`` is its
        second column.
    """
    # Imported here because the notebook only imports timedelta in a later cell.
    from datetime import timedelta

    root_time = casc_graph.nodes[p]['time']
    # Read the attributes without writing into the graph: the previous version
    # stored an extra 'id' key in every descendant's live attribute dict.
    rows = [[casc_graph.nodes[d]['time'], d] for d in filtered_dict[p][0]]
    rows.append([root_time, p])
    vals = np.array(rows, dtype=object)
    vals = vals[vals[:, 0].argsort()]
    cutoff = (root_time + timedelta(days=window_days)).timestamp()
    in_window = np.array([t.timestamp() for t in vals[:, 0]]) < cutoff
    vals = vals[in_window]
    return vals, vals[:, 1]


In [None]:
from datetime import datetime, timedelta
# Two-day window (get_timeseries_for_cascade builds its own identical timedelta).
d = timedelta(days = 2)
# NOTE(review): plot_zero_crossings sets its own local kernel_size = 5; this
# global does not appear to be read in the visible cells.
kernel_size = 12
# NOTE: this rebinds `thresh`, replacing the three-element list of model
# thresholds defined near the top of the notebook.
thresh=0.2

In [None]:
def predict_hate(text):
    """Return the softmax of the classifier logits for *text*.

    NOTE(review): `model` and `tokenizer` are not defined in the visible cells;
    they must exist as globals before this function is called.
    """
    encoded = tokenizer(text, return_tensors="pt")
    logits = model(**encoded).logits.detach().numpy()
    return softmax(logits)

def NormalizeData(data):
    """Min-max scale *data* to the range [0, 1].

    Args:
        data: Array-like of numbers.

    Returns:
        A float array of the same shape. When every value is identical the
        result is all zeros (the plain formula would divide 0 by 0 and yield NaN).
    """
    data = np.asarray(data, dtype=float)
    lowest = np.min(data)
    span = np.max(data) - lowest
    if span == 0:
        return np.zeros_like(data)
    return (data - lowest) / span

def plot_zero_crossings(key):
    """Plot the smoothed second difference of a cascade's post times and its sign changes.

    Args:
        key: Source post id, passed on to ``get_timeseries_for_cascade``.

    Returns:
        ``(zero_crossings, before, at, after)``: the indices where the smoothed
        signal changes sign, followed by how many of the posts just before, at
        and just after those indices have ``hate == 1`` in ``casc_graph``.
    """
    time_series, ids = get_timeseries_for_cascade(key)
    T = list(time_series[:, 0].flatten() - time_series[0, 0])
    # Seconds since the first post, padded with a 0 on both sides.
    T = [0] + [i.total_seconds() for i in T] + [0]
    # Second difference; entry k is centred on T[k + 1], i.e. on post k.
    laplacian_T = [T[i+1] + T[i-1] - 2*T[i] for i in range(1, len(T) - 1)]
    # The last entry is computed against the artificial trailing 0, so drop it.
    laplacian_T = laplacian_T[:-1]
    savgol_smooth = savgol_filter(laplacian_T, 5, 3)
    kernel_size = 5
    kernel = np.ones(kernel_size) / kernel_size
    mean_smooth = np.convolve(savgol_smooth, kernel, mode='same')
    zero_crossings = np.where(np.diff(np.sign(mean_smooth)))[0]
    print(len(zero_crossings))
    hate_cross_1 = []
    hate_cross_0 = []
    hate_cross_2 = []
    n_posts = len(ids)
    for z in zero_crossings:
        # Without the bounds checks, z == 0 made ids[z - 1] wrap around to the
        # last post of the cascade.
        if z - 1 >= 0:
            hate_cross_0.append(casc_graph.nodes[ids[z-1]].get('hate', 0))

        hate_cross_1.append(casc_graph.nodes[ids[z]].get('hate', 0))

        if z + 1 < n_posts:
            hate_cross_2.append(casc_graph.nodes[ids[z+1]].get('hate', 0))

    sns.set_style("darkgrid")
    plt.plot(range(len(mean_smooth)), mean_smooth)
    plt.plot(zero_crossings, np.zeros(len(zero_crossings)), 'x')
    plt.show()
    return zero_crossings, Counter(hate_cross_0).get(1, 0), Counter(hate_cross_1).get(1, 0), Counter(hate_cross_2).get(1, 0)

In [None]:
# Example run on the 11th filtered cascade; raises IndexError when fewer than
# 11 cascades passed the size filter.
key = list(filtered_dict.keys())[10] 
plot_zero_crossings(key)

In [None]:
# cdf_vals[c][depth] -> list of delays (seconds after the source post) at which
# cascades whose source post has vote class c first reached that depth.
cdf_vals = [{}, {}, {}]
failed = 0

for i in CAS:
    desc = nx.shortest_path_length(CAS[i], i)
    base_time = CAS[i].nodes[i]['time'].timestamp()

    # Earliest timestamp seen at each depth of this cascade.
    levels = {}
    for d in desc:
        post_time = CAS[i].nodes[d]['time'].timestamp()
        levels[desc[d]] = min(levels.get(desc[d], float('inf')), post_time)

    source_hate = CAS[i].nodes[i].get('cumm_hate', -1)

    if source_hate != -1:
        for l in levels:
            # Append to this class's own list. The previous code rebuilt the
            # list from cdf_vals[0], so classes 1 and 2 were seeded with
            # class-0 delays and never accumulated their own.
            cdf_vals[source_hate].setdefault(l, []).append(levels[l] - base_time)

title = "CDF velocity based on source post"

In [None]:
# Median and mean delay per depth for each source class; depths with 10 or
# fewer samples are skipped. Depth keys are expected to be 0..n-1 without gaps.
cdf_medians = [[], [], []]
cdf_mean = [[], [], []]
for l, L in enumerate(cdf_vals):
    for key in range(len(L)):
        delays = L[key]
        if len(delays) > 10:
            cdf_medians[l].append(np.median(delays))
            cdf_mean[l].append(np.mean(delays))

In [None]:
# Step plot of the median delay per cascade depth (log y-axis).
# Only classes 0 and 2 are drawn; the medium class is left out.
plt.figure(figsize=(10, 10), dpi=100)
plt.xlabel('Depth of cascade')
plt.ylabel('Seconds after original post')
plt.yscale("log")
labels = ['non-hate', 'medium-hate', 'high-hate']
for l in (0, 2):
    X = cdf_medians[l]
    p = np.arange(len(X))
    plt.step(p, X, label=labels[l])
plt.title(title)
plt.legend()

In [None]:
# cdf_vals[c][depth] -> list of delays (seconds after the source post) at which
# cascades whose source user has hate class c first reached that depth.
cdf_vals = [{}, {}, {}]
failed = 0

for i in CAS:
    desc = nx.shortest_path_length(CAS[i], i)
    base_time = CAS[i].nodes[i]['time'].timestamp()

    # Earliest timestamp seen at each depth of this cascade.
    levels = {}
    for d in desc:
        post_time = CAS[i].nodes[d]['time'].timestamp()
        levels[desc[d]] = min(levels.get(desc[d], float('inf')), post_time)

    source_hate = author_hate_classify.get(post_author[i], -1)

    if source_hate != -1:
        for l in levels:
            # Append to this class's own list. The previous code rebuilt the
            # list from cdf_vals[0], so classes 1 and 2 were seeded with
            # class-0 delays and never accumulated their own.
            cdf_vals[source_hate].setdefault(l, []).append(levels[l] - base_time)

title = "CDF velocity based on source user"

In [None]:
# Median and mean delay per depth for each source-user class; depths with 10 or
# fewer samples are skipped. Depth keys are expected to be 0..n-1 without gaps.
cdf_medians = [[], [], []]
cdf_mean = [[], [], []]
for l, L in enumerate(cdf_vals):
    for key in range(len(L)):
        delays = L[key]
        if len(delays) > 10:
            cdf_medians[l].append(np.median(delays))
            cdf_mean[l].append(np.mean(delays))

In [None]:
# Step plot of the median delay per cascade depth (log y-axis).
# Only classes 0 and 2 are drawn; the medium class is left out.
plt.figure(figsize=(10, 10), dpi=100)
plt.xlabel('Depth of cascade')
plt.ylabel('Seconds after original post')
plt.yscale("log")
labels = ['non-hate', 'medium-hate', 'high-hate']
for l in (0, 2):
    X = cdf_medians[l]
    p = np.arange(len(X))
    plt.step(p, X, label=labels[l])
plt.title(title)
plt.legend()

In [None]:
def moving_average(x, w, t='valid'):
    """Return the length-*w* moving average of *x*.

    *t* is handed to ``np.convolve`` as its mode ('valid', 'same' or 'full').
    """
    window = np.ones(w)
    summed = np.convolve(x, window, mode=t)
    return summed / w

In [None]:
# For every cascade: post times in its first 24 hours, the indices where the
# growth acceleration changes sign, and the hate labels of source post and user.
results = {}
failed = 0

for i in tqdm(CAS):
    cascade = CAS[i]
    base_time = cascade.nodes[i]['time'].timestamp()
    timelines = []
    content_arr = []
    for d in nx.shortest_path_length(cascade, i):
        post_time = cascade.nodes[d].get('time')
        if post_time is None:
            # Node without attributes (a parent id that is not a post in the data).
            continue
        # Offset of post d from the source. The previous code read nodes[i]
        # here, which made every offset 0.
        timelines.append(post_time.timestamp() - base_time)
        content_arr.append(d)
    timelines = np.array(timelines)
    content_arr = np.array(content_arr)
    sorter = np.argsort(timelines)
    timelines = timelines[sorter]
    # Reorder the post ids with the same permutation (was `timelines[sorter]`).
    content_arr = content_arr[sorter]

    # Keep the first 24 hours only.
    filt = timelines < 86400
    timelines = timelines[filt]
    content_arr = content_arr[filt]
    if len(timelines) >= 2:
        # Cumulative post count over time -> velocity -> acceleration.
        # NOTE(review): posts with identical timestamps make np.gradient divide
        # by zero (inf/nan values plus a RuntimeWarning) — decide how to treat them.
        vel = np.gradient(np.arange(len(content_arr)), timelines)
        acc = np.gradient(vel, timelines)
        zero_crossings = np.where(np.diff(np.sign(acc)))[0]
    else:
        # np.gradient needs at least two samples.
        zero_crossings = np.array([], dtype=int)
    results[i] = [timelines, zero_crossings, cascade.nodes[i]['hate'], author_hate_classify[post_author[i]]]

In [None]:
# One row per cascade: [posts within 24 h, number of zero crossings,
# source post hate flag, source user hate class, unused 0].
mappings = np.zeros((len(results), 5))
for counter, i in enumerate(tqdm(results)):
    entry = results[i]
    mappings[counter, :4] = [len(entry[0]), len(entry[1]), entry[2], entry[3]]
counter = len(results)

In [None]:
# Row masks by the source post's hate flag (column 2 of `mappings`).
no_hate_zero = mappings[:, 2] == 0
hate_zero = mappings[:, 2] >= 1

In [None]:
# Row masks by the source user's hate class (column 3 of `mappings`);
# classes 1 and 2 are pooled into "hate".
no_hate_zero_user = mappings[:, 3] == 0
hate_zero_user = mappings[:, 3] >= 1

In [None]:
# Linear fit of zero-crossing count against cascade size for non-hate source posts.
_rows = mappings[no_hate_zero]
_sizes = _rows[:, 0]
_crossings = _rows[:, 1]
X = _sizes.reshape(-1, 1)
y = _crossings.reshape(-1, 1)
reg = LinearRegression().fit(X, y)
preds_non_hate = reg.predict(X)
X_non_hate = _sizes.reshape(-1, 1)
plt.plot(_sizes, _crossings, 'o')
plt.plot(_sizes, preds_non_hate)

In [None]:
# Linear fit of zero-crossing count against cascade size for hateful source posts.
_rows = mappings[hate_zero]
_sizes = _rows[:, 0]
_crossings = _rows[:, 1]
X = _sizes.reshape(-1, 1)
y = _crossings.reshape(-1, 1)
reg = LinearRegression().fit(X, y)
preds_hate = reg.predict(X)
X_med_hate = _sizes.reshape(-1, 1)
plt.plot(_sizes, _crossings, 'o')
plt.plot(_sizes, preds_hate)

In [None]:
# Overlay the two fitted lines (source-post grouping) on one figure.
# The 'high-hate' line uses `hate_zero`, i.e. every source post with hate flag >= 1.
plt.figure(figsize=(10, 10), dpi=80)
plt.plot(mappings[no_hate_zero][:, 0], preds_non_hate, label='non-hate')
plt.plot(mappings[hate_zero][:, 0], preds_hate, label='high-hate')
plt.legend()

In [None]:
# Linear fit of zero-crossing count against cascade size for non-hate source users.
_rows = mappings[no_hate_zero_user]
_sizes = _rows[:, 0]
_crossings = _rows[:, 1]
X = _sizes.reshape(-1, 1)
y = _crossings.reshape(-1, 1)
reg = LinearRegression().fit(X, y)
preds_non_hate_user = reg.predict(X)
X_non_hate_user = _sizes.reshape(-1, 1)
plt.plot(_sizes, _crossings, 'o')
plt.plot(_sizes, preds_non_hate_user)

In [None]:
# Linear fit of zero-crossing count against cascade size for hateful source users.
_rows = mappings[hate_zero_user]
_sizes = _rows[:, 0]
_crossings = _rows[:, 1]
X = _sizes.reshape(-1, 1)
y = _crossings.reshape(-1, 1)
reg = LinearRegression().fit(X, y)
preds_hate_user = reg.predict(X)
X_hate_user = _sizes.reshape(-1, 1)
plt.plot(_sizes, _crossings, 'o')
plt.plot(_sizes, preds_hate_user)

In [None]:
# Overlay the two fitted lines (source-user grouping) on one figure.
# The 'high-hate user' line uses `hate_zero_user`, i.e. user classes 1 and 2 together.
plt.figure(figsize=(10, 10), dpi=80)
plt.plot(mappings[hate_zero_user][:, 0], preds_hate_user, label='high-hate user')
plt.plot(mappings[no_hate_zero_user][:, 0], preds_non_hate_user, label='non-hate user')
plt.legend()

## Echo Chamber

In [None]:
import tensorflow_hub as hub

In [None]:
import os
# NOTE(review): the cache directory is set to an empty string — presumably a
# placeholder; point it at a writable directory before loading the encoder.
os.environ["TFHUB_CACHE_DIR"] = ""

In [None]:
# Load the Universal Sentence Encoder (v4) from TF Hub.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

In [None]:
# Embed the preprocessed text of every cascade source post, 16 posts per call,
# into a (number of source posts, 512) array.
_is_source = df_dropped.id.isin(list(CAS))
text = df_dropped[_is_source].content_ek.values
embeds = np.zeros((len(text), 512))
for start in tqdm(range(0, len(text), 16)):
    stop = start + 16
    embeds[start:stop] = embed(text[start:stop])

In [None]:
import hdbscan
from sklearn.manifold import TSNE
import seaborn as sns

import umap

from sklearn.decomposition import PCA

In [None]:
# Dimensionality reduction and clustering of the source-post embeddings:
# PCA (512 -> 256 dims), then UMAP (-> 64 dims), then HDBSCAN on the UMAP output.
red_embeds = PCA(n_components=256).fit_transform(embeds)
print("PCA_done")
standard_embedding = trans = umap.UMAP(n_neighbors=5, n_components=64).fit_transform(red_embeds)
print("Umap done")
# Quick look at the first two UMAP dimensions. No `c=` values are passed, so the
# former cmap='Spectral' had no effect (matplotlib only warned); it is dropped.
plt.scatter(standard_embedding[:, 0], standard_embedding[:, 1], s=0.1);
clusterer = hdbscan.HDBSCAN(min_cluster_size=30, min_samples=3).fit(standard_embedding)
print("Clustering done")

In [None]:
# Rows of the cascade source posts, selected with the same mask (and therefore
# the same row order) as the `text` array that was embedded.
roots = df_dropped[df_dropped.id.isin([i for i in CAS])].copy()

In [None]:
# Cluster label of each source post; relies on `roots` having the row order of the embedded texts.
roots['labels'] = clusterer.labels_

In [None]:
# Cluster-size histogram with one bin per distinct label.
plt.hist(clusterer.labels_, bins=len(Counter(roots.labels)))
plt.show()

In [None]:
# Scatter of two of the 64 UMAP dimensions (indices 6 and 7), coloured by cluster label.
plt.figure(figsize=(15, 10))
plt.scatter(standard_embedding[:, 6], standard_embedding[:, 7], s=20, linewidth=0, c=clusterer.labels_, alpha=0.8)

In [None]:
# Compact view of the source posts with their cluster label and hate columns.
# NOTE(review): `echo` does not appear to be read in the visible cells.
echo = roots[['id', 'account_id', 'labels', 'hate', 'cumm_hate']]

In [None]:
# Post id -> cluster label: every post in a cascade inherits the label of the
# cascade's source post.
id_to_label = {}
for source_id, label in tqdm(roots[['id', 'labels']].values):
    cascade_nodes = CAS[source_id].nodes
    id_to_label[source_id] = label
    id_to_label.update(dict.fromkeys(cascade_nodes, label))

In [None]:
# label_to_user[label][author] -> number of that author's posts in cascades of
# that cluster. `user_to_label` is created here and filled in the next cell.
user_to_label = {}
label_to_user = {}
for post_id in tqdm(id_to_label):
    auth = post_author[post_id]
    per_label = label_to_user.setdefault(id_to_label[post_id], dict())
    per_label[auth] = per_label.get(auth, 0) + 1

In [None]:
# Drop (cluster, author) pairs with fewer than 2 posts ...
remove_keys = [
    (l, a)
    for l in label_to_user
    for a in label_to_user[l]
    if label_to_user[l][a] < 2
]
for l, a in remove_keys:
    del label_to_user[l][a]

# ... then invert the mapping: user_to_label[author][label] -> post count.
# Label -1 is skipped.
for l, per_author in label_to_user.items():
    if l == -1:
        continue
    for a, n_posts in per_author.items():
        user_to_label.setdefault(a, dict())[l] = n_posts

In [None]:
# Authors that are active in at least two clusters, mapped to the set of those clusters.
count = 0
target_users = {
    u: set(labels)
    for u, labels in user_to_label.items()
    if len(labels) >= 2
}

In [None]:
# Fixed ordering of the multi-cluster authors, used to enumerate user pairs below.
target_user_ids = list(target_users.keys())

In [None]:
# Candidate echo chambers: for every pair of multi-cluster users, the sorted
# tuple of clusters they share is the chamber key and both users become members.
# possible_matches_N holds the chambers whose key has at least N clusters.
# The dicts for 3 and 4 were declared and plotted but never filled; they are
# populated here (possible_matches_2 is built exactly as before).
possible_matches_4 = {}
possible_matches_2 = {}
possible_matches_3 = {}
_matches_by_min_shared = {2: possible_matches_2, 3: possible_matches_3, 4: possible_matches_4}
count = 0  # number of user pairs sharing at least two clusters
for t in tqdm(range(len(target_user_ids))):
    tuser = target_user_ids[t]
    tlabels = target_users[tuser]
    for u in range(t+1, len(target_user_ids)):
        uuser = target_user_ids[u]
        match_key = tuple(sorted(tlabels.intersection(target_users[uuser])))
        if len(match_key) < 2:
            continue
        count += 1
        for min_shared, store in _matches_by_min_shared.items():
            if len(match_key) >= min_shared:
                members = store.setdefault(match_key, set())
                members.add(tuser)
                members.add(uuser)

In [None]:
# Sweep the minimum chamber size (4, 6, ..., 20 users) for each candidate set,
# store the surviving chambers per threshold and plot how many survive.
plt.figure(figsize=(4,4), dpi=80)
labels = ["min_2", "min_3", "min_4"]
consider_echo_store = {"min_2": {}, "min_3": {}, "min_4": {}}
for label, P in zip(labels, [possible_matches_2, possible_matches_3, possible_matches_4]):
    thresh_to_size = []
    # The loop variable is no longer called `thresh`, so it does not overwrite
    # the global of that name.
    for min_users in range(4, 21, 2):
        consider_echo = {}
        sizes_echo = []
        for p in P:
            if len(P[p]) >= min_users:
                sizes_echo.append(len(P[p]))
                consider_echo[p] = P[p]
        thresh_to_size.append([min_users, len(consider_echo)])
        consider_echo_store[label][min_users] = deepcopy(consider_echo)
    thresh_to_size = np.array(thresh_to_size)
    plt.plot(thresh_to_size[:, 0], thresh_to_size[:, 1], label=label)
    # (the original `counter += 1F` here was a syntax error)
plt.legend()

In [None]:
# Empirical CDF of echo-chamber sizes.
# NOTE(review): `sizes_echo` is whatever the last iteration of the threshold
# sweep left behind (last candidate set, largest threshold) — confirm that this
# is the distribution meant to be shown.
plt.figure(figsize=(10, 4), dpi=100)
plt.xlabel('Size of echo chamber')
plt.ylabel('CDF')
# The single-pass `for l in range(0, 1, 2)` wrapper and the legend() call
# without any labelled artist were removed.
X = sorted(sizes_echo)
p = np.arange(len(X))/len(X)
plt.step(X, p)
plt.title('Size of echo chamber')

In [None]:
# Chamber key -> {user hate class: number of members}, filled two cells below.
hate_echo = {}

In [None]:
# Echo chambers used from here on: at least 2 shared clusters and at least 10 users.
consider_echo = consider_echo_store['min_2'][10]

In [None]:
# Count the members of every echo chamber per user hate class.
# Users without a classification are skipped explicitly; the previous bare
# `except: pass` would also have hidden any other error.
for key in consider_echo:
    hate_echo[key] = {0: 0, 1: 0, 2: 0}
    for user in consider_echo[key]:
        user_class = author_hate_classify.get(user)
        if user_class in hate_echo[key]:
            hate_echo[key][user_class] += 1

In [None]:
# Per-chamber composition statistics, in the iteration order of `hate_echo`:
#   purity        1 - minority/majority, comparing class-2 users with the rest
#   secondpurity  share of class-2 users among all classified members
#   uniform       number of class-2 users
#   non_uniform   number of class-0 and class-1 users
purity = []
secondpurity = []
hate_count = 0       # chambers where class-2 users are at least half
non_hate_count = 0   # all other chambers
uniform = []
non_uniform = []
for h in hate_echo:
    E = hate_echo[h]
    others = E[0] + E[1]
    high = E[2]
    # The small constants keep the ratios defined when a count is 0.
    if others <= high:
        purity.append(1 - (others + 0.00001)/(high + 0.00001))
        hate_count += 1
    else:
        purity.append(1 - (high + 0.00001)/(others + 0.00001))
        non_hate_count += 1
    secondpurity.append(high/(others + high + 0.00000001))
    uniform.append(high)
    non_uniform.append(others)

In [None]:
# `consider_keys` was only defined in a much later cell, so this cell failed
# when the notebook is run top to bottom; it is derived here in the same way.
consider_keys = list(consider_echo.keys())

# Per cluster label: [sum of source-post classes, number of source posts,
# number of class-2 source posts, share of class-2 source posts].
echo_mat = roots[['labels', 'cumm_hate_2']].values
echo_color = {}
for label, post_class in echo_mat:
    stats = echo_color.setdefault(label, [0, 0, 0])
    stats[0] += post_class
    stats[1] += 1
    if post_class == 2:
        stats[2] += 1
for stats in echo_color.values():
    stats.append(stats[2]/stats[1])

# Per echo chamber (a tuple of cluster labels):
# [source posts, class-2 source posts, share of class-2 source posts].
consider_echo_colors = {}
for chamber in consider_keys:
    vals_to_add = [0, 0]
    for label in chamber:
        vals_to_add[0] += echo_color[label][1]
        vals_to_add[1] += echo_color[label][2]
    vals_to_add.append(vals_to_add[1]/vals_to_add[0])
    consider_echo_colors[chamber] = vals_to_add

In [None]:
# Share of class-2 source posts per echo chamber, in `consider_keys` order.
hate_post_fraction = [consider_echo_colors[chamber][2] for chamber in consider_keys]

In [None]:
# One row per echo chamber. The five lists are stacked as rows and transposed,
# so they must all have one entry per chamber, in the same chamber order.
jointdf = pd.DataFrame([purity, uniform, secondpurity, non_uniform, hate_post_fraction]).T
jointdf.columns = ['purity', 'hatecount', 'secondpurity', 'non_hate_count', 'hate_post_fraction']

In [None]:
# Joint density of chamber purity against the share of highly hateful source posts.
# `shade=True` is the deprecated spelling of `fill=True`, which was already passed.
sns.jointplot(data=jointdf, x='purity', y='hate_post_fraction', kind="kde", color='green', fill=True, cmap="hot", thresh=0, levels=300)
# The file is named .pdf but was written with format="svg", which puts SVG
# content into a .pdf file; the format is now inferred from the extension.
plt.savefig("GabEchoHeat.pdf", bbox_inches="tight")

In [None]:
# Keep only the chambers whose purity and secondpurity both lie in [0, 1].
_purity_ok = (jointdf['purity'] >= 0) & (jointdf['purity'] <= 1)
_second_ok = (jointdf['secondpurity'] >= 0) & (jointdf['secondpurity'] <= 1)
jointdf2 = jointdf[_purity_ok & _second_ok].copy()

In [None]:
# Joint density of the two purity measures on the range-filtered frame.
g = sns.jointplot(data=jointdf2, x='purity', y='secondpurity', kind="kde", color='red', fill=True, thresh=0, cmap='summer')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
# Scatter of secondpurity (x) against purity (y) with marginal histograms.
fig = plt.figure(figsize=(8,8), dpi=60)
gs = GridSpec(4, 4)

ax_scatter = fig.add_subplot(gs[1:4, 0:3])
ax_hist_y = fig.add_subplot(gs[0,0:3])    # top panel, sits above the scatter's x-axis
ax_hist_x = fig.add_subplot(gs[1:4, 3])   # right panel, sits beside the scatter's y-axis

sns.set_style('darkgrid')
ax_scatter.scatter(jointdf['secondpurity'].values, jointdf['purity'].values)
# Each marginal now shows the variable of the axis it is aligned with; before,
# the two histograms were swapped relative to the scatter.
ax_hist_y.hist(jointdf['secondpurity'], bins=20)
ax_hist_x.hist(jointdf['purity'], orientation='horizontal', bins=20)
_ticks = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
ax_hist_x.set_yticks(_ticks)
ax_hist_y.set_xticks(_ticks)
ax_scatter.set_xticks(_ticks)
ax_scatter.set_yticks(_ticks)
plt.show()

In [None]:
# Heatmap with one row per chamber and two columns: purity and class-2 user count.
sns.heatmap(jointdf[['purity', 'hatecount']].values)

In [None]:
from scipy import stats

In [None]:
# Empirical CDF of `secondpurity` (share of class-2 users per chamber).
plt.figure(figsize=(10, 4), dpi=100)
plt.xlabel('Purity of echo chamber')
plt.ylabel('CDF')
# The single-pass `for l in range(0, 1, 2)` wrapper and the legend() call
# without any labelled artist were removed.
X = sorted(secondpurity)
p = np.arange(len(X))/len(X)
plt.step(X, p)

In [None]:
# NOTE: `purity` is replaced by a sorted array here, so from this cell on it is
# no longer aligned with the per-chamber lists (uniform, secondpurity, ...).
purity = np.array(sorted(purity))
ax = sns.heatmap(np.array(purity).reshape(len(purity), 1))

In [None]:
# Number of chambers with purity below 0.4.
# NOTE(review): purity is 1 - minority/majority, so small values mean a mixed
# chamber; the name HIGH_PURITY looks inverted — confirm.
HIGH_PURITY = purity[purity < 0.4].shape[0]

In [None]:
HIGH_PURITY

In [None]:
# Number of chambers with purity in (0.5, 1].
# NOTE(review): by the purity formula these are the more homogeneous chambers;
# the name IMPURE looks inverted — confirm.
IMPURE = purity[(purity > 0.5) & (purity <= 1)].shape[0]

In [None]:
# Number of chambers with purity of at most 0.5.
# NOTE(review): the naming may be inverted, see the purity formula — confirm.
PURE = purity[(purity <= 0.5)].shape[0]

In [None]:
PURE/(IMPURE+PURE)

In [None]:
HIGH_PURITY/(IMPURE+PURE)

In [None]:
hate_count, non_hate_count

### Plotting Density plots for cascade size, width, depth and velocity categorized by source post and source user

Construct a CSV with one row per cascade in the network and the following columns:
1. Source post hatefulness for the cascade
2. Source user hatefulness for the cascade
3. Size of cascade
4. Volume
5. Width
6. Height
7. Source user in an echo chamber (yes/no)

Use the following code to get density plots.

In [None]:
# Template cell: the '<...>' strings are placeholders that must be replaced
# before it can run.
plt.figure(figsize=(8, 8), dpi=80)
df = pd.read_csv('plot_csvs/gab_statistic.csv') # Just an example. Please generate your own CSV for the analysis you want
# color = {"High hate": "#4400aa", "Med hate": "#9955ff", "Non hate": "#ccaaff"}

# Replace <type> with any of source post hatefulness, source user hatefulness, echo chamber depending on the analysis you wish for.
# Replace the <Category Label>s with labels of your choice, one per category.
# NOTE(review): the row selection uses the fixed column `author_type`; adjust
# it together with <type> if you group by a different column.
df.loc[df.author_type == 0, '<type>'] = '<Category Label 0>'
df.loc[df.author_type == 1, '<type>'] = '<Category Label 1>'
df.loc[df.author_type == 2, '<type>'] = '<Category Label 2>'
grid = sns.kdeplot(data=df, x='<column name>', hue='<type>', common_grid=True,  log_scale=True) # Replace <column name> with any of size, width, height, volume column names

### Sample code for density plots based on type of post from a type of hateful user (check Fig 1.c from manuscript)

In [None]:
# Density plot split by (source user class, source post class), using only the
# extreme classes 0 and 2:
#   '1' = class-0 user / class-0 post    '2' = class-0 user / class-2 post
#   '3' = class-2 user / class-0 post    '4' = class-2 user / class-2 post
df = pd.read_csv('plot_csvs/gab_statistic.csv')

# Each subset is built with assign(), which returns a new frame, instead of
# writing into a slice of `df` with .loc (chained assignment, which triggers
# pandas' SettingWithCopyWarning).
_category_parts = []
for _category, (_user_class, _post_class) in enumerate([(0, 0), (0, 2), (2, 0), (2, 2)], start=1):
    _selected = df[(df['source_user_hate'] == _user_class) & (df['source_post_hate'] == _post_class)]
    _category_parts.append(_selected.assign(category=str(_category)))

DF = pd.concat(_category_parts)

# color = {"1": "#8deb91", "2": "#5dbb65", "3": "#006010", "4": "#003600"}

#Replace <column name> with any of size, width, height, volume column names
grid = sns.kdeplot(data=DF, x='<column name>', hue='category', common_grid=True, log_scale=True, multiple='layer', linewidth=2, fill=True, alpha=0.7)

In [None]:
def my_similarity(a, b, c=0):
    """Return a set-similarity score between ``a`` and ``b``.

    Args:
        a: First set (``set`` or ``frozenset``).
        b: Second set (``set`` or ``frozenset``).
        c: Metric selector. ``0`` selects the Jaccard index
            ``|a & b| / |a | b|``; any other value selects the overlap
            coefficient ``|a & b| / min(|a|, |b|)``.

    Returns:
        The similarity as a float in ``[0, 1]``. When the denominator would
        be zero (both sets empty for Jaccard, either set empty for overlap)
        the result is ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises:
        TypeError: If ``a`` or ``b`` is not a set.
    """
    if not isinstance(a, (set, frozenset)) or not isinstance(b, (set, frozenset)):
        raise TypeError("my_similarity expects two sets")
    shared = len(a & b)
    if c == 0:
        # Jaccard
        denominator = len(a | b)
    else:
        # Overlapping
        denominator = min(len(a), len(b))
    if denominator == 0:
        return 0.0
    return shared / denominator

In [None]:
# Build a graph over candidate echo chambers: one node per key of
# `consider_echo`, with an edge between two chambers whose member sets have an
# overlap coefficient above 0.7.
ECHO_GRAPHS = nx.Graph()
consider_keys = list(consider_echo.keys())
# Nodes of ECHO_GRAPHS are integer positions in `consider_keys`; the two
# mappings translate between a position and the chamber key it stands for.
key_mappings = dict(enumerate(consider_keys))
key_mappings_inverse = {key: idx for idx, key in key_mappings.items()}
ECHO_GRAPHS.add_nodes_from(key_mappings)
# Materialise every member set once rather than once per compared pair.
member_sets = [set(consider_echo[key]) for key in consider_keys]
edges = []
for i in tqdm(range(len(consider_keys))):
    for j in range(i+1, len(consider_keys)):
        ### Intersecting users
        if my_similarity(member_sets[i], member_sets[j], 1) > 0.7:
            # Use the same integer ids as the nodes; mixing ids and keys
            # would leave every integer node isolated next to a second,
            # key-labelled copy of the graph.
            edges.append((i, j))
ECHO_GRAPHS.add_edges_from(edges)
# Relabel positions back to chamber keys so H is addressed by chamber key.
H = nx.relabel_nodes(ECHO_GRAPHS, key_mappings)

In [None]:
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities
from cdlib import algorithms

In [None]:
# Keep only the chambers connected to more than three others.
# Despite its name, `removing_nodes` lists the nodes that are retained in U.
removing_nodes = [node for node in H.nodes if H.degree[node] > 3]
U = nx.subgraph(H, removing_nodes)

In [None]:
# Maximal cliques of the filtered chamber graph U.
# NOTE(review): per the networkx API this is a lazy iterator, so `fc` can be consumed only once — confirm before reusing it.
fc = nx.find_cliques(U)

In [None]:
# Clique number for the nodes of U.
# NOTE(review): per the networkx API this is the size of the largest maximal clique containing each node — confirm.
ncn = nx.node_clique_number(U)

In [None]:
# Draw the filtered chamber graph U with a spring layout, labelling each node.
nx.draw_spring(U, with_labels=True, font_weight='bold', node_color='lightblue', node_size=500)

In [None]:
# Log-log plot of the degree distribution of the chamber graph H.
m = 0  # first degree to plot; raise it to drop the lowest degrees
degree_freq = nx.degree_histogram(H)  # indexed by degree; one entry per degree from 0 up to the maximum
degrees = range(len(degree_freq))
plt.figure(figsize=(12, 8)) 
plt.loglog(degrees[m:], degree_freq[m:],'o') 
plt.xlabel('Degree')
plt.ylabel('Frequency')

In [None]:
# Working copy of the chamber -> users mapping; echo_propagation() mutates
# this global in place, leaving `consider_echo` untouched.
new_echo = deepcopy(consider_echo)
def echo_propagation(consider_keys, threshold=0.6):
    """Merge echo chambers in the global ``new_echo`` whose keys are similar.

    Two chambers are merged when the Jaccard similarity of their keys (each
    key treated as a set of its elements) is at least ``threshold``. The
    merged chamber is stored under the sorted tuple of the union of both keys
    and holds the union of both user sets. Passes over ``new_echo`` repeat
    until a full pass performs no merge.

    Both source chambers are removed when they are merged. Leaving the first
    one behind (as the previous version did) makes it match its own merged
    successor on every later pass, so the loop may never terminate.

    Args:
        consider_keys: Unused; retained so existing calls keep working. The
            function always operates on the module-level ``new_echo``.
        threshold: Minimum Jaccard similarity between two keys for a merge.

    Returns:
        None. ``new_echo`` is modified in place.
    """
    merged_any = True
    while merged_any:
        merged_any = False
        KEYS = list(new_echo.keys())
        for i in tqdm(range(len(KEYS))):
            for j in range(i + 1, len(KEYS)):
                key_i, key_j = KEYS[i], KEYS[j]
                # Skip slots merged away earlier in this pass, aliases of the
                # same chamber, and keys no longer present in new_echo.
                if key_i is None or key_j is None or key_i == key_j:
                    continue
                if key_i not in new_echo or key_j not in new_echo:
                    continue
                if my_similarity(set(key_i), set(key_j), 0) < threshold:
                    continue
                new_key = tuple(sorted(set(key_i) | set(key_j)))
                new_user = set(new_echo[key_i]) | set(new_echo[key_j])
                # Another chamber may already be stored under new_key; keep
                # its users instead of silently overwriting them.
                new_user |= set(new_echo.get(new_key, ()))
                new_echo.pop(key_i, None)
                new_echo.pop(key_j, None)
                new_echo[new_key] = new_user
                merged_any = True
                # Later comparisons in this pass use the merged chamber.
                KEYS[i] = new_key
                KEYS[j] = None

In [None]:
# Merge similar chambers in place; the result is left in the global `new_echo`.
echo_propagation(consider_keys)

In [None]:
# Inverted index: user -> set of chamber keys the user belongs to.
user_to_echo = {}
for chamber_key, members in new_echo.items():
    for member in members:
        user_to_echo.setdefault(member, set()).add(chamber_key)

In [None]:
# Every user that appears in at least one chamber of `new_echo`.
all_echo_users = set().union(*new_echo.values())
    

In [None]:
# For every cascade started by a highly hateful author (class 2), record the
# cascade volume and count how its participants relate to echo chambers.
#   e_1 / e_2   : source is in an echo chamber; participant does / does not
#                 share at least one chamber with the source.
#   ne_1 / ne_2 : source is not in an echo chamber; participant is / is not
#                 in any echo chamber.
#   not_matched : participants whose author could not be looked up.
log_scales = [[], [], []]  # [0] volumes for echo sources, [1] for non-echo sources; [2] is not filled here
found = set()  # hateful source authors inside an echo chamber
total = set()  # hateful source authors outside every echo chamber
e_1 = 0
e_2 = 0
ne_1 = 0
ne_2 = 0
not_matched = 0
for i in tqdm(CAS):
    source_author = post_author[i]
    if author_hate_classify[source_author] != 2:
        continue
    nodes = CAS[i]
    volume = nx.volume(casc_graph, nodes, weight=None)
    if source_author in all_echo_users:
        log_scales[0].append(volume)
        found.add(source_author)
        to_compare_echo = user_to_echo[source_author]
        for n in nodes:
            # Only the author lookup is expected to fail; catching KeyError
            # alone keeps genuine errors and interrupts visible.
            try:
                participant = post_author[n]
            except KeyError:
                not_matched += 1
                continue
            if user_to_echo.get(participant, set()).isdisjoint(to_compare_echo):
                e_2 += 1
            else:
                e_1 += 1
    else:
        log_scales[1].append(volume)
        total.add(source_author)
        for n in nodes:
            try:
                participant = post_author[n]
            except KeyError:
                not_matched += 1
                continue
            if participant in all_echo_users:
                ne_1 += 1
            else:
                ne_2 += 1

In [None]:
# Share of echo-sharing participants for echo sources, share of echo-chamber
# participants for non-echo sources, and the number of unmatched participants.
# Raises ZeroDivisionError if either pair of counters sums to zero.
e_1/(e_1+e_2), ne_1/(ne_1 + ne_2), not_matched

In [None]:
# Row-normalised 2x2 table: each row is a kind of hateful source author, each
# column a kind of participant in that author's cascades.
echo_total = e_1 + e_2
non_echo_total = ne_1 + ne_2
arr = [[e_1/echo_total, e_2/echo_total],
       [ne_1/non_echo_total, ne_2/non_echo_total]
      ]
# The first row holds the e_* fractions and the second row the ne_* fractions,
# so the rows are labelled by source type and the columns by participant type.
# For echo sources "in echo chamber" means sharing a chamber with the source.
df_cm = pd.DataFrame(arr, index=['echo source', 'non-echo source'],
                  columns=['in echo chamber', 'not in echo chamber'])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm, annot=True, cmap='Greens')

In [None]:
# Collect the hate class of every author in `total`, and separately of those
# that are also in `found`; authors without a hate class are counted in
# `not_found`.
# NOTE(review): the cell that fills `found` and `total` adds each author to
# exactly one of the two sets, so `f in found` looks like it can never be
# true here and `hate_list_echo` would stay empty — confirm the intent.
hate_list_echo = []
hate_list_all = []
not_found = 0
for f in total:
    # Only a missing author is expected; a bare except would also hide
    # unrelated errors and keyboard interrupts.
    try:
        hate_label = author_hate_classify[f]
    except KeyError:
        not_found += 1
        continue
    if f in found:
        hate_list_echo.append(hate_label)
    hate_list_all.append(hate_label)

In [None]:
# For every cascade, build the timeline of its posts during the first 24 hours
# and find where the growth acceleration changes sign.
# results[i] = [time offsets (s), indices of sign changes in the acceleration,
#               source post hate, source author hate class,
#               1 if the source author is in an echo chamber else 0]
results = {}
failed = 0  # cascades with fewer than two posts in the window (no gradient)
for i in tqdm(CAS):
    cascade = CAS[i]
    # Nodes reachable from the source post i (keys of the distance mapping).
    desc = nx.shortest_path_length(cascade, i)
    base_time = cascade.nodes[i]['time'].timestamp()
    content_arr = np.array(list(desc))
    # Offset of each reached node d from the source post. Reading the source
    # node's own time here would make every offset zero.
    timelines = np.array(
        [cascade.nodes[d]['time'].timestamp() - base_time for d in desc]
    )
    sorter = np.argsort(timelines)
    timelines = timelines[sorter]
    # Reorder the node ids with the same permutation as the offsets.
    content_arr = content_arr[sorter]

    filt = timelines < 86400  # keep the first 24 hours only
    timelines = timelines[filt]
    content_arr = content_arr[filt]
    if len(timelines) < 2:
        # np.gradient needs at least two samples.
        failed += 1
        zero_crossings = np.array([], dtype=int)
    else:
        # Cumulative post count over time -> velocity -> acceleration.
        vel = np.gradient(np.arange(len(content_arr)), timelines)
        acc = np.gradient(vel, timelines)
        zero_crossings = np.where(np.diff(np.sign(acc)))[0]
    results[i] = [timelines, zero_crossings, cascade.nodes[i]['hate'], author_hate_classify[post_author[i]], int(post_author[i] in all_echo_users)]

In [None]:
# One row per cascade in `results`:
# [timeline length, number of zero crossings, post hate, author hate, echo flag].
mappings = np.zeros((len(results), 5))
counter = 0
for timeline, crossings, post_hate, author_hate, in_echo in tqdm(results.values()):
    mappings[counter] = (len(timeline), len(crossings), post_hate, author_hate, in_echo)
    counter += 1

In [None]:
# Row indices of cascades whose source author is highly hateful (column 3 == 2),
# split by the echo-chamber flag in column 4.
hateful_source = mappings[:, 3] == 2
outside_echo = mappings[:, 4] == 0
inside_echo = mappings[:, 4] == 1
non_echo_hate = np.nonzero(hateful_source & outside_echo)
echo_hate = np.nonzero(hateful_source & inside_echo)

In [None]:
# Linear fit for hateful sources outside echo chambers: column 0 of `mappings`
# is the regressor and column 1 the response.
non_echo_rows = mappings[non_echo_hate]
X_non_echo_hate = non_echo_rows[:, 0].reshape(-1, 1)
X = non_echo_rows[:, 0].reshape(-1, 1)
y = non_echo_rows[:, 1].reshape(-1, 1)
reg = LinearRegression()
reg.fit(X, y)
preds_non_echo_hate = reg.predict(X)
# Scatter of the observations, then the fitted line over the same x values.
plt.plot(non_echo_rows[:, 0], non_echo_rows[:, 1], 'o')
plt.plot(non_echo_rows[:, 0], preds_non_echo_hate)

In [None]:
# Linear fit for hateful sources inside echo chambers: column 0 of `mappings`
# is the regressor and column 1 the response.
echo_rows = mappings[echo_hate]
X_echo_hate = echo_rows[:, 0].reshape(-1, 1)
X = echo_rows[:, 0].reshape(-1, 1)
y = echo_rows[:, 1].reshape(-1, 1)
reg = LinearRegression()
reg.fit(X, y)
preds_echo_hate = reg.predict(X)
# Scatter of the observations, then the fitted line over the same x values.
plt.plot(echo_rows[:, 0], echo_rows[:, 1], 'o')
plt.plot(echo_rows[:, 0], preds_echo_hate)

In [None]:
# Overlay the two fitted regression lines (non-echo vs. echo hateful sources)
# on one figure; x is column 0 of `mappings` for the respective subset.
plt.figure(figsize=(10, 10), dpi=80)
plt.plot(mappings[non_echo_hate][:, 0], preds_non_echo_hate, label='non echo hate')
plt.plot(mappings[echo_hate][:, 0], preds_echo_hate, label='echo hate')
plt.legend()

In [None]:
def BreadthFirstLevels(G, root):
    """Yield the vertices of each breadth-first level below ``root``.

    ``G`` only needs to support ``G[v]``, returning an iterable of the
    neighbours of ``v``. The first set yielded holds the unvisited neighbours
    of ``root``, the next one the unvisited neighbours of that level, and so
    on. ``root`` itself is never yielded. Neighbours already seen on the same
    or an earlier level are skipped.

    The final value yielded is always an empty set, marking the level after
    the deepest one; callers counting levels should ignore it.
    """
    seen = set()
    frontier = [root]
    while frontier:
        # Mark the whole level first so same-level neighbours are skipped.
        seen.update(frontier)
        upcoming = {
            neighbour
            for vertex in frontier
            for neighbour in G[vertex]
            if neighbour not in seen
        }
        yield upcoming
        frontier = upcoming

In [None]:
# dict_plots[hate]['h'][depth] -> list with the width (number of nodes) at
# that depth, one entry per cascade whose source post has that hate label.
# The source post is depth 1, so the first recorded depth is 2.
dict_plots = {hate_level: {'h': {}} for hate_level in range(3)}

for i in tqdm(CAS):
    h = 1
    hate_index = CAS[i].nodes[i]['hate']
#     hate_index = author_hate_classify[P_dict[i]['author']]
    for b in BreadthFirstLevels(CAS[i], i):
        if not b:
            # The generator ends with an empty level; recording it would add
            # a spurious width of 0 one level below every cascade's depth.
            break
        h += 1
        dict_plots[hate_index]['h'].setdefault(h, []).append(len(b))

In [None]:
# Largest cascade depth included in the width-by-depth table and plots.
RANGE_TO_SHOW = 10

In [None]:
# Flatten dict_plots into rows of [hate label, depth, width], keeping depths
# up to RANGE_TO_SHOW.
dict_to_strip = [
    [i,  # species
     j,  # depth
     k]  # width
    for i in range(0, 3)
    # Read each hate label's own table; reading dict_plots[0] for every label
    # would copy the label-0 data into all three groups.
    for j, widths in dict_plots[i]['h'].items()
    if j <= RANGE_TO_SHOW
    for k in widths
]
# reshape keeps the array two-dimensional even when no row qualifies, so the
# column slicing done later does not fail.
dict_to_strip = np.array(dict_to_strip).reshape(-1, 3)

In [None]:
# Empty long-format table: one row per (hate label, depth, width) observation.
size_cascade = pd.DataFrame(columns=['hate', 'depth', 'width'])

In [None]:
# Copy the three columns of dict_to_strip into the table, in column order.
for column_position, column_name in enumerate(['hate', 'depth', 'width']):
    size_cascade[column_name] = dict_to_strip[:, column_position]

In [None]:
# Width per depth: every observation as a faint strip plot, with the per-group
# mean overlaid as diamond markers.
# Create the axes explicitly: the legend code below needs `ax`, which this
# cell previously never defined.
fig, ax = plt.subplots(figsize=(15, 5))
sns.stripplot(x="depth", y="width", hue="hate",
              data=size_cascade, dodge=True, alpha=.25, zorder=1, ax=ax)
sns.pointplot(x="depth", y="width", hue="hate",
              data=size_cascade, dodge=.8 - .8 / 3,
              join=False, palette="dark",
              markers="d", scale=.75, ci=None, ax=ax)

# Both layers add legend entries; skip the first three so only one set is
# shown (this assumes exactly three hate levels).
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[3:], labels[3:], title="Hate intensity",
          handletextpad=0, columnspacing=1,
          loc="lower right", ncol=3, frameon=True)
plt.show()

In [None]:
# Bivariate KDE of width against depth, one set of contours per hate label.
# NOTE(review): `multiple="fill"` is presumably only meaningful for univariate plots — confirm it has any effect here.
sns.displot(x="depth", y="width", hue="hate",
              data=size_cascade,
    kind="kde", height=6,
    multiple="fill", clip=(0, None),
    palette="ch:rot=-.25,hue=1,light=.75",
)

In [None]:
# Violin plot of the width distribution at each depth, split by hate label.
sns.catplot(x="depth", y="width", hue="hate",
               data=size_cascade, kind="violin", aspect=2, height=10)

In [None]:
# Point plot of width at each depth, one line per hate label.
# NOTE(review): catplot is a figure-level function, so the figure created by
# plt.figure() below is presumably left empty — confirm and drop it if so.
plt.figure(figsize=(15, 5))
# NOTE(review): `width=20` is forwarded to the underlying point plot; verify
# that the installed seaborn version accepts it.
g = sns.catplot(x="depth", y="width", hue="hate",
                capsize=.2, palette="Reds",  aspect=.75,
                kind="point", data=size_cascade, width=20, height=5)
g.despine(left=True)