In [1]:
# Imports
import pandas as pd
import numpy as np
from IPython.display import display

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load data
df = pd.read_csv("./preproc_data/clustered_BoW_d2v_final.csv")

# Print length
print(len(df.index))

# Check
display(df.head())

166342


Unnamed: 0,id,subreddit,clean_text,label,bow_kmeans_label,bow_birch_label,bow_hdbscan_label,d2v_kmeans_label,d2v_birch_label,d2v_hdbscan_label,df_copy_kmeans_label,df_copy_birch_label,df_copy_hdbscan_label
0,l4q8j5,mentalhealth,feel le depressed empty inside right happy im...,1,1,1,0,0,0,0,0,0,0
1,lxdqv0,mentalhealth,depression ive dealing stuff idk call put lab...,1,0,1,0,1,0,0,1,0,0
2,e0plue,happy,finally fixed car took one day two week second...,0,1,1,0,0,0,0,0,0,0
3,j3jv3k,depression_help,help depressed little sister doesnt tell anyt...,1,0,0,0,1,0,0,1,1,0
4,ksbtnt,CasualConversation,got first job ever finally pay family rent go...,0,1,1,0,0,0,0,0,0,0


In [3]:
# Because we don't know which clustered group matches with our label,
# we need to also flip the clustered label and compare with label
# It is faster to flip our label than to flip multiple columns
# In the first pass, we will treat label=1 as depressive and 
# label=0 as non-depressive, then label=1 as non-depressive and
# label=0 as depressive
df['flip_label'] = (~df['label'].astype(bool)).astype(int)

# Increment all hdbscan_labels (since the range for their labels was [-1, 0])
df['bow_hdbscan_label'] = df['bow_hdbscan_label'] + 1
df['d2v_hdbscan_label'] = df['d2v_hdbscan_label'] + 1
df['df_copy_hdbscan_label'] = df['df_copy_hdbscan_label'] + 1

In [4]:
# Because classification models only predicted on test data,
# We need to filter the results for only the test data rows
test = pd.read_csv("./train_test_data/test_data.csv", low_memory=False)

# Select all columns that are numerical
test_nums = test.select_dtypes(['number'])

# Drop NaNs
test_nums = test_nums.dropna()

# Filter out rows that were dropped
test = test[test["index"].isin(test_nums["index"])]

# Filter the results for only the test data rows
df = df[df["id"].isin(test["id"])]

In [5]:
# Comparison of metrics for kmeans for dif features
print("Classification metrics for kmeans")
print("\nPrediction Accuracy")
print("Accuracy score between label and bow_kmeans_label is:", 
      accuracy_score(df['label'], df['bow_kmeans_label']))

print("Accuracy score between label and d2v_kmeans_label is:", 
      accuracy_score(df['label'], df['d2v_kmeans_label']))

print("Accuracy score between label and df_copy_kmeans_label is:", 
      accuracy_score(df['label'], df['df_copy_kmeans_label']))

print("\nPrecision")
print("Precision score between label and bow_kmeans_label is:", 
      precision_score(df['label'], df['bow_kmeans_label']))

print("Precision score between label and d2v_kmeans_label is:", 
      precision_score(df['label'], df['d2v_kmeans_label']))

print("Precision score between label and df_copy_kmeans_label is:", 
      precision_score(df['label'], df['df_copy_kmeans_label']))

print("\nRecall")
print("Recall score between label and bow_kmeans_label is:", 
      recall_score(df['label'], df['bow_kmeans_label']))

print("Recall score between label and d2v_kmeans_label is:", 
      recall_score(df['label'], df['d2v_kmeans_label']))

print("Recall score between label and df_copy_kmeans_label is:", 
      recall_score(df['label'], df['df_copy_kmeans_label']))

print("\nF1")
print("F1 score between label and bow_kmeans_label is:", 
      f1_score(df['label'], df['bow_kmeans_label']))

print("F1 score between label and d2v_kmeans_label is:", 
      f1_score(df['label'], df['d2v_kmeans_label']))

print("F1 score between label and df_copy_kmeans_label is:", 
      f1_score(df['label'], df['df_copy_kmeans_label']))

Classification metrics for kmeans

Prediction Accuracy
Accuracy score between label and bow_kmeans_label is: 0.28680073345958457
Accuracy score between label and d2v_kmeans_label is: 0.8129678059338082
Accuracy score between label and df_copy_kmeans_label is: 0.7588000120239277

Precision
Precision score between label and bow_kmeans_label is: 0.3300742870836329
Precision score between label and d2v_kmeans_label is: 0.8304665185655348
Precision score between label and df_copy_kmeans_label is: 0.8237461463267914

Recall
Recall score between label and bow_kmeans_label is: 0.41400661256387133
Recall score between label and d2v_kmeans_label is: 0.7865344153892395
Recall score between label and df_copy_kmeans_label is: 0.658551247370003

F1
F1 score between label and bow_kmeans_label is: 0.36730666666666667
F1 score between label and d2v_kmeans_label is: 0.8079036739734485
F1 score between label and df_copy_kmeans_label is: 0.7319436092737356


In [6]:
# Comparison of metrics for birch for dif features
print("Classification metrics for birch")
print("\nPrediction Accuracy")
print("Accuracy score between label and bow_birch_label is:", 
      accuracy_score(df['label'], df['bow_birch_label']))

print("Accuracy score between label and d2v_birch_label is:", 
      accuracy_score(df['label'], df['d2v_birch_label']))

print("Accuracy score between label and df_copy_birch_label is:", 
      accuracy_score(df['label'], df['df_copy_birch_label']))

print("\nPrecision")
print("Precision score between label and bow_birch_label is:", 
      precision_score(df['label'], df['bow_birch_label']))

print("Precision score between label and d2v_birch_label is:", 
      precision_score(df['label'], df['d2v_birch_label']))

print("Precision score between label and df_copy_birch_label is:", 
      precision_score(df['label'], df['df_copy_birch_label']))

print("\nRecall")
print("Recall score between label and bow_birch_label is:", 
      recall_score(df['label'], df['bow_birch_label']))

print("Recall score between label and d2v_birch_label is:", 
      recall_score(df['label'], df['d2v_birch_label']))

print("Recall score between label and df_copy_birch_label is:", 
      recall_score(df['label'], df['df_copy_birch_label']))

print("\nF1")
print("F1 score between label and bow_birch_label is:", 
      f1_score(df['label'], df['bow_birch_label']))

print("F1 score between label and d2v_birch_label is:", 
      f1_score(df['label'], df['d2v_birch_label']))

print("F1 score between label and df_copy_birch_label is:", 
      f1_score(df['label'], df['df_copy_birch_label']))

Classification metrics for birch

Prediction Accuracy
Accuracy score between label and bow_birch_label is: 0.4299756515465777
Accuracy score between label and d2v_birch_label is: 0.5028406528992695
Accuracy score between label and df_copy_birch_label is: 0.575825893528121

Precision
Precision score between label and bow_birch_label is: 0.4615104821109715
Precision score between label and d2v_birch_label is: 0.9363636363636364
Precision score between label and df_copy_birch_label is: 0.8603655054254712

Recall
Recall score between label and bow_birch_label is: 0.8390141268409979
Recall score between label and d2v_birch_label is: 0.006191764352269312
Recall score between label and df_copy_birch_label is: 0.18112413585813045

F1
F1 score between label and bow_birch_label is: 0.5954732598075816
F1 score between label and d2v_birch_label is: 0.01230217975515079
F1 score between label and df_copy_birch_label is: 0.2992501365645329


In [7]:
# Comparison of metrics for hdbscan for dif features
print("Classification metrics for hdbscan")
print("\nPrediction Accuracy")
print("Accuracy score between label and bow_hdbscan_label is:", 
      accuracy_score(df['label'], df['bow_hdbscan_label']))

print("Accuracy score between label and d2v_hdbscan_label is:", 
      accuracy_score(df['label'], df['d2v_hdbscan_label']))

print("Accuracy score between label and df_copy_hdbscan_label is:", 
      accuracy_score(df['label'], df['df_copy_hdbscan_label']))

print("\nPrecision")
print("Precision score between label and bow_hdbscan_label is:", 
      precision_score(df['label'], df['bow_hdbscan_label']))

print("Precision score between label and d2v_hdbscan_label is:", 
      precision_score(df['label'], df['d2v_hdbscan_label']))

print("Precision score between label and df_copy_hdbscan_label is:", 
      precision_score(df['label'], df['df_copy_hdbscan_label']))

print("\nRecall")
print("Recall score between label and bow_hdbscan_label is:", 
      recall_score(df['label'], df['bow_hdbscan_label']))

print("Recall score between label and d2v_hdbscan_label is:", 
      recall_score(df['label'], df['d2v_hdbscan_label']))

print("Recall score between label and df_copy_hdbscan_label is:", 
      recall_score(df['label'], df['df_copy_hdbscan_label']))

print("\nF1")
print("F1 score between label and bow_hdbscan_label is:", 
      f1_score(df['label'], df['bow_hdbscan_label']))

print("F1 score between label and d2v_hdbscan_label is:", 
      f1_score(df['label'], df['d2v_hdbscan_label']))

print("F1 score between label and df_copy_hdbscan_label is:", 
      f1_score(df['label'], df['df_copy_hdbscan_label']))

Classification metrics for hdbscan

Prediction Accuracy
Accuracy score between label and bow_hdbscan_label is: 0.44861273935130913
Accuracy score between label and d2v_hdbscan_label is: 0.4856163765894129
Accuracy score between label and df_copy_hdbscan_label is: 0.4661376138515646

Precision
Precision score between label and bow_hdbscan_label is: 0.4705415660572611
Precision score between label and d2v_hdbscan_label is: 0.49230620342591697
Precision score between label and df_copy_hdbscan_label is: 0.48164464023494863

Recall
Recall score between label and bow_hdbscan_label is: 0.8200180342651037
Recall score between label and d2v_hdbscan_label is: 0.9174030658250676
Recall score between label and df_copy_hdbscan_label is: 0.8872858431018936

F1
F1 score between label and bow_hdbscan_label is: 0.5979616438356165
F1 score between label and d2v_hdbscan_label is: 0.6407608011084519
F1 score between label and df_copy_hdbscan_label is: 0.6243654822335025


# Compare with Flip Label

In [8]:
# Comparison of metrics for kmeans for dif features
print("Classification metrics for kmeans")
print("\nPrediction Accuracy")
print("Accuracy score between flip_label and bow_kmeans_label is:", 
      accuracy_score(df['flip_label'], df['bow_kmeans_label']))

print("Accuracy score between flip_label and d2v_kmeans_label is:", 
      accuracy_score(df['flip_label'], df['d2v_kmeans_label']))

print("Accuracy score between flip_label and df_copy_kmeans_label is:", 
      accuracy_score(df['flip_label'], df['df_copy_kmeans_label']))

print("\nPrecision")
print("Precision score between flip_label and bow_kmeans_label is:", 
      precision_score(df['flip_label'], df['bow_kmeans_label']))

print("Precision score between flip_label and d2v_kmeans_label is:", 
      precision_score(df['flip_label'], df['d2v_kmeans_label']))

print("Precision score between flip_label and df_copy_kmeans_label is:", 
      precision_score(df['flip_label'], df['df_copy_kmeans_label']))

print("\nRecall")
print("Recall score between flip_label and bow_kmeans_label is:", 
      recall_score(df['flip_label'], df['bow_kmeans_label']))

print("Recall score between flip_label and d2v_kmeans_label is:", 
      recall_score(df['flip_label'], df['d2v_kmeans_label']))

print("Recall score between flip_label and df_copy_kmeans_label is:", 
      recall_score(df['flip_label'], df['df_copy_kmeans_label']))

print("\nF1")
print("F1 score between label and bow_kmeans_label is:", 
      f1_score(df['flip_label'], df['bow_kmeans_label']))

print("F1 score between label and d2v_kmeans_label is:", 
      f1_score(df['flip_label'], df['d2v_kmeans_label']))

print("F1 score between label and df_copy_kmeans_label is:", 
      f1_score(df['flip_label'], df['df_copy_kmeans_label']))

Classification metrics for kmeans

Prediction Accuracy
Accuracy score between flip_label and bow_kmeans_label is: 0.7131992665404154
Accuracy score between flip_label and d2v_kmeans_label is: 0.18703219406619173
Accuracy score between flip_label and df_copy_kmeans_label is: 0.2411999879760724

Precision
Precision score between flip_label and bow_kmeans_label is: 0.6699257129163672
Precision score between flip_label and d2v_kmeans_label is: 0.16953348143446526
Precision score between flip_label and df_copy_kmeans_label is: 0.1762538536732085

Recall
Recall score between flip_label and bow_kmeans_label is: 0.8404280904280904
Recall score between flip_label and d2v_kmeans_label is: 0.1605940355940356
Recall score between flip_label and df_copy_kmeans_label is: 0.14093314093314094

F1
F1 score between label and bow_kmeans_label is: 0.745552977571539
F1 score between label and d2v_kmeans_label is: 0.16494272393244205
F1 score between label and df_copy_kmeans_label is: 0.15662690855634626


In [9]:
# Comparison of metrics for birch for dif features
print("Classification metrics for birch")
print("\nPrediction Accuracy")
print("Accuracy score between flip_label and bow_birch_label is:", 
      accuracy_score(df['flip_label'], df['bow_birch_label']))

print("Accuracy score between flip_label and d2v_birch_label is:", 
      accuracy_score(df['flip_label'], df['d2v_birch_label']))

print("Accuracy score between flip_label and df_copy_birch_label is:", 
      accuracy_score(df['flip_label'], df['df_copy_birch_label']))

print("\nPrecision")
print("Precision score between flip_label and bow_birch_label is:", 
      precision_score(df['flip_label'], df['bow_birch_label']))

print("Precision score between flip_label and d2v_birch_label is:", 
      precision_score(df['flip_label'], df['d2v_birch_label']))

print("Precision score between flip_label and df_copy_birch_label is:", 
      precision_score(df['flip_label'], df['df_copy_birch_label']))

print("\nRecall")
print("Recall score between flip_label and bow_birch_label is:", 
      recall_score(df['flip_label'], df['bow_birch_label']))

print("Recall score between flip_label and d2v_birch_label is:", 
      recall_score(df['flip_label'], df['d2v_birch_label']))

print("Recall score between flip_label and df_copy_birch_label is:", 
      recall_score(df['flip_label'], df['df_copy_birch_label']))

print("\nF1")
print("F1 score between label and bow_birch_label is:", 
      f1_score(df['flip_label'], df['bow_birch_label']))

print("F1 score between label and d2v_birch_label is:", 
      f1_score(df['flip_label'], df['d2v_birch_label']))

print("F1 score between label and df_copy_birch_label is:", 
      f1_score(df['flip_label'], df['df_copy_birch_label']))

Classification metrics for birch

Prediction Accuracy
Accuracy score between flip_label and bow_birch_label is: 0.5700243484534223
Accuracy score between flip_label and d2v_birch_label is: 0.49715934710073045
Accuracy score between flip_label and df_copy_birch_label is: 0.42417410647187903

Precision
Precision score between flip_label and bow_birch_label is: 0.5384895178890285
Precision score between flip_label and d2v_birch_label is: 0.06363636363636363
Precision score between flip_label and df_copy_birch_label is: 0.13963449457452884

Recall
Recall score between flip_label and bow_birch_label is: 0.9791366041366041
Recall score between flip_label and d2v_birch_label is: 0.00042087542087542086
Recall score between flip_label and df_copy_birch_label is: 0.0294011544011544

F1
F1 score between label and bow_birch_label is: 0.6948414899517856
F1 score between label and d2v_birch_label is: 0.0008362202843148966
F1 score between label and df_copy_birch_label is: 0.04857455051157246


In [10]:
# Comparison of metrics for hdbscan for dif features
print("Classification metrics for hdbscan")
print("\nPrediction Accuracy")
print("Accuracy score between flip_label and bow_hdbscan_label is:", 
      accuracy_score(df['flip_label'], df['bow_hdbscan_label']))

print("Accuracy score between flip_label and d2v_hdbscan_label is:", 
      accuracy_score(df['flip_label'], df['d2v_hdbscan_label']))

print("Accuracy score between flip_label and df_copy_hdbscan_label is:", 
      accuracy_score(df['flip_label'], df['df_copy_hdbscan_label']))

print("\nPrecision")
print("Precision score between flip_label and bow_hdbscan_label is:", 
      precision_score(df['flip_label'], df['bow_hdbscan_label']))

print("Precision score between flip_label and d2v_hdbscan_label is:", 
      precision_score(df['flip_label'], df['d2v_hdbscan_label']))

print("Precision score between flip_label and df_copy_hdbscan_label is:", 
      precision_score(df['flip_label'], df['df_copy_hdbscan_label']))

print("\nRecall")
print("Recall score between flip_label and bow_hdbscan_label is:", 
      recall_score(df['flip_label'], df['bow_hdbscan_label']))

print("Recall score between flip_label and d2v_hdbscan_label is:", 
      recall_score(df['flip_label'], df['d2v_hdbscan_label']))

print("Recall score between flip_label and df_copy_hdbscan_label is:", 
      recall_score(df['flip_label'], df['df_copy_hdbscan_label']))

print("\nF1")
print("F1 score between label and bow_hdbscan_label is:", 
      f1_score(df['flip_label'], df['bow_hdbscan_label']))

print("F1 score between label and d2v_hdbscan_label is:", 
      f1_score(df['flip_label'], df['d2v_hdbscan_label']))

print("F1 score between label and df_copy_hdbscan_label is:", 
      f1_score(df['flip_label'], df['df_copy_hdbscan_label']))

Classification metrics for hdbscan

Prediction Accuracy
Accuracy score between flip_label and bow_hdbscan_label is: 0.5513872606486909
Accuracy score between flip_label and d2v_hdbscan_label is: 0.5143836234105871
Accuracy score between flip_label and df_copy_hdbscan_label is: 0.5338623861484354

Precision
Precision score between flip_label and bow_hdbscan_label is: 0.5294584339427388
Precision score between flip_label and d2v_hdbscan_label is: 0.5076937965740831
Precision score between flip_label and df_copy_hdbscan_label is: 0.5183553597650514

Recall
Recall score between flip_label and bow_hdbscan_label is: 0.9228595478595478
Recall score between flip_label and d2v_hdbscan_label is: 0.9462481962481962
Recall score between flip_label and df_copy_hdbscan_label is: 0.9550865800865801

F1
F1 score between label and bow_hdbscan_label is: 0.6728771206873877
F1 score between label and d2v_hdbscan_label is: 0.6608301316369591
F1 score between label and df_copy_hdbscan_label is: 0.6719969541

# Results with All Data

In [11]:
# Reset df
df = pd.read_csv("./preproc_data/clustered_BoW_d2v_final.csv")

# Because we don't know which clustered group matches with our label,
# we need to also flip the clustered label and compare with label
# It is faster to flip our label than to flip multiple columns
# In the first pass, we will treat label=1 as depressive and 
# label=0 as non-depressive, then label=1 as non-depressive and
# label=0 as depressive
df['flip_label'] = (~df['label'].astype(bool)).astype(int)

# Increment all hdbscan_labels (since the range for their labels was [-1, 0])
df['bow_hdbscan_label'] = df['bow_hdbscan_label'] + 1
df['d2v_hdbscan_label'] = df['d2v_hdbscan_label'] + 1
df['df_copy_hdbscan_label'] = df['df_copy_hdbscan_label'] + 1

In [12]:
# Comparison of metrics for kmeans for dif features
print("Classification metrics for kmeans")
print("\nPrediction Accuracy")
print("Accuracy score between label and bow_kmeans_label is:", 
      accuracy_score(df['label'], df['bow_kmeans_label']))

print("Accuracy score between label and d2v_kmeans_label is:", 
      accuracy_score(df['label'], df['d2v_kmeans_label']))

print("Accuracy score between label and df_copy_kmeans_label is:", 
      accuracy_score(df['label'], df['df_copy_kmeans_label']))

print("\nPrecision")
print("Precision score between label and bow_kmeans_label is:", 
      precision_score(df['label'], df['bow_kmeans_label']))

print("Precision score between label and d2v_kmeans_label is:", 
      precision_score(df['label'], df['d2v_kmeans_label']))

print("Precision score between label and df_copy_kmeans_label is:", 
      precision_score(df['label'], df['df_copy_kmeans_label']))

print("\nRecall")
print("Recall score between label and bow_kmeans_label is:", 
      recall_score(df['label'], df['bow_kmeans_label']))

print("Recall score between label and d2v_kmeans_label is:", 
      recall_score(df['label'], df['d2v_kmeans_label']))

print("Recall score between label and df_copy_kmeans_label is:", 
      recall_score(df['label'], df['df_copy_kmeans_label']))

print("\nF1")
print("F1 score between label and bow_kmeans_label is:", 
      f1_score(df['label'], df['bow_kmeans_label']))

print("F1 score between label and d2v_kmeans_label is:", 
      f1_score(df['label'], df['d2v_kmeans_label']))

print("F1 score between label and df_copy_kmeans_label is:", 
      f1_score(df['label'], df['df_copy_kmeans_label']))

Classification metrics for kmeans

Prediction Accuracy
Accuracy score between label and bow_kmeans_label is: 0.2835122819251903
Accuracy score between label and d2v_kmeans_label is: 0.8161919419028267
Accuracy score between label and df_copy_kmeans_label is: 0.7619302401077298

Precision
Precision score between label and bow_kmeans_label is: 0.3263503086419753
Precision score between label and d2v_kmeans_label is: 0.835619759829507
Precision score between label and df_copy_kmeans_label is: 0.8239098437430309

Recall
Recall score between label and bow_kmeans_label is: 0.40680982037656
Recall score between label and d2v_kmeans_label is: 0.7872652511602183
Recall score between label and df_copy_kmeans_label is: 0.6662779233895207

F1
F1 score between label and bow_kmeans_label is: 0.36216511286887093
F1 score between label and d2v_kmeans_label is: 0.8107221345219302
F1 score between label and df_copy_kmeans_label is: 0.7367567387908399


In [13]:
# Comparison of metrics for birch for dif features
print("Classification metrics for birch")
print("\nPrediction Accuracy")
print("Accuracy score between label and bow_birch_label is:", 
      accuracy_score(df['label'], df['bow_birch_label']))

print("Accuracy score between label and d2v_birch_label is:", 
      accuracy_score(df['label'], df['d2v_birch_label']))

print("Accuracy score between label and df_copy_birch_label is:", 
      accuracy_score(df['label'], df['df_copy_birch_label']))

print("\nPrecision")
print("Precision score between label and bow_birch_label is:", 
      precision_score(df['label'], df['bow_birch_label']))

print("Precision score between label and d2v_birch_label is:", 
      precision_score(df['label'], df['d2v_birch_label']))

print("Precision score between label and df_copy_birch_label is:", 
      precision_score(df['label'], df['df_copy_birch_label']))

print("\nRecall")
print("Recall score between label and bow_birch_label is:", 
      recall_score(df['label'], df['bow_birch_label']))

print("Recall score between label and d2v_birch_label is:", 
      recall_score(df['label'], df['d2v_birch_label']))

print("Recall score between label and df_copy_birch_label is:", 
      recall_score(df['label'], df['df_copy_birch_label']))

print("\nF1")
print("F1 score between label and bow_birch_label is:", 
      f1_score(df['label'], df['bow_birch_label']))

print("F1 score between label and d2v_birch_label is:", 
      f1_score(df['label'], df['d2v_birch_label']))

print("F1 score between label and df_copy_birch_label is:", 
      f1_score(df['label'], df['df_copy_birch_label']))

Classification metrics for birch

Prediction Accuracy
Accuracy score between label and bow_birch_label is: 0.43031224825960973
Accuracy score between label and d2v_birch_label is: 0.5025429536737565
Accuracy score between label and df_copy_birch_label is: 0.5785670486106936

Precision
Precision score between label and bow_birch_label is: 0.4616601052039567
Precision score between label and d2v_birch_label is: 0.9400826446280992
Precision score between label and df_copy_birch_label is: 0.8632321885072802

Recall
Recall score between label and bow_birch_label is: 0.8388799384423017
Recall score between label and d2v_birch_label is: 0.005470459518599562
Recall score between label and df_copy_birch_label is: 0.186753071873422

F1
F1 score between label and bow_birch_label is: 0.5955639774827258
F1 score between label and d2v_birch_label is: 0.01087762078940448
F1 score between label and df_copy_birch_label is: 0.3070733828878697


In [14]:
# Comparison of metrics for hdbscan for dif features
print("Classification metrics for hdbscan")
print("\nPrediction Accuracy")
print("Accuracy score between label and bow_hdbscan_label is:", 
      accuracy_score(df['label'], df['bow_hdbscan_label']))

print("Accuracy score between label and d2v_hdbscan_label is:", 
      accuracy_score(df['label'], df['d2v_hdbscan_label']))

print("Accuracy score between label and df_copy_hdbscan_label is:", 
      accuracy_score(df['label'], df['df_copy_hdbscan_label']))

print("\nPrecision")
print("Precision score between label and bow_hdbscan_label is:", 
      precision_score(df['label'], df['bow_hdbscan_label']))

print("Precision score between label and d2v_hdbscan_label is:", 
      precision_score(df['label'], df['d2v_hdbscan_label']))

print("Precision score between label and df_copy_hdbscan_label is:", 
      precision_score(df['label'], df['df_copy_hdbscan_label']))

print("\nRecall")
print("Recall score between label and bow_hdbscan_label is:", 
      recall_score(df['label'], df['bow_hdbscan_label']))

print("Recall score between label and d2v_hdbscan_label is:", 
      recall_score(df['label'], df['d2v_hdbscan_label']))

print("Recall score between label and df_copy_hdbscan_label is:", 
      recall_score(df['label'], df['df_copy_hdbscan_label']))

print("\nF1")
print("F1 score between label and bow_hdbscan_label is:", 
      f1_score(df['label'], df['bow_hdbscan_label']))

print("F1 score between label and d2v_hdbscan_label is:", 
      f1_score(df['label'], df['d2v_hdbscan_label']))

print("F1 score between label and df_copy_hdbscan_label is:", 
      f1_score(df['label'], df['df_copy_hdbscan_label']))

Classification metrics for hdbscan

Prediction Accuracy
Accuracy score between label and bow_hdbscan_label is: 0.4493152661384377
Accuracy score between label and d2v_hdbscan_label is: 0.485565882338796
Accuracy score between label and df_copy_hdbscan_label is: 0.46623222036527157

Precision
Precision score between label and bow_hdbscan_label is: 0.4708821050013129
Precision score between label and d2v_hdbscan_label is: 0.49226321834630327
Precision score between label and df_copy_hdbscan_label is: 0.4816926026896938

Recall
Recall score between label and bow_hdbscan_label is: 0.8193305600307789
Recall score between label and d2v_hdbscan_label is: 0.9172097049558756
Recall score between label and df_copy_hdbscan_label is: 0.8879697982542621

F1
F1 score between label and bow_hdbscan_label is: 0.5980534980868466
F1 score between label and d2v_hdbscan_label is: 0.6406772259267347
F1 score between label and df_copy_hdbscan_label is: 0.6245750528541226


In [15]:
# Comparison of metrics for kmeans for dif features
print("Classification metrics for kmeans")
print("\nPrediction Accuracy")
print("Accuracy score between flip_label and bow_kmeans_label is:", 
      accuracy_score(df['flip_label'], df['bow_kmeans_label']))

print("Accuracy score between flip_label and d2v_kmeans_label is:", 
      accuracy_score(df['flip_label'], df['d2v_kmeans_label']))

print("Accuracy score between flip_label and df_copy_kmeans_label is:", 
      accuracy_score(df['flip_label'], df['df_copy_kmeans_label']))

print("\nPrecision")
print("Precision score between flip_label and bow_kmeans_label is:", 
      precision_score(df['flip_label'], df['bow_kmeans_label']))

print("Precision score between flip_label and d2v_kmeans_label is:", 
      precision_score(df['flip_label'], df['d2v_kmeans_label']))

print("Precision score between flip_label and df_copy_kmeans_label is:", 
      precision_score(df['flip_label'], df['df_copy_kmeans_label']))

print("\nRecall")
print("Recall score between flip_label and bow_kmeans_label is:", 
      recall_score(df['flip_label'], df['bow_kmeans_label']))

print("Recall score between flip_label and d2v_kmeans_label is:", 
      recall_score(df['flip_label'], df['d2v_kmeans_label']))

print("Recall score between flip_label and df_copy_kmeans_label is:", 
      recall_score(df['flip_label'], df['df_copy_kmeans_label']))

print("\nF1")
print("F1 score between label and bow_kmeans_label is:", 
      f1_score(df['flip_label'], df['bow_kmeans_label']))

print("F1 score between label and d2v_kmeans_label is:", 
      f1_score(df['flip_label'], df['d2v_kmeans_label']))

print("F1 score between label and df_copy_kmeans_label is:", 
      f1_score(df['flip_label'], df['df_copy_kmeans_label']))

Classification metrics for kmeans

Prediction Accuracy
Accuracy score between flip_label and bow_kmeans_label is: 0.7164877180748097
Accuracy score between flip_label and d2v_kmeans_label is: 0.1838080580971733
Accuracy score between flip_label and df_copy_kmeans_label is: 0.23806975989227014

Precision
Precision score between flip_label and bow_kmeans_label is: 0.6736496913580247
Precision score between flip_label and d2v_kmeans_label is: 0.16438024017049296
Precision score between flip_label and df_copy_kmeans_label is: 0.1760901562569691

Recall
Recall score between flip_label and bow_kmeans_label is: 0.839794151596768
Recall score between flip_label and d2v_kmeans_label is: 0.1548792804924971
Recall score between flip_label and df_copy_kmeans_label is: 0.14241054251635243

F1
F1 score between label and bow_kmeans_label is: 0.747602329165953
F1 score between label and d2v_kmeans_label is: 0.15948838908183668
F1 score between label and df_copy_kmeans_label is: 0.15746963683864146


In [16]:
# Comparison of metrics for birch for dif features
print("Classification metrics for birch")
print("\nPrediction Accuracy")
print("Accuracy score between flip_label and bow_birch_label is:", 
      accuracy_score(df['flip_label'], df['bow_birch_label']))

print("Accuracy score between flip_label and d2v_birch_label is:", 
      accuracy_score(df['flip_label'], df['d2v_birch_label']))

print("Accuracy score between flip_label and df_copy_birch_label is:", 
      accuracy_score(df['flip_label'], df['df_copy_birch_label']))

print("\nPrecision")
print("Precision score between flip_label and bow_birch_label is:", 
      precision_score(df['flip_label'], df['bow_birch_label']))

print("Precision score between flip_label and d2v_birch_label is:", 
      precision_score(df['flip_label'], df['d2v_birch_label']))

print("Precision score between flip_label and df_copy_birch_label is:", 
      precision_score(df['flip_label'], df['df_copy_birch_label']))

print("\nRecall")
print("Recall score between flip_label and bow_birch_label is:", 
      recall_score(df['flip_label'], df['bow_birch_label']))

print("Recall score between flip_label and d2v_birch_label is:", 
      recall_score(df['flip_label'], df['d2v_birch_label']))

print("Recall score between flip_label and df_copy_birch_label is:", 
      recall_score(df['flip_label'], df['df_copy_birch_label']))

print("\nF1")
print("F1 score between label and bow_birch_label is:", 
      f1_score(df['flip_label'], df['bow_birch_label']))

print("F1 score between label and d2v_birch_label is:", 
      f1_score(df['flip_label'], df['d2v_birch_label']))

print("F1 score between label and df_copy_birch_label is:", 
      f1_score(df['flip_label'], df['df_copy_birch_label']))

Classification metrics for birch

Prediction Accuracy
Accuracy score between flip_label and bow_birch_label is: 0.5696877517403903
Accuracy score between flip_label and d2v_birch_label is: 0.49745704632624355
Accuracy score between flip_label and df_copy_birch_label is: 0.42143295138930637

Precision
Precision score between flip_label and bow_birch_label is: 0.5383398947960433
Precision score between flip_label and d2v_birch_label is: 0.05991735537190083
Precision score between flip_label and df_copy_birch_label is: 0.1367678114927198

Recall
Recall score between flip_label and bow_birch_label is: 0.9782849172758753
Recall score between flip_label and d2v_birch_label is: 0.00034869180454020777
Recall score between flip_label and df_copy_birch_label is: 0.02959070796460177

F1
F1 score between label and bow_birch_label is: 0.6945024178094178
F1 score between label and d2v_birch_label is: 0.0006933486348204466
F1 score between label and df_copy_birch_label is: 0.04865463316265001


In [17]:
# Comparison of metrics for hdbscan for dif features
print("Classification metrics for hdbscan")
print("\nPrediction Accuracy")
print("Accuracy score between flip_label and bow_hdbscan_label is:", 
      accuracy_score(df['flip_label'], df['bow_hdbscan_label']))

print("Accuracy score between flip_label and d2v_hdbscan_label is:", 
      accuracy_score(df['flip_label'], df['d2v_hdbscan_label']))

print("Accuracy score between flip_label and df_copy_hdbscan_label is:", 
      accuracy_score(df['flip_label'], df['df_copy_hdbscan_label']))

print("\nPrecision")
print("Precision score between flip_label and bow_hdbscan_label is:", 
      precision_score(df['flip_label'], df['bow_hdbscan_label']))

print("Precision score between flip_label and d2v_hdbscan_label is:", 
      precision_score(df['flip_label'], df['d2v_hdbscan_label']))

print("Precision score between flip_label and df_copy_hdbscan_label is:", 
      precision_score(df['flip_label'], df['df_copy_hdbscan_label']))

print("\nRecall")
print("Recall score between flip_label and bow_hdbscan_label is:", 
      recall_score(df['flip_label'], df['bow_hdbscan_label']))

print("Recall score between flip_label and d2v_hdbscan_label is:", 
      recall_score(df['flip_label'], df['d2v_hdbscan_label']))

print("Recall score between flip_label and df_copy_hdbscan_label is:", 
      recall_score(df['flip_label'], df['df_copy_hdbscan_label']))

print("\nF1")
print("F1 score between label and bow_hdbscan_label is:", 
      f1_score(df['label'], df['bow_hdbscan_label']))

print("F1 score between label and d2v_hdbscan_label is:", 
      f1_score(df['label'], df['d2v_hdbscan_label']))

print("F1 score between label and df_copy_hdbscan_label is:", 
      f1_score(df['label'], df['df_copy_hdbscan_label']))

Classification metrics for hdbscan

Prediction Accuracy
Accuracy score between flip_label and bow_hdbscan_label is: 0.5506847338615624
Accuracy score between flip_label and d2v_hdbscan_label is: 0.5144341176612041
Accuracy score between flip_label and df_copy_hdbscan_label is: 0.5337677796347284

Precision
Precision score between flip_label and bow_hdbscan_label is: 0.5291178949986871
Precision score between flip_label and d2v_hdbscan_label is: 0.5077367816536967
Precision score between flip_label and df_copy_hdbscan_label is: 0.5183073973103062

Recall
Recall score between flip_label and bow_hdbscan_label is: 0.9207267218160831
Recall score between flip_label and d2v_hdbscan_label is: 0.9461090804155444
Recall score between flip_label and df_copy_hdbscan_label is: 0.955535782993459

F1
F1 score between label and bow_hdbscan_label is: 0.5980534980868466
F1 score between label and d2v_hdbscan_label is: 0.6406772259267347
F1 score between label and df_copy_hdbscan_label is: 0.62457505285