In [17]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_score, \
          accuracy_score, fbeta_score, f1_score, \
          roc_auc_score, average_precision_score, \
          log_loss, PrecisionRecallDisplay, RocCurveDisplay


In [18]:
# Load the per-review results table; the cells below read its 'text', 'label',
# 'baseline_pred' and 'fancy_pred' columns.
# NOTE(review): the rendered frame also carries three 'Unnamed: 0*' columns,
# which look like row indices written back by earlier to_csv round trips -
# confirm, and consider writing the file with index=False.
results_csv = 'res.csv'
df = pd.read_csv(results_csv)

# Bare last expression so the notebook renders the (truncated) frame.
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label,baseline_pred,fancy_pred
0,0,0,I love sci-fi and am willing to put up with a ...,0,0,0.001385
1,1,1,"Worth the entertainment value of a rental, esp...",0,0,0.005809
2,2,2,its a totally average film with a few semi-alr...,0,0,0.002432
3,3,3,STAR RATING: ***** Saturday Night **** Friday ...,0,0,0.001326
4,4,4,"First off let me say, If you haven't enjoyed a...",0,1,0.996071
...,...,...,...,...,...,...
24995,24995,24995,Just got around to seeing Monster Man yesterda...,1,1,0.995831
24996,24996,24996,I got this as part of a competition prize. I w...,1,1,0.946916
24997,24997,24997,I got Monster Man in a box set of three films ...,1,1,0.957072
24998,24998,24998,"Five minutes in, i started to feel how naff th...",1,0,0.990291


# Baseline

In [21]:
# Evaluate the baseline model's predictions against the gold labels.
#
# NOTE(review): the rows displayed above show baseline_pred as hard 0/1 values
# rather than probabilities. If that holds for the whole column, roc_auc,
# avg_precision and loss below are computed from a single operating point and
# are not comparable with the same metrics of a model that outputs scores -
# confirm before comparing them side by side.
y_pred_class = df['baseline_pred'] > 0.5
preds = df['baseline_pred']
test_labels = df['label']

# For a binary problem sklearn lays the matrix out as [[tn, fp], [fn, tp]],
# so ravel() yields the four counts in exactly this order.
tn, fp, fn, tp = confusion_matrix(test_labels, y_pred_class).ravel()
false_positive_rate = fp / (fp + tn)
false_negative_rate = fn / (tp + fn)
true_negative_rate = tn / (tn + fp)
recall = recall_score(test_labels, y_pred_class) # or optionally tp / (tp + fn)
precision = precision_score(test_labels, y_pred_class)
accuracy = accuracy_score(test_labels, y_pred_class)
f1 = f1_score(test_labels, y_pred_class)
f2 = fbeta_score(test_labels, y_pred_class, beta=2)
# These three metrics consume the raw prediction column, not the thresholded one.
roc_auc = roc_auc_score(test_labels, preds)
avg_precision = average_precision_score(test_labels, preds)
loss = log_loss(test_labels, preds)

# One "label value" line per metric. The false negative rate used to be
# printed twice and two labels were misspelled ("negetive"); each metric is
# now reported exactly once.
report = {
    "true positive": tp,
    "true negative": tn,
    "false positive": fp,
    "false negative": fn,
    "false positive rate": false_positive_rate,
    "false negative rate": false_negative_rate,
    "true negative rate": true_negative_rate,
    "recall": recall,
    "precision": precision,
    "accuracy": accuracy,
    "f1": f1,
    "f2": f2,
    "roc_auc": roc_auc,
    "avg_precision": avg_precision,
    "loss": loss,
}
for metric_name, metric_value in report.items():
    print(metric_name, metric_value)

true positive 11088
true negative 10895
false positive 1605
false negative 1412
false positive rate 0.1284
false negetive rate 0.11296
false negetive rate 0.11296
true negetive rate 0.8716
recall 0.88704
precision 0.8735523516899079
accuracy 0.87932
f1 0.8802445123645458
f2 0.8843092530266538
roc_auc 0.8793200000000001
avg_precision 0.8313558780430159
loss 4.16819086949284


# Fancy

In [22]:
# Evaluate the fancy model's predictions against the gold labels.
# fancy_pred is thresholded at 0.5 for the class-based metrics; the raw column
# is passed unchanged to the ranking metrics and to log loss.
y_pred_class = df['fancy_pred'] > 0.5
preds = df['fancy_pred']
test_labels = df['label']

# For a binary problem sklearn lays the matrix out as [[tn, fp], [fn, tp]],
# so ravel() yields the four counts in exactly this order.
tn, fp, fn, tp = confusion_matrix(test_labels, y_pred_class).ravel()
false_positive_rate = fp / (fp + tn)
false_negative_rate = fn / (tp + fn)
true_negative_rate = tn / (tn + fp)
recall = recall_score(test_labels, y_pred_class) # or optionally tp / (tp + fn)
precision = precision_score(test_labels, y_pred_class)
accuracy = accuracy_score(test_labels, y_pred_class)
f1 = f1_score(test_labels, y_pred_class)
f2 = fbeta_score(test_labels, y_pred_class, beta=2)
# These three metrics consume the raw prediction column, not the thresholded one.
roc_auc = roc_auc_score(test_labels, preds)
avg_precision = average_precision_score(test_labels, preds)
loss = log_loss(test_labels, preds)

# One "label value" line per metric. The false negative rate used to be
# printed twice and two labels were misspelled ("negetive"); each metric is
# now reported exactly once.
report = {
    "true positive": tp,
    "true negative": tn,
    "false positive": fp,
    "false negative": fn,
    "false positive rate": false_positive_rate,
    "false negative rate": false_negative_rate,
    "true negative rate": true_negative_rate,
    "recall": recall,
    "precision": precision,
    "accuracy": accuracy,
    "f1": f1,
    "f2": f2,
    "roc_auc": roc_auc,
    "avg_precision": avg_precision,
    "loss": loss,
}
for metric_name, metric_value in report.items():
    print(metric_name, metric_value)

true positive 11730
true negative 11568
false positive 932
false negative 770
false positive rate 0.07456
false negetive rate 0.0616
false negetive rate 0.0616
true negetive rate 0.92544
recall 0.9384
precision 0.926393934607487
accuracy 0.93192
f1 0.9323583180987203
f2 0.9359739555073251
roc_auc 0.980803152
avg_precision 0.979465607339773
loss 0.2728717570501193


In [33]:
# Half-width of the band around the 0.5 decision threshold.
thresh = 0.28

# Keep only the rows whose fancy_pred lies strictly inside
# (0.5 - thresh, 0.5 + thresh); both bounds are excluded.
lower_bound = 0.5 - thresh
upper_bound = 0.5 + thresh
inside_band = (lower_bound < df['fancy_pred']) & (df['fancy_pred'] < upper_bound)
middle_df = df.loc[inside_band]
middle_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,text,label,baseline_pred,fancy_pred
160,160,160,At the heart of almost every truly great crime...,0,0,0.248236
183,183,183,I was at Wrestlemania VI in Toronto as a 10 ye...,0,0,0.241949
311,311,311,I bought Jack-O a number of months ago at a Bl...,0,1,0.774665
405,405,405,"First of all, the genre of this movie isn't co...",0,1,0.767297
735,735,735,The CinemaScope color cinematography of Leon S...,0,1,0.224134
...,...,...,...,...,...,...
22766,22766,22766,This movie is still alive and kicking today th...,1,1,0.750530
23563,23563,23563,The Falcon and the Snowman is based on a true ...,1,0,0.764648
23777,23777,23777,An interesting companion piece to true documen...,1,1,0.761285
24195,24195,24195,***SPOIERS*** Atlanta crime auctioneer with Bu...,1,1,0.227503


In [37]:
# Show the full review text of the first row in the band selected above.
middle_df.iloc[0]['text']

'At the heart of almost every truly great crime thriller is a carefully considered, methodically planned-out high stakes super-crime, which 9 times out of 10 is committed by a bunch of likable, grey-scale morality underdogs for who life isn\'t fair, for whom getting back at the man is, well, something worth cheering for. First-time screenwriter James V. Simpson\'s script for Armored gets this half right. He made extra-double-sure that we\'ve got nothing but sympathy for the recently orphaned, Iraq war veteran Ty Hackett (Stomp the Yard\'s Columbus Short), who\'s about to have his house taken away by an evil bank (brother, I\'ve been there). And he gave Ty a good family friend in Mike (Matt Dillon) who is super nice and gets him a job at the armored car company that he works at with Baines (Lawrence Fishbourne) and some weird French dude (Jean Reno). These guys like to have fun and play pranks, but they are also serious armored car guys too, so that means they carry guns and are tough.<

# Bootstrap resampling

In [44]:
# Paired bootstrap: how often does the fancy model beat the baseline on
# accuracy when both are scored on the same resampled set of reviews?
samplings = []
frac = 0.01          # size of each resample, as a fraction of len(df)
n_resamples = 10000  # number of bootstrap draws
seed = 42            # base seed so the printed percentage is reproducible

# Thresholding does not depend on the draw, so do it once instead of in every
# iteration; each resample then just picks rows of this small frame.
scored = pd.DataFrame({
    'label': df['label'],
    'fancy_class': df['fancy_pred'] > 0.5,
    'baseline_class': df['baseline_pred'] > 0.5,
})

for i in range(n_resamples):
    # replace=True is what makes this a bootstrap: DataFrame.sample defaults
    # to sampling WITHOUT replacement, which only yields random subsets.
    # A distinct, deterministic seed per draw keeps the run repeatable.
    data = scored.sample(frac=frac, replace=True, random_state=seed + i)

    # Both models are scored on the same rows, so the comparison is paired.
    accuracy_fancy = accuracy_score(data['label'], data['fancy_class'])
    accuracy_baseline = accuracy_score(data['label'], data['baseline_class'])

    # Strict '>' - a tie is not counted as a win for the fancy model.
    samplings.append(accuracy_fancy > accuracy_baseline)

print(f"Bootstrap sampling with {frac}", sum(samplings)/len(samplings)*100)

Bootstrap sampling with 0.01 99.41


In [45]:
# Percentage-point gap between the fancy model's bootstrap win rate and 100%.
# Derived from `samplings` instead of a number retyped from the printed
# output, so it cannot go stale when the bootstrap cell is re-run.
sum(samplings) / len(samplings) * 100 - 100

-0.5900000000000034