<a href="https://colab.research.google.com/github/ChrisBagdon/Citation_Classification/blob/main/tools.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from string import punctuation
from math import log
import csv
import numpy as np
import pandas as pd

In [32]:
### Tokenize method
# Takes in a text string and tokens dictionary
# Updates tokens dictionary with counts of tokens from text
def tokenize(text, tokens):
  cur_token =""
  for char in text:
    # Check if is apart of token
    if char.isalnum() or char == "'":
      cur_token += char
      continue
    # Check for space
    elif char == " ":
      if len(cur_token) > 0:
        if cur_token in tokens:
          tokens[cur_token] += 1
          cur_token = ""
          continue
        else:
          tokens[cur_token] = 1
          cur_token = ""
          continue
    # Check if punctuation
    else:
      if len(cur_token) > 0:
        if cur_token in tokens:
          tokens[cur_token] += 1
          cur_token = ""
        else:
          tokens[cur_token] = 1
          cur_token = ""
      if char in tokens:
        tokens[char] += 1
        continue
      else:
        tokens[char] = 1
        continue
  if len(cur_token) > 0:
    if cur_token in tokens:
          tokens[cur_token] += 1
          cur_token = ""
    else:
      tokens[cur_token] = 1
      cur_token = ""


In [33]:
# First test of the tokenize method
tokens = {}
text_1 = "By clustering with lowly aggressive close kin (King 1989a,b; Viblanc et al. 2010; Arnaud, Dobson & Murie 2012), breeding females may decrease the time/energy cost of maintaining territorial boundaries (Festa-Bianchet & Boag 1982; Murie & Harris 1988), which could ultimately lead to increases in net energy income (TA) or higher allocations in somatic or reproductive functions."
tokenize(text_1, tokens)

In [34]:
# Second test of the tokenize method
text_2 = "Ophthalmic symptoms are rare manifestations of the intracranial arachnoid cyst, and include unilateral exophthalmos, visual field abnormality, decreased visual acuity and isolated palsies of the third, fourth and sixth cranial nerves [1–5]."
tokenize(text_2, tokens)

In [35]:
# Display tokens dict results after two tests
print(tokens.items())

dict_items([('By', 1), ('clustering', 1), ('with', 1), ('lowly', 1), ('aggressive', 1), ('close', 1), ('kin', 1), ('(', 3), ('King', 1), ('1989a', 1), (',', 8), ('b', 1), (';', 3), ('Viblanc', 1), ('et', 1), ('al', 1), ('.', 3), ('2010', 1), ('Arnaud', 1), ('Dobson', 1), ('&', 3), ('Murie', 2), ('2012', 1), (')', 3), ('breeding', 1), ('females', 1), ('may', 1), ('decrease', 1), ('the', 3), ('time', 1), ('/', 1), ('energy', 2), ('cost', 1), ('of', 3), ('maintaining', 1), ('territorial', 1), ('boundaries', 1), ('Festa', 1), ('-', 1), ('Bianchet', 1), ('Boag', 1), ('1982', 1), ('Harris', 1), ('1988', 1), ('which', 1), ('could', 1), ('ultimately', 1), ('lead', 1), ('to', 1), ('increases', 1), ('in', 2), ('net', 1), ('income', 1), ('TA', 1), ('or', 2), ('higher', 1), ('allocations', 1), ('somatic', 1), ('reproductive', 1), ('functions', 1), ('Ophthalmic', 1), ('symptoms', 1), ('are', 1), ('rare', 1), ('manifestations', 1), ('intracranial', 1), ('arachnoid', 1), ('cyst', 1), ('and', 3), ('in

In [36]:
### Baseline classifier: Naive Bayes
class naive_bayes:
  def __init__(self):
      self.labels = {}
      self.doc_count = 0
      self.bin_size = 0
  
  def train(self, X, Y):
    for string, label in zip(X, Y):
      # Count instances of labels
      if label not in self.labels:
        self.labels[label] = {'count':1, 'terms':{}}
      else:
        self.labels[label]['count'] += 1
      # Count tokens from document
      tokenize(string, self.labels[label]['terms'])
      # Increase total document count
      self.doc_count += 1
    
    # Tally bin_size for smoothing
    terms_list = []
    for label, labels_dic in self.labels.items():
      terms_list = terms_list + list(labels_dic['terms'].keys())
    self.bin_size += len(set(terms_list))
    # Calculate class statistics
    for label, labels_dic in self.labels.items():
      # Calculate label prior probability
      self.labels[label]["prior"] = labels_dic['count'] / self.doc_count
      # Save total number of tokens in label + smoothing
      self.labels[label]["term_count"] = sum(labels_dic['terms'].values())+self.bin_size
      # Calculate probability of each token in label + smoothing
      terms = labels_dic['terms'].keys()
      #for term in terms:
        #print((labels_dic['terms'][term]+1) / labels_dic["term_count"])
      self.labels[label]["term_probs"] = {term:(labels_dic['terms'][term]+1) /
                                          labels_dic["term_count"]
                                           for term in terms}

  def predict(self, X, use_log=True):
    predictions = []

    for string in X:
      tokens = {}
      tokenize(string, tokens)
      probabilities = []
      for label, label_dic in self.labels.items():
        if use_log:
          prob = sum(log(label_dic["term_probs"][token])*count 
                        if token in label_dic['terms']
                        else log(1/label_dic['term_count'])*count 
                        for token, count in tokens.items()) \
                  + log(label_dic['prior'])
        else:
          prob = label_dic['prior']
          for token, count in tokens.items():
            if token in label_dic['terms'].keys():
              prob = prob*(label_dic["term_probs"][token]**count)
            else:
              prob = prob*((1/label_dic['term_count'])**count)
              #print(prob)
        probabilities.append((label, prob))
      predictions.append(max(probabilities, key=lambda item:item[1])[0])

    return predictions

In [37]:
### Evaluation method
# Takes predictions and gold_standard lists
# Returns a confusion matrix (2D list) and scores (P, R, F-1 values)
def evaluate(predictions, gold_standard):
    # Collect all unique labels from predictions and gold_std
    labels_set = set(predictions + gold_standard)
    labels = {}
    for i, label in enumerate(labels_set):
        labels[label] = i
    # Create confusion matrix
    confusion_matrix = np.zeros((len(labels_set),len(labels_set)))
    for pred, gold in zip(predictions, gold_standard):
        confusion_matrix[labels[pred]][labels[gold]] += 1
    labels_index = list(labels_set); labels_index.append('overall')
    columns = []
    # Create scores table
    scores = pd.DataFrame(np.zeros((len(labels_set), 3)))
    scores.columns = ['Precision', 'Recall', 'F1']
    overall_TP = 0
    # Calculate P, R, F1 and populate scores table
    for label in labels_set:
        i = labels[label]
        scores['Precision'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=0)[i]
        scores['Recall'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=1)[i]
        # Possible error case: P == 0 == R; divide by 0
        if scores['Precision'][i] == 0 and scores['Recall'][i] == 0:
            scores['F1'][i] = 0
        else:
            scores['F1'][i] = 2 * (scores['Precision'][i]*scores['Recall'][i]/(scores['Precision'][i]+scores['Recall'][i]))
        overall_TP += confusion_matrix[i][i]
    scores.loc[len(labels_set)] = [overall_TP / np.sum(confusion_matrix)] * 3
    scores.index = labels_index
    return (confusion_matrix, scores)

# Testing the Naive Bayes classifier (color vs. size)

In [38]:
# Elementary train/test sets for Naive Bayes with color vs. size
X_train_color = ['Red Blue Blue',
                 'Blue Blue Green',
                 'Blue Yellow',
                 'Big Small Blue']
Y_train_color = ['color', 'color', 'color', 'size']
X_test_color = ['Blue Blue Blue Big Small']

In [39]:
# Instantiate and train color model
color_model = naive_bayes()
color_model.train(X_train_color, Y_train_color)

In [40]:
# Make predictions with color model
color_predictions = color_model.predict(X_test_color)
color_predictions

['color']

In [41]:
# Color model values
color_model.labels

{'color': {'count': 3,
  'terms': {'Red': 1, 'Blue': 5, 'Green': 1, 'Yellow': 1},
  'prior': 0.75,
  'term_count': 14,
  'term_probs': {'Red': 0.14285714285714285,
   'Blue': 0.42857142857142855,
   'Green': 0.14285714285714285,
   'Yellow': 0.14285714285714285}},
 'size': {'count': 1,
  'terms': {'Big': 1, 'Small': 1, 'Blue': 1},
  'prior': 0.25,
  'term_count': 9,
  'term_probs': {'Big': 0.2222222222222222,
   'Small': 0.2222222222222222,
   'Blue': 0.2222222222222222}}}

# Applying NB classifier to citation data

Here, we are applying our Naive Bayes classifier above to the citation datasets used in this project. In this step, we are only taking the citation strings as data. Moving forward, we will be adding the sentiment feature to our data after the sentiment classifier is established.

In [12]:
# Open training set
with open('scicite/tsv/train.tsv') as train_file_text:
    train_data_text = csv.reader(train_file_text, delimiter="\t")
    X_train_text, Y_train_text = [],[]
    for row in train_data_text:
        X_train_text.append(row[2])
        Y_train_text.append(row[3])

In [13]:
# Training set 'label' values
Y_train_text[:5]

['background', 'background', 'background', 'background', 'background']

In [14]:
# Training set 'text' values
X_train_text[:5]

['However, how frataxin interacts with the Fe-S cluster biosynthesis components remains unclear as direct one-to-one interactions with each component were reported (IscS [12,22], IscU/Isu1 [6,11,16] or ISD11/Isd11 [14,15]).',
 'In the study by Hickey et al. (2012), spikes were sampled from the field at the point of physiological\\nrobinson et al.: genomic regions influencing root traits in barley 11 of 13\\nmaturity, dried, grain threshed by hand, and stored at −20C to preserve grain dormancy before germination testing.',
 'The drug also reduces catecholamine secretion, thereby reducing stress and leading to a modest (10-20%) reduction in heart rate and blood pressure, which may be particularly beneficial in patients with cardiovascular disease.(7) Unlike midazolam, dexmedetomidine does not affect the ventilatory response to carbon dioxide.',
 'By clustering with lowly aggressive close kin (King 1989a,b; Viblanc et al. 2010; Arnaud, Dobson & Murie 2012), breeding females may decrease t

In [15]:
# Open development sets
with open('scicite/tsv/dev.tsv') as dev_file_text:
    dev_data_text = csv.reader(dev_file_text, delimiter="\t")
    X_dev_text, Y_dev_text = [],[]
    for row in dev_data_text:
        X_dev_text.append(row[2])
        Y_dev_text.append(row[3])

In [16]:
# Instantiate and train text model
text_model = naive_bayes()
text_model.train(X_train_text, Y_train_text)

In [17]:
# Text model's label-wise token counts
text_model.labels

{'background': {'count': 4840,
  'terms': {'However': 112,
   ',': 14305,
   'how': 30,
   'frataxin': 4,
   'interacts': 16,
   'with': 1118,
   'the': 6206,
   'Fe': 14,
   '-': 3404,
   'S': 49,
   'cluster': 16,
   'biosynthesis': 12,
   'components': 42,
   'remains': 21,
   'unclear': 11,
   'as': 1081,
   'direct': 38,
   'one': 148,
   'to': 2722,
   'interactions': 35,
   'each': 66,
   'component': 19,
   'were': 322,
   'reported': 234,
   '(': 6108,
   'IscS': 3,
   '[': 2406,
   '12': 191,
   '22': 114,
   ']': 2396,
   'IscU': 2,
   '/': 423,
   'Isu1': 1,
   '6': 234,
   '11': 203,
   '16': 152,
   'or': 654,
   'ISD11': 1,
   'Isd11': 2,
   '14': 160,
   '15': 160,
   ')': 6128,
   '.': 10488,
   'In': 335,
   'study': 184,
   'by': 924,
   'Hickey': 4,
   'et': 5107,
   'al': 5107,
   '2012': 268,
   'spikes': 2,
   'sampled': 3,
   'from': 642,
   'field': 37,
   'at': 365,
   'point': 44,
   'of': 5290,
   'physiological': 21,
   '\\': 580,
   'nrobinson': 1,
   ':':

In [18]:
# Make predictions with text model
predictions_text = text_model.predict(X_dev_text)

In [19]:
# Get confusion matrix and scores of text model
cf_text, scores_text = evaluate(predictions_text, Y_dev_text)
print(scores_text)

            Precision    Recall        F1
method       0.725490  0.740000  0.732673
result       0.560976  0.821429  0.666667
background   0.871747  0.805842  0.837500
overall      0.789301  0.789301  0.789301


# NB model's term probabilities (only text)

In [20]:
background_text = pd.DataFrame(text_model.labels['background']['term_probs'].items()).sort_values(by=[1], ascending=False).reset_index()
result_text = pd.DataFrame(text_model.labels['result']['term_probs'].items()).sort_values(by=[1], ascending=False).reset_index()
method_text = pd.DataFrame(text_model.labels['method']['term_probs'].items()).sort_values(by=[1], ascending=False).reset_index()

term_prob_text = pd.DataFrame([background_text[0][:50],background_text[1][:50],result_text[0][:50],result_text[1][:50],method_text[0][:50],method_text[1][:50],]).transpose()

In [21]:
term_prob_text.columns = ['background','background_prob', 'result', 'result_prob', 'method', 'method_prob']
term_prob_text.head(10)

Unnamed: 0,background,background_prob,result,result_prob,method,method_prob
0,",",0.056401,",",0.034595,",",0.039839
1,.,0.041353,.,0.028823,.,0.030278
2,the,0.024471,the,0.018712,the,0.027081
3,),0.024163,of,0.016628,(,0.022378
4,(,0.024085,in,0.01499,),0.022297
5,and,0.022898,(,0.014887,and,0.01874
6,of,0.02086,),0.014532,of,0.017057
7,et,0.020138,and,0.014291,-,0.01483
8,al,0.020138,et,0.013181,[,0.011442
9,;,0.01603,al,0.013169,],0.011391


# Exploration of where the baseline makes mistakes

In [22]:
cf

NameError: name 'cf' is not defined

In [None]:
dev_tsv = pd.read_csv('scicite/tsv/dev.tsv', sep='\t', 
                      names=["citingPaperID", "source", "string", "true_label"])
dev_tsv.head()

In [None]:
dev_tsv["prediction_label"] = predictions
dev_tsv.head()

In [None]:
dev_background = dev_tsv.copy()
dev_background = dev_background[dev_background['true_label'] == "background"]
dev_background = dev_background[dev_background['prediction_label'] != "background"]
dev_background.head()

In [None]:
list(dev_background[dev_background['prediction_label'] == "method"]['string'])

In [None]:
dev_result = dev_tsv.copy()
dev_result = dev_result[dev_result['true_label'] == "result"]
dev_result = dev_result[dev_result['prediction_label'] != "result"]
dev_result.head()

In [None]:
list(dev_result[dev_result['prediction_label'] == "method"]['string'])

In [None]:
dev_method = dev_tsv.copy()
dev_method = dev_method[dev_method['true_label'] == "method"]
dev_method = dev_method[dev_method['prediction_label'] != "method"]
dev_method.head()

# Adding sentiment data and applying NB classifier

The sentiment classifier has been implemented and the original citation data has been labeled with sentiment labels (positive, neutral, negative). New CSVs have been created with the sentiment feature, which will be appended to each of the texts when creating the training and dev sets for the new 'sentiment' Naive Bayes model below. The Naive Bayes baseline model remains unchanged from above.

In [48]:
# Open sentiment training set
with open('scicite/sentiment_csv/train_sent.csv') as train_file_sent:
    train_data_sent = csv.reader(train_file_sent, delimiter=",")
    # Skip the first row (header)
    next(train_data_sent)
    X_train_sent, Y_train_sent = [],[]
    for row in train_data_sent:
        # Append sentiment feature as an additional token
        X_train_sent.append(row[1] + ' ' + 'Sentiment' + row[3])
        Y_train_sent.append(row[2])

In [49]:
# Training set 'label' values
Y_train_sent[:5]

['background', 'background', 'background', 'background', 'background']

In [50]:
# Training set 'text + sent' values
X_train_sent[:5]

['However how frataxin interacts with the Fe-S cluster biosynthesis components remains unclear as direct one-to-one interactions with each component were reported (IscS [1222] IscU/Isu1 [61116] or ISD11/Isd11 [1415]). Sentimentneutral',
 'In the study by Hickey et al. (2012) spikes were sampled from the field at the point of physiological\\nrobinson et al.: genomic regions influencing root traits in barley 11 of 13\\nmaturity dried grain threshed by hand and stored at −20C to preserve grain dormancy before germination testing. Sentimentpositive',
 'The drug also reduces catecholamine secretion thereby reducing stress and leading to a modest (10-20%) reduction in heart rate and blood pressure which may be particularly beneficial in patients with cardiovascular disease.(7) Unlike midazolam dexmedetomidine does not affect the ventilatory response to carbon dioxide. Sentimentneutral',
 'By clustering with lowly aggressive close kin (King 1989ab; Viblanc et al. 2010; Arnaud Dobson & Murie 2

In [51]:
# Open sentiment dev set
with open('scicite/sentiment_csv/dev_sent.csv') as dev_file_sent:
    dev_data_sent = csv.reader(dev_file_sent, delimiter=",")
    # Skip the first row (header)
    next(dev_data_sent)
    X_dev_sent, Y_dev_sent = [], []
    for row in dev_data_sent:
        # Append sentiment feature as an additional token
        X_dev_sent.append(row[3] + ' ' + 'Sentiment' + row[5])
        Y_dev_sent.append(row[4])

In [52]:
# Instantiate and train sentiment model
sentiment_model = naive_bayes()
sentiment_model.train(X_train_sent, Y_train_sent)

In [53]:
# Sentiment model's label-wise token counts
sentiment_model.labels

{'background': {'count': 4840,
  'terms': {'However': 112,
   'how': 30,
   'frataxin': 4,
   'interacts': 16,
   'with': 1118,
   'the': 6206,
   'Fe': 14,
   '-': 3404,
   'S': 49,
   'cluster': 16,
   'biosynthesis': 12,
   'components': 42,
   'remains': 21,
   'unclear': 11,
   'as': 1081,
   'direct': 38,
   'one': 148,
   'to': 2722,
   'interactions': 35,
   'each': 66,
   'component': 19,
   'were': 322,
   'reported': 234,
   '(': 6108,
   'IscS': 3,
   '[': 2406,
   '1222': 1,
   ']': 2396,
   'IscU': 2,
   '/': 423,
   'Isu1': 1,
   '61116': 1,
   'or': 654,
   'ISD11': 1,
   'Isd11': 2,
   '1415': 4,
   ')': 6128,
   '.': 10488,
   'Sentimentneutral': 3863,
   'In': 335,
   'study': 184,
   'by': 924,
   'Hickey': 4,
   'et': 5107,
   'al': 5107,
   '2012': 268,
   'spikes': 2,
   'sampled': 3,
   'from': 642,
   'field': 37,
   'at': 365,
   'point': 44,
   'of': 5290,
   'physiological': 21,
   '\\': 580,
   'nrobinson': 1,
   ':': 286,
   'genomic': 20,
   'regions': 45

In [54]:
# Make predictions with sentiment model
predictions_sentiment = sentiment_model.predict(X_dev_sent)

In [55]:
# Get confusion matrix and scores of sentiment model
cf_sentiment, scores_sentiment = evaluate(predictions_sentiment, Y_dev_sent)
print(scores_sentiment)

            Precision    Recall        F1
method       0.737255  0.737255  0.737255
result       0.585366  0.837209  0.688995
background   0.862454  0.806957  0.833783
overall      0.790393  0.790393  0.790393


# Shuffling of sentiment label assignments

Here, we're experimenting with the idea of shuffling the sentiment classifier's results amongst the data. In other words, we're trying to see how the NB/DistilBERT models perform when given random sentiment labels to the citations. This does not mean that the sentiment classifier is randomly assigning new sentiment labels to the data; rather, the existing sentiment labels created by the sentiment classifier are being shuffled. Therefore, the distribution of sentiment classes (positive, negative, neutral) remains the same, but intent-sentiment classwise distributions will change.

In [24]:
# Shuffling
train_sent_shuffled = pd.read_csv('scicite/sentiment_csv/train_sent.csv')
train_sent_shuffled['sentiment_shuffled'] = list(train_sent_shuffled['sentiment'][np.random.RandomState(seed=1).permutation(train_sent_shuffled.index)])
train_sent_shuffled.to_csv('scicite/sentiment_csv/train_sent_shuffled.csv')

dev_sent_shuffled = pd.read_csv('scicite/sentiment_csv/dev_sent.csv')
dev_sent_shuffled['sentiment_shuffled'] = list(dev_sent_shuffled['sentiment'][np.random.RandomState(seed=1).permutation(dev_sent_shuffled.index)])
dev_sent_shuffled.to_csv('scicite/sentiment_csv/dev_sent_shuffled.csv')

test_sent_shuffled = pd.read_csv('scicite/sentiment_csv/test_sent.csv')
test_sent_shuffled['sentiment_shuffled'] = list(test_sent_shuffled['sentiment'][np.random.RandomState(seed=1).permutation(test_sent_shuffled.index)])
test_sent_shuffled.to_csv('scicite/sentiment_csv/test_sent_shuffled.csv')

In [25]:
# Open shuffled sentiment training set
with open('scicite/sentiment_csv/train_sent_shuffled.csv') as train_file_sent_shuffled:
    train_data_sent_shuffled = csv.reader(train_file_sent_shuffled, delimiter=",")
    # Skip the first row (header)
    next(train_data_sent_shuffled)
    X_train_sent_shuffled, Y_train_sent_shuffled = [],[]
    for row in train_data_sent_shuffled:
        # Append sentiment feature as an additional token
        X_train_sent_shuffled.append(row[2] + ' ' + 'Sentiment' + row[7])
        Y_train_sent_shuffled.append(row[3])

In [28]:
# Training set 'label' values
Y_train_sent_shuffled[:5]

['background', 'background', 'background', 'background', 'background']

In [29]:
# Training set 'text + sent' values
X_train_sent_shuffled[:5]

['However how frataxin interacts with the Fe-S cluster biosynthesis components remains unclear as direct one-to-one interactions with each component were reported (IscS [1222] IscU/Isu1 [61116] or ISD11/Isd11 [1415]). Sentimentpositive',
 'In the study by Hickey et al. (2012) spikes were sampled from the field at the point of physiological\\nrobinson et al.: genomic regions influencing root traits in barley 11 of 13\\nmaturity dried grain threshed by hand and stored at −20C to preserve grain dormancy before germination testing. Sentimentneutral',
 'The drug also reduces catecholamine secretion thereby reducing stress and leading to a modest (10-20%) reduction in heart rate and blood pressure which may be particularly beneficial in patients with cardiovascular disease.(7) Unlike midazolam dexmedetomidine does not affect the ventilatory response to carbon dioxide. Sentimentneutral',
 'By clustering with lowly aggressive close kin (King 1989ab; Viblanc et al. 2010; Arnaud Dobson & Murie 2

In [30]:
# Open shuffled sentiment dev set
with open('scicite/sentiment_csv/dev_sent_shuffled.csv') as dev_file_sent_shuffled:
    dev_data_sent_shuffled = csv.reader(dev_file_sent_shuffled, delimiter=",")
    # Skip the first row (header)
    next(dev_data_sent_shuffled)
    X_dev_sent_shuffled, Y_dev_sent_shuffled = [], []
    for row in dev_data_sent_shuffled:
        # Append sentiment feature as an additional token
        X_dev_sent_shuffled.append(row[4] + ' ' + 'Sentiment' + row[7])
        Y_dev_sent_shuffled.append(row[5])

In [42]:
# Instantiate and train shuffled sentiment model
shuffled_sentiment_model = naive_bayes()
shuffled_sentiment_model.train(X_train_sent_shuffled, Y_train_sent_shuffled)

In [43]:
# Shuffled sentiment model's label-wise token counts
shuffled_sentiment_model.labels

{'background': {'count': 4840,
  'terms': {'However': 112,
   'how': 30,
   'frataxin': 4,
   'interacts': 16,
   'with': 1118,
   'the': 6206,
   'Fe': 14,
   '-': 3404,
   'S': 49,
   'cluster': 16,
   'biosynthesis': 12,
   'components': 42,
   'remains': 21,
   'unclear': 11,
   'as': 1081,
   'direct': 38,
   'one': 148,
   'to': 2722,
   'interactions': 35,
   'each': 66,
   'component': 19,
   'were': 322,
   'reported': 234,
   '(': 6108,
   'IscS': 3,
   '[': 2406,
   '1222': 1,
   ']': 2396,
   'IscU': 2,
   '/': 423,
   'Isu1': 1,
   '61116': 1,
   'or': 654,
   'ISD11': 1,
   'Isd11': 2,
   '1415': 4,
   ')': 6128,
   '.': 10488,
   'Sentimentpositive': 1412,
   'In': 335,
   'study': 184,
   'by': 924,
   'Hickey': 4,
   'et': 5107,
   'al': 5107,
   '2012': 268,
   'spikes': 2,
   'sampled': 3,
   'from': 642,
   'field': 37,
   'at': 365,
   'point': 44,
   'of': 5290,
   'physiological': 21,
   '\\': 580,
   'nrobinson': 1,
   ':': 286,
   'genomic': 20,
   'regions': 4

In [44]:
# Make predictions with shuffled sentiment model
predictions_sentiment_shuffled = shuffled_sentiment_model.predict(X_dev_sent_shuffled)

In [45]:
# Get confusion matrix and scores of shuffled sentiment model
cf_sentiment_shuffled, scores_sentiment_shuffled = evaluate(predictions_sentiment_shuffled, Y_dev_sent_shuffled)
print(scores_sentiment_shuffled)

            Precision    Recall        F1
result       0.552846  0.829268  0.663415
method       0.733333  0.733333  0.733333
background   0.864312  0.803109  0.832587
overall      0.786026  0.786026  0.786026
