<a href="https://colab.research.google.com/github/ChrisBagdon/Citation_Classification/blob/main/bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install/import libraries
#!pip install transformers
#!pip install datasets
import transformers
import pandas as pd
import datasets

# Setting up the BERT models

In [None]:
# Import pretrained distilbert tokenizer
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [3]:
# Use ClassLabel object as labels
from datasets.features.features import ClassLabel
labels = ClassLabel(num_classes=3, names=['background', 'result', 'method'])

In [4]:
### Preprocess function
# Required to tokenize and batch data
def preprocess_function(batch):
    tokens = tokenizer(batch['text'], padding=True, truncation=True, max_length=128)
    tokens['label'] = labels.str2int(batch['label'])
    return tokens

In [5]:
# Form batches with padding
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [6]:
# Evaluation function
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

### Compute metrics
# Returns argmax predictions based on given predictions
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# Apply BERT model to only citation texts

In [7]:
# Read in datasets (CSVs)
data_path = "scicite/csv/"
data_text = datasets.load_dataset('csv', data_files={'train':data_path+'train.csv', 'test':data_path+'dev.csv'})
data_text['train'][1]

Using custom data configuration default-7ce5d716e95771b2
Reusing dataset csv (/Users/shdpark/.cache/huggingface/datasets/csv/default-7ce5d716e95771b2/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/2 [00:00<?, ?it/s]

{'text': 'In the study by Hickey et al. (2012) spikes were sampled from the field at the point of physiological\\nrobinson et al.: genomic regions influencing root traits in barley 11 of 13\\nmaturity dried grain threshed by hand and stored at −20C to preserve grain dormancy before germination testing.',
 'label': 'background'}

In [8]:
# Tokenize and batch data
tokenized_data_text = data_text.map(preprocess_function, batched=True)

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [9]:
# Instantiate RoBERTa model
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

text_model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=3)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifi

In [12]:
# Training arguments for text model
training_args_text = TrainingArguments(
    output_dir="./results-Roberta",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    remove_unused_columns=True,
    evaluation_strategy="epoch"
    )

# Instantiate Trainer for text model
trainer_text = Trainer(
    model=text_model,
    args=training_args_text,
    train_dataset=tokenized_data_text["train"],
    eval_dataset=tokenized_data_text["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,)

In [13]:
trainer_text.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 8243
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2580


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [79]:
# Instantiate classifier
from transformers import pipeline
classifier = pipeline(task='text-classification', model="./results/checkpoint-2500", tokenizer=tokenizer)

OSError: We couldn't connect to 'https://huggingface.co' to load this model, couldn't find it in the cached files and it looks like ./results/checkpoint-2500 is not the path to a directory containing a config.json file.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [14]:
import csv
with open('scicite/tsv/dev.tsv') as dev_file_text:
    dev_data_text = csv.reader(dev_file_text, delimiter="\t")
    X_dev_text, Y_dev_text = [],[]
    for row in dev_data_text:
        X_dev_text.append(row[2])
        Y_dev_text.append(row[3])

In [None]:
predictions = classifier(X_dev_text)

In [None]:
labels = {'LABEL_0':'background', 'LABEL_1':'result', 'LABEL_2':'method'}
preds = [labels[x['label']] for x in predictions]

In [90]:
import numpy as np
import pandas as pd
def evaluate(predictions, gold_standard):
    # Collect all unique labels from predictions and gold_std
    labels_set = set(predictions + gold_standard)
    labels = {}
    for i, label in enumerate(labels_set):
        labels[label] = i
    # Create confusion matrix
    confusion_matrix = np.zeros((len(labels_set),len(labels_set)))
    for pred, gold in zip(predictions, gold_standard):
        confusion_matrix[labels[pred]][labels[gold]] += 1
    labels_index = list(labels_set); labels_index.append('overall')
    columns = []
    # Create scores table
    scores = pd.DataFrame(np.zeros((len(labels_set), 3)))
    scores.columns = ['Precision', 'Recall', 'F1']
    overall_TP = 0
    # Calculate P, R, F1 and populate scores table
    for label in labels_set:
        i = labels[label]
        scores['Precision'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=0)[i]
        scores['Recall'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=1)[i]
        # Possible error case: P == 0 == R; divide by 0
        if scores['Precision'][i] == 0 and scores['Recall'][i] == 0:
            scores['F1'][i] = 0
        else:
            scores['F1'][i] = 2 * (scores['Precision'][i]*scores['Recall'][i]/(scores['Precision'][i]+scores['Recall'][i]))
        overall_TP += confusion_matrix[i][i]
    scores.loc[len(labels_set)] = [overall_TP / np.sum(confusion_matrix)] * 3
    scores.index = labels_index
    return (confusion_matrix, scores)

In [None]:
cf, scores = evaluate(preds, Y_dev)

In [None]:
print(scores)
print(cf)

# Applying BERT model to citation data with sentiment labels added

In [59]:
## Append sentiment labels to citation texts for test and dev sets
data_path = "scicite/sentiment_csv/"

# Training set
df_sentiment_train = pd.read_csv(data_path + 'train_sent.csv')
df_sentiment_train['text'] = df_sentiment_train['text'] + ' [SEP] ' + df_sentiment_train['sentiment']
df_sentiment_train.to_csv(data_path + 'train_sent_bert.csv')

# Dev set
df_sentiment_dev = pd.read_csv(data_path + 'dev_sent.csv')
df_sentiment_dev['text'] = df_sentiment_dev['text'] + ' [SEP] ' + df_sentiment_dev['sentiment']
df_sentiment_dev.to_csv(data_path + 'dev_sent_bert.csv')

In [64]:
# Read in datasets (CSVs)
data_sentiment = datasets.load_dataset('csv', data_files={'train':data_path+'train_sent_bert.csv', 'test':data_path+'dev_sent_bert.csv'})
data_sentiment = data_sentiment.remove_columns(['Unnamed: 0', 'Unnamed: 0.1', 'ID', 'explicit', 'sentiment'])
data_sentiment['train'][1]

Using custom data configuration default-62e27be68a4b47fc
Reusing dataset csv (/Users/shdpark/.cache/huggingface/datasets/csv/default-62e27be68a4b47fc/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/2 [00:00<?, ?it/s]

{'text': 'In the study by Hickey et al. (2012) spikes were sampled from the field at the point of physiological\\nrobinson et al.: genomic regions influencing root traits in barley 11 of 13\\nmaturity dried grain threshed by hand and stored at −20C to preserve grain dormancy before germination testing. [SEP] positive',
 'label': 'background'}

In [73]:
# Sentiment model's tokenizer
tokenizer_sentiment = AutoTokenizer.from_pretrained("distilbert-base-uncased", sep_token='[SEP]')

### Preprocess function for sentiment model
# Required to tokenize and batch data
def preprocess_function_sentiment(batch):
    tokens = tokenizer_sentiment(batch['text'], padding=True, truncation=True, max_length=128)
    tokens['label'] = labels.str2int(batch['label'])
    return tokens

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /Users/shdpark/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.19.2",
  "vocab_size": 30522
}

loading file https://huggingface.co/distilbert-base-uncased/resolve/main/vocab.txt from cache at /Users/shdpark/.cache/huggingface/transformers/0e1bbfda7f6

In [74]:
# Tokenize and batch data
tokenized_data_sentiment = data_sentiment.map(preprocess_function_sentiment, batched=True)

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [75]:
# Instantiate RoBERTa model
sentiment_model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=3)

loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /Users/shdpark/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
 

In [76]:
# Training arguments for sentiment model
training_args_sentiment = TrainingArguments(
    output_dir="./results-Roberta-sentiment",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    remove_unused_columns=True,
    evaluation_strategy="epoch"
    )

# Instantiate Trainer for text model
trainer_sentiment = Trainer(
    model=sentiment_model,
    args=training_args_sentiment,
    train_dataset=tokenized_data_sentiment["train"],
    eval_dataset=tokenized_data_sentiment["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [77]:
trainer_sentiment.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 8243
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2580


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7783,0.660148,0.75655
2,0.5592,0.586838,0.769651
3,0.4795,0.63646,0.784934
4,0.4136,0.656125,0.772926
5,0.3655,0.621763,0.79476


Saving model checkpoint to ./results-Roberta-sentiment/checkpoint-500
Configuration saved in ./results-Roberta-sentiment/checkpoint-500/config.json
Model weights saved in ./results-Roberta-sentiment/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ./results-Roberta-sentiment/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./results-Roberta-sentiment/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 916
  Batch size = 16
Saving model checkpoint to ./results-Roberta-sentiment/checkpoint-1000
Configuration saved in ./results-Roberta-sentiment/checkpoint-1000/config.json
Model weights saved in ./results-Roberta-sentiment/checkpoint-1000/pytorch_model.bin
tokeniz

TrainOutput(global_step=2580, training_loss=0.5140291169632313, metrics={'train_runtime': 10518.5564, 'train_samples_per_second': 3.918, 'train_steps_per_second': 0.245, 'total_flos': 2711054877914880.0, 'train_loss': 0.5140291169632313, 'epoch': 5.0})

In [80]:
# Instantiate classifier for sentiment model
sentiment_classifier = pipeline(task='text-classification', model="./results-Roberta-sentiment/checkpoint-2500", tokenizer=tokenizer_sentiment)

loading configuration file ./results-Roberta-sentiment/checkpoint-2500/config.json
Model config RobertaConfig {
  "_name_or_path": "./results-Roberta-sentiment/checkpoint-2500",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.19.2",
  "type_vocab_size": 1,
  "use_cache": 

In [85]:
# Import dev dataset
with open('scicite/sentiment_csv/dev_sent_bert.csv') as dev_file_sentiment:
    dev_data_sentiment = csv.reader(dev_file_sentiment, delimiter=",")
    next(dev_data_sentiment)
    X_dev_sentiment, Y_dev_sentiment = [],[]
    for row in dev_data_sentiment:
        X_dev_sentiment.append(row[4])
        Y_dev_sentiment.append(row[5])

In [87]:
print(X_dev_sentiment[:5], Y_dev_sentiment[:5])

['These results are in contrast with the findings of Santos et al.(16) who reported a significant association between low sedentary time and healthy CVF among Portuguese [SEP] positive', '…nest burrows in close proximity of one another appears to be well founded as previously shown by several studies that measured distances between kin vs. non-kin nest burrows including in long-term data sets (King 1989b; Viblanc et al. 2010; Arnaud Dobson & Murie 2012; Dobson et al. 2012). [SEP] neutral', 'This is clearly in contrast to the results of earlier investigations ( Laprise & Peltier 1989a  Pierre - humbert & Wyman 1985  Clark & Peltier 1977 )  where it was found that the criteria for static and dynamic instabilities are simultaneously satisfied . [SEP] positive', '…in a subset of alcoholics (Chen et al. 2004; McElroy et al. 2009; Mistlberger and Nadeau 1992; Rosenwasser et al. 2005ab; Seggio et al. 2009; Spanagel et al. 2005a); and (vi) The Clock gene is important for the circadian regulati

In [88]:
predictions_sentiment = sentiment_classifier(X_dev_sentiment)

Disabling tokenizer parallelism, we're using DataLoader multithreading already


In [89]:
labels = {'LABEL_0':'background', 'LABEL_1':'result', 'LABEL_2':'method'}
preds_sentiment = [labels[x['label']] for x in predictions_sentiment]

In [91]:
cf_sentiment, scores_sentiment = evaluate(preds_sentiment, Y_dev_sentiment)

In [93]:
print(scores_sentiment)
print(cf_sentiment)

            Precision    Recall        F1
background   0.828996  0.830540  0.829767
result       0.764228  0.661972  0.709434
method       0.709804  0.763713  0.735772
overall      0.787118  0.787118  0.787118
[[446.  24.  67.]
 [ 41.  94.   7.]
 [ 51.   5. 181.]]


# Training DistilBERT model with shuffled sentiment data

In [1]:
# Shuffling for BERT sets

# Sentiment Classifier

In [None]:
import pandas as pd
import datasets

In [None]:
import gc
import torch
from GPUtil import showUtilization as gpu_usage
from numba import cuda
gc.collect()
torch.cuda.empty_cache()

def free_gpu_cache():
    print("Initial GPU Usage")
    gpu_usage()

    torch.cuda.empty_cache()

    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

free_gpu_cache()

In [None]:
data = datasets.load_dataset('csv', data_files={'train':'sentimentAnnotations_CSV/train_anno_2.csv', 'test':'sentimentAnnotations_CSV/test_anno.csv'})

In [None]:
data['train'] = data['train'].rename_columns({'Column1':'ID', 'Column2':'exp', 'Column3':'text', 'Column4':'type', 'Column5':'label'})
data['test'] = data['test'].rename_columns({'Column1':'ID', 'Column2':'exp', 'Column3':'text', 'Column4':'type', 'Column5':'label'})

In [None]:
data

In [None]:
data['train'] = data['train'].remove_columns(['exp', 'ID', 'type'])
data['test'] = data['test'].remove_columns(['exp', 'ID', 'type'])

In [None]:
data['train'][1]

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [None]:
from datasets.features.features import ClassLabel
labels = ClassLabel(num_classes=3, names=['positive', 'negative', 'neutral'])

In [None]:
def preprocess_function(batch):

    tokens = tokenizer(batch['text'], padding='max_length', truncation=True)
    tokens['label'] = labels.str2int(batch['label'])
    return tokens

In [None]:
tokenized_data = data.map(preprocess_function, batched=True)

In [None]:
tokenized_data

In [None]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
import numpy as np

from datasets import load_metric

metric = load_metric("accuracy")

def compute_metrics(eval_pred):

    logits, labels = eval_pred

    predictions = np.argmax(logits, axis=-1)

    return metric.compute(predictions=predictions, references=labels)

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)

In [None]:
training_args = TrainingArguments(
    output_dir="sent-results-distilbert",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    weight_decay=0.01,
    #remove_unused_columns=True,
    evaluation_strategy="epoch"
    )

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,)

In [None]:
trainer.train()

In [None]:
trainer.evaluate()

In [None]:
from transformers import pipeline
classifier = pipeline(task='text-classification', model='sent-results-distilbert/checkpoint-3000/', tokenizer=tokenizer)

In [None]:
import csv
with open('sentimentAnnotations_CSV/test_anno.csv') as dev_file:
  dev_data = csv.reader(dev_file)
  X_dev, Y_dev = [],[]
  for row in dev_data:
    X_dev.append(row[2])
    Y_dev.append(row[4])
  X_dev = X_dev[1:]
  Y_dev = Y_dev[1:]

In [None]:
predictions = classifier(X_dev)

In [None]:
labels = {'LABEL_0':'positive', 'LABEL_1':'negative', 'LABEL_2':'neutral'}
preds = [labels[x['label']] for x in predictions]

In [None]:
Y_dev

In [None]:
import numpy as np
import pandas as pd
def evaluate(predictions, gold_standard):
    # Collect all unique labels from predictions and gold_std
    labels_set = set(predictions + gold_standard)
    labels = {}
    for i, label in enumerate(labels_set):
        labels[label] = i
    # Create confusion matrix
    confusion_matrix = np.zeros((len(labels_set),len(labels_set)))
    for pred, gold in zip(predictions, gold_standard):
        confusion_matrix[labels[pred]][labels[gold]] += 1
    labels_index = list(labels_set); labels_index.append('overall')
    columns = []
    # Create scores table
    scores = pd.DataFrame(np.zeros((len(labels_set), 3)))
    scores.columns = ['Precision', 'Recall', 'F1']
    overall_TP = 0
    # Calculate P, R, F1 and populate scores table
    for label in labels_set:
        i = labels[label]
        scores['Precision'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=0)[i]
        scores['Recall'][i] = confusion_matrix[i][i] / np.sum(confusion_matrix, axis=1)[i]
        # Possible error case: P == 0 == R; divide by 0
        if scores['Precision'][i] == 0 and scores['Recall'][i] == 0:
            scores['F1'][i] = 0
        else:
            scores['F1'][i] = 2 * (scores['Precision'][i]*scores['Recall'][i]/(scores['Precision'][i]+scores['Recall'][i]))
        overall_TP += confusion_matrix[i][i]
    scores.loc[len(labels_set)] = [overall_TP / np.sum(confusion_matrix)] * 3
    scores.index = labels_index
    return (confusion_matrix, scores)

In [None]:
cf, scores = evaluate(preds, Y_dev)

In [None]:
print(scores)
print(cf)

In [None]:
data = datasets.load_dataset('csv', data_files={'train':'sentimentAnnotations_CSV/train_anno_2.csv'})

In [None]:
data["train"].num_rows

In [None]:
cross_0 = pd.read_pickle("cross_val_scores_0.pkl")
cross_1 = pd.read_pickle("cross_val_scores_1.pkl")
cross_2 = pd.read_pickle("cross_val_scores_2.pkl")
cross_3 = pd.read_pickle("cross_val_scores_3.pkl")
cross_4 = pd.read_pickle("cross_val_scores_4.pkl")

In [None]:
df_concat = pd.concat([cross_1,cross_0,cross_2,cross_3,cross_4])

In [None]:
by_row_index = df_concat.groupby(df_concat.index)
df_means = by_row_index.mean()

In [None]:
df_means.head()

In [None]:
cross_4

In [None]:
cross_4

In [None]:
print(df_means)