In [0]:
import torch

# Choose the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
cuda_ready = torch.cuda.is_available()
device = torch.device("cuda" if cuda_ready else "cpu")

# Report what was selected.
if not cuda_ready:
    print('No GPU available, using the CPU instead.')
else:
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: Tesla P4


In [0]:
# Mount Google Drive into the Colab filesystem at /content/drive so the dataset
# CSVs can be read and written like local files. As the recorded output shows,
# this step is interactive: it asks for an OAuth authorization code before
# reporting "Mounted at /content/drive".
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Set the seed value all over the place to make this reproducible.
import random
import numpy as np
import glob
import os.path
import codecs
import pandas as pd
import string
import matplotlib.pyplot as plt
import time
import datetime
import seaborn as sns
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import torch.nn as nn

# Single seed shared by every random number generator seeded below.
seed_val = 16

# Seed Python's `random`, NumPy and PyTorch (including CUDA) with the same value.
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_rng(seed_val)

# Request deterministic cuDNN algorithms.
torch.backends.cudnn.deterministic = True

# Base directory inside the mounted Google Drive; dataset paths are built from it.
root_path = "/content/drive/My Drive/"

  import pandas.util.testing as tm


In [0]:
# Locations of the train / dev / test CSV files under the Drive root.
dataset_path = f"{root_path}causal_datasets_with_features/"
train_csv = f"{dataset_path}unbalanced_train_dataset_80_20.csv"
dev_csv = f"{dataset_path}unbalanced_dev_dataset_80_20.csv"
test_csv = f"{dataset_path}climate_change_with_predictions.csv"

# Read the three splits, in train, dev, test order.
df_train, df_dev, df_test = (
    pd.read_csv(csv_path) for csv_path in (train_csv, dev_csv, test_csv)
)


def get_features(df):
  """Extract the feature columns fed to the classifier from one data split.

  Args:
    df: DataFrame that contains the columns 'Framenet causal score',
      'Causal link presence' and 'Pre-trained causal classifier labels'.

  Returns:
    A 3-tuple ``(framenet_causal, causal_links_presence,
    pretrained_classifier_score)`` holding the ``.values`` array of each
    column, in that order.

  Raises:
    KeyError: if any of the three columns is missing from ``df``.
  """
  # The 'Causal verb presence' column is currently not used as a feature.
  wanted_columns = (
      'Framenet causal score',
      'Causal link presence',
      'Pre-trained causal classifier labels',
  )
  return tuple(df[column].values for column in wanted_columns)

# Pull the per-column feature arrays out of each split (train, dev, test).
(train_framenet_causal,
 train_causal_links,
 train_pretrained_label) = get_features(df_train)

(dev_framenet_causal,
 dev_causal_links,
 dev_pretrained_label) = get_features(df_dev)

(test_framenet_causal,
 test_causal_links,
 test_pretrained_label) = get_features(df_test)

In [0]:
# Target labels for the two labelled splits, taken from the 'Labels' column.
train_labels, dev_labels = df_train['Labels'].values, df_dev['Labels'].values

# Show how many examples each split holds (train first, then dev).
for split_labels in (train_labels, dev_labels):
    print(split_labels.shape)

(5061,)
(1072,)


In [0]:
# Build one feature matrix per split. Column order is the same everywhere:
# FrameNet causal score, pre-trained classifier label, causal link presence.
x_train, x_dev, x_test = (
    np.column_stack(feature_columns)
    for feature_columns in (
        (train_framenet_causal, train_pretrained_label, train_causal_links),
        (dev_framenet_causal, dev_pretrained_label, dev_causal_links),
        (test_framenet_causal, test_pretrained_label, test_causal_links),
    )
)

#x_train = train_pretrained_label.reshape(-1,1)
#x_dev = dev_pretrained_label.reshape(-1,1)

In [0]:
from sklearn.linear_model import LogisticRegression

# Fit a class-weight-balanced logistic regression on the training features.
logreg_model = LogisticRegression(class_weight='balanced', random_state=0).fit(
    x_train, train_labels
)

# Predict labels for the dev split and for the test split, in that order.
predicted_labels, test_predicted_labels = (
    logreg_model.predict(features) for features in (x_dev, x_test)
)


In [0]:
# df_dev['Log reg classifier predictions'] = predicted_labels
# df_dev.to_csv(root_path + "causal_datasets_with_features/dev_80_20_with_predictions.csv")
# Attach the logistic-regression predictions to the test frame.
df_test['Log reg predictions'] = test_predicted_labels

# test_csv is the same file df_test was loaded from. Writing the default integer
# index would therefore add one more "Unnamed: 0" column to the file on every
# re-run of the notebook; index=False keeps the read/write round trip stable.
df_test.to_csv(test_csv, index=False)

In [0]:
# Fraction of dev examples whose predicted label matches the true label.
dev_accuracy = accuracy_score(dev_labels, predicted_labels)
print("Accuracy of classifier - ", dev_accuracy)

Accuracy of classifier -  0.6940298507462687


In [0]:
# Flatten the 2x2 dev-set confusion matrix into its four cells; with two
# classes, ravel() yields them in the order tn, fp, fn, tp.
print("Confusion matrix values - ")
tn, fp, fn, tp = confusion_matrix(dev_labels, predicted_labels).ravel()

# Report the cells in the order TP, FN, TN, FP.
for cell_label, cell_count in (("TP - ", tp), ("FN - ", fn), ("TN - ", tn), ("FP - ", fp)):
    print(cell_label, cell_count)

Confusion matrix values - 
TP -  23
FN -  19
TN -  721
FP -  309


In [0]:
# Report dev-set F1 three ways: per class (average=None), macro and weighted.
for heading, averaging in (
    ("F1-score of classifier on each class - ", None),
    ("Macro averaged F1-score of classifier - ", 'macro'),
    ("Weighted averaged F1-score of classifier - ", 'weighted'),
):
    print(heading, f1_score(dev_labels, predicted_labels, average=averaging))

F1-score of classifier on each class -  [0.81468927 0.12299465]
Macro averaged F1-score of classifier -  0.46884195897157016
Weighted averaged F1-score of classifier -  0.7875892900222895
