In [2]:
import json
import logging
import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, TensorDataset)
from torch.nn import CrossEntropyLoss, MSELoss
import os
from os.path import join
import sys
import pickle
import csv
from transformers import BertTokenizer,BertForSequenceClassification
from transformers.optimization import AdamW

In [3]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Optional: pin a specific GPU index on a multi-GPU machine.
# torch.cuda.set_device(6)

In [4]:
# Dimensions to score. Each name is interpolated into the per-dimension
# weights path ('weights/BERT/<dim>') when the model is loaded, so the
# spelling must match the directory names on disk.
# (Single-dimension variant kept for reference: dim = 'romance')
dims = [
    'social_support',
    'conflict',
    'trust',
    'fun',
    'similarity',
    'identity',
    'respect',
    'romance',
    'knowledge',
    'power',
]

In [5]:
# --- Model / checkpoint configuration ---
# The directories depend on the dimension being scored, so they are not fixed
# here. Earlier single-dimension form, kept for reference:
# DATA_DIR = 'data/%s' % dim            # where the train/test/dev files are stored
# OUTPUT_DIR = 'weights/BERT/%s' % dim  # where the model weights are stored

# Pre-trained checkpoint name, and the local cache directory passed to
# from_pretrained when the base model is loaded.
BERT_MODEL, CACHE_DIR = 'bert-base-cased', 'cache/'

# Number of output classes for the sequence-classification head.
num_labels = 2

# NOTE(review): OUTPUT_MODE and CONFIG_NAME are not referenced by the visible
# cells; presumably carried over from the training script -- confirm before removing.
OUTPUT_MODE = 'classification'

# File names expected inside a weights directory (WEIGHTS_NAME is joined onto
# the per-dimension directory when the state dict is loaded).
CONFIG_NAME, WEIGHTS_NAME = "config.json", "pytorch_model.bin"

In [6]:
# Instantiate the base BERT classifier once. Its classification head is
# freshly initialised here (see the warning printed below), so it is only
# meaningful after a fine-tuned state dict has been loaded into it.
model = BertForSequenceClassification.from_pretrained(
    BERT_MODEL,
    cache_dir=CACHE_DIR,
    num_labels=num_labels,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Score a handful of sentences on every dimension.
#
# For each dimension the matching fine-tuned weights are loaded into `model`,
# every sentence is run through it, and the softmax probability of class 1 is
# stored (rounded to 3 decimals) in `scoring[sentence][dim]`.
sentences = {
    1: "Thnx. Seems crowded. I hope the 1.0 will allow get closer...",
    2: "Look, Dave, I know that you’re sincere and that you’re trying to do a competent job, and that you’re trying to be helpful, but I can assure the problem is with the AO-units, and with your test gear"
    }

# The tokenizer does not depend on the dimension, so build it once instead of
# once per (sentence, dimension) pair.
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=False)

# Tokenize each sentence once and place it on the same device as the model.
# Using `device` rather than a hard-coded .cuda() keeps the CPU fallback working.
encoded_sentences = {
    sentence_id: torch.tensor([tokenizer.encode(sentence, add_special_tokens=True)]).to(device)
    for sentence_id, sentence in sentences.items()
}

model.to(device)
model.eval()  # inference only: make sure dropout is disabled

scoring = {sentence: {} for sentence in sentences.values()}
for dim in dims:
    # Load the fine-tuned weights for this dimension exactly once.
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    OUTPUT_DIR = 'weights/BERT/%s' % dim
    output_model_file = os.path.join(OUTPUT_DIR, WEIGHTS_NAME)
    model.load_state_dict(torch.load(output_model_file, map_location=device))

    with torch.no_grad():
        for sentence_id, sentence in sentences.items():
            logits = model(encoded_sentences[sentence_id])[0]
            probabilities = torch.softmax(logits, 1)
            # Column 1 is the probability of the positive class.
            scoring[sentence][dim] = round(probabilities[0, 1].item(), 3)

# Report in sentence order, then dimension order.
for sentence_id, sentence in sentences.items():
    print(f'\n{sentence_id}: {sentence}')
    for dim in dims:
        print("\t%s:\t%1.3f" % (dim, scoring[sentence][dim]))

# To persist the scores, uncomment (path is relative to the working directory):
# with open(os.path.join('out', 'manual_scoring.txt'), 'w') as convert_file:
#     convert_file.write(json.dumps(scoring))


1: Thnx. Seems crowded. I hope the 1.0 will allow get closer...
	social_support:	0.102
	conflict:	0.041
	trust:	0.028
	fun:	0.025
	similarity:	0.033
	identity:	0.013
	respect:	0.007
	romance:	0.000
	knowledge:	0.046
	power:	0.011

2: Look, Dave, I know that you’re sincere and that you’re trying to do a competent job, and that you’re trying to be helpful, but I can assure the problem is with the AO-units, and with your test gear
	social_support:	0.716
	conflict:	0.018
	trust:	0.030
	fun:	0.025
	similarity:	0.022
	identity:	0.003
	respect:	0.003
	romance:	0.000
	knowledge:	0.013
	power:	0.011
