In [2]:
from datasets import load_dataset
from transformers import (
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
)
import pandas as pd
from sklearn.metrics import cohen_kappa_score
from transformers import pipeline

In [3]:
# load model from saved directory
model_directory="path" # path to the saved directory
model = RobertaForSequenceClassification.from_pretrained(model_directory)
tokenizer= RobertaTokenizerFast.from_pretrained(model_directory, truncation=True)
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
tokenizer_kwargs = {'truncation':True,'max_length':512,}

A matching Triton is not available, some optimizations will not be enabled.
Error caught was: No module named 'triton'


In [4]:
# get run data on model to get model labels
def get_model_labels(df,classifier):
    res=classifier(df.text.to_list(),**tokenizer_kwargs)
    df["model_label"]=[res[i]["label"] for i in range(0,len(df))]
    df["score"]=[res[i]["score"] for i in range(0,len(df))]
    #df=df.replace({"model_label":{0: "0",1: "E1", 2: "E2", 3: "E3", 4:"E4", 5 : "E5", 6: "S1", 7:"S2",8: "S3", 9: "S4", 10:"G1"}}) # if zero label included
    df=df.replace({"model_label":{0: "E1", 1: "E2", 2: "E3", 3:"E4", 4 : "E5", 5: "S1", 6:"S2",7: "S3", 8: "S4", 9:"G1"}})
    return df

In [5]:
def evaluate(df):
    df['agreement'] = (df['label']==df['model_label']).astype(int) # accuracy
    print("Cohen kappa score = {}".format(cohen_kappa_score(df.label,df.model_label)))
    print("Accuracy = {}".format(df.agreement.mean()))
    return df


In [6]:
# inserts new line into text, used to make ouptput more readable by human when needed
def insert_newlines(string, every=64):
    return ' \n '.join(string[i:i+every] for i in range(0, len(string), every))

In [7]:
path_test= "path to test dataset"
test_set=pd.read_csv(path_test,index_col=0).reset_index(drop=True)

In [8]:
test_set=evaluate(get_model_labels(test_set,classifier))

Cohen kappa score = 0.756339591147959
Accuracy = 0.8048780487804879


In [9]:
len(test_set)

369

In [10]:
# accuracy by label
test_set.groupby("label")["agreement"].mean()

label
0     0.000000
E1    0.974576
E2    1.000000
E3    0.954545
E4    0.909091
E5    0.928571
G1    0.898551
S1    0.760563
S2    0.000000
S3    0.000000
S4    0.750000
Name: agreement, dtype: float64

In [11]:
test_set[test_set["label"]=="S2"]

Unnamed: 0,text,label,model_label,score,agreement
0,"With special training programs, we ensure that...",S2,G1,0.977822,0
2,We took a close look at our companys remunerat...,S2,G1,0.999825,0
36,"Furthermore, we expect our suppliers to allow ...",S2,S1,0.999738,0
76,Development opportunities at work and in priva...,S2,S1,0.999768,0
164,Source: 2018 Annual Report > Management Report...,S2,G1,0.99433,0


In [12]:
test_set.at[0,"text"]

'With special training programs, we ensure that our employees are \n  aware of the security risks involved in the area in which they  \n work and are able to respond in an appropriate way. For example, \n  we instruct them on the applicable standards for their place of \n  work and on the Groups compliance policy. Our security policies \n  expressly include the protection of personal rights and adheren \n ce to legal requirements. This promotes a security culture which \n  is both accepted throughout the Group and complies with prevail \n ing law.'

In [16]:
test_set[test_set["label"]=="S3"]

Unnamed: 0,text,label,model_label,score,agreement
60,Communication with our stakeholders takes plac...,S3,S4,0.999737,0
294,"Through our human resources activities, we con...",S3,S1,0.999744,0


In [17]:
test_set.at[294,"text"]

'Through our human resources activities, we contribute directly t \n o SDG 4 Quality Education, SDG 5 Gender Equality, SDG 8 Decent W \n ork and Economic Growth and SDG 10 Reduced Inequalities. To enab \n le our con- tribution to the SDGs and prepare the company for th \n e future, GP further developed its comprehensive Human Resources \n  strategy, called the People Agenda.'

In [40]:
# evaluate non zero label set
evaluate(test_set[test_set["label"]!="0"])

Cohen kappa score = 0.8507228050989372
Accuracy = 0.8839285714285714


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['agreement'] = (df['label']==df['model_label']).astype(int)


Unnamed: 0,text,label,model_label,score,agreement
0,"With special training programs, we ensure that...",S2,G1,0.977822,0
1,An important objective of our strategy is fost...,E5,E5,0.999718,1
2,We took a close look at our companys remunerat...,S2,G1,0.999825,0
3,"The health, safety and professional and person...",S1,S1,0.999748,1
4,Our actions are determined by the principles o...,G1,G1,0.999809,1
...,...,...,...,...,...
364,"We not only want to be the Employer of Choice,...",S1,S1,0.999713,1
365,We aim to cut the specific primary energy usag...,E1,E1,0.999819,1
366,"Responsible Management, Marketplace & Communit...",G1,G1,0.981110,1
367,In 2020 our annual employee survey included qu...,S1,G1,0.985651,0


In [20]:
# some test sets might need processing first depending on the formating
test_set_FP=pd.read_csv("txt_set_fp.csv").reset_index(drop=True)
test_set_FP["FP_label"]=test_set_FP["label"]
test_set_FP["label"]=test_set_FP["FP_label"].str[:2]

In [22]:
test_set_FP=evaluate(get_model_labels(test_set_FP,classifier))

Cohen kappa score = 0.8990066225165563
Accuracy = 0.9180327868852459


In [24]:
test_set_FP[test_set_FP["agreement"]==0]

Unnamed: 0,text,label,FP_label,model_label,score,agreement
0,"In our Business Conduct Guidelines, we commit ...",S3,S3-1,G1,0.999805,0
18,"At 44.6%, we succeeded in slightly improving t...",E1,E1-3,E3,0.665774,0
24,Our global society today faces the major chall...,G1,G1-3,S3,0.642532,0
30,Natural raw materials often form the basis\nof...,G1,"G1-2, S2-1",E5,0.998215,0
49,"By 2025, prevent and significantly\nreduce mar...",E3,E3-1,E2,0.99967,0


In [29]:
test_set_FP.at[49,"text"]

'By 2025, prevent and significantly\nreduce marine pollution of all\nkinds, in particular from land-based\nactivities, including marine debris\nand nutrient pollution'

In [21]:
test_set_MJ=pd.read_csv("..\\MJ_test_set_labeld.csv").reset_index(drop=True)
test_set_MJ["MJ_label"]=test_set_MJ["Label"]
test_set_MJ["label"]=test_set_MJ["MJ_label"].str[:2]
test_set_MJ=test_set_MJ.rename(columns={"Text": "text"})

In [33]:
test_set_MJ=evaluate(get_model_labels(test_set_MJ,classifier))

Cohen kappa score = 0.6521324431121407
Accuracy = 0.7099099099099099


In [34]:
evaluate(test_set_MJ[test_set_MJ["label"]!="0"])

Cohen kappa score = 0.7300882612108108
Accuracy = 0.7801980198019802


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['agreement'] = (df['label']==df['model_label']).astype(int)


Unnamed: 0,ID,text,Label,MJ_label,label,model_label,score,agreement
0,174,When we purchase energy sources through RWE Su...,"S3-1,G1-2","S3-1,G1-2",S3,E1,0.998955,0
1,53,The realignment of our company requires increa...,S1-13,S1-13,S1,S1,0.999631,1
2,441,"In our Business Conduct Guidelines, we commit ...",G1-1,G1-1,G1,G1,0.999805,1
3,115,Particularly against the background of the tra...,S3-4,S3-4,S3,S3,0.999379,1
4,465,Qualified safety experts supervise the impleme...,S1-14,S1-14,S1,S1,0.999686,1
...,...,...,...,...,...,...,...,...
549,454,Good leadership is essential to Infineons succ...,G1-1,G1-1,G1,S1,0.998776,0
551,243,Responsible procurement of raw materials\n We ...,"S2-1,G1-2","S2-1,G1-2",S2,G1,0.999749,0
552,154,The compliance function at RWE AG also plays a...,G1-1,G1-1,G1,G1,0.999802,1
553,358,"The 80/20 model\n Since 2017, we have been usi...",S1-13,S1-13,S1,S1,0.999734,1


In [36]:
test_set_MJ.groupby("label")["agreement"].mean()

label
0     0.000000
E1    0.902913
E2    0.611111
E3    0.952381
E4    0.909091
E5    0.741935
G1    0.528571
S1    0.899408
S2    0.380952
S3    0.482759
S4    0.761905
Name: agreement, dtype: float64

In [37]:
test_set_MJ[test_set_MJ["label"]=="S2"]

Unnamed: 0,ID,text,Label,MJ_label,label,model_label,score,agreement
12,140,"On 11 June 2021, the German Federal Parlament ...",S2-1,S2-1,S2,S2,0.992912,1
53,593,Ensuring that our suppliers adhere to BMW Bril...,S2-1,S2-1,S2,G1,0.999819,0
74,599,Symrise is involved in the Initiative for Sust...,S2-1,S2-1,S2,S2,0.996348,1
132,589,Our Siemens Business Conduct Guidelines\n(BCG)...,"S2-1,S1-1","S2-1,S1-1",S2,G1,0.999829,0
156,141,Depending on the region and the local politica...,"S2-5,G1-2","S2-5,G1-2",S2,S2,0.995769,1
185,225,Suppliers that present especially high levels ...,"S2-1,G1-2","S2-1,G1-2",S2,S4,0.999412,0
188,587,"Large companies, such as Siemens, operating wi...",S2-1,S2-1,S2,S2,0.996243,1
195,600,Natural raw materials often form the basis\nof...,S2-1,S2-1,S2,E5,0.998215,0
264,237,Everyone who identifies irregularities in our ...,S2-2,S2-2,S2,G1,0.999813,0
302,235,"As a rule, our supplier management runs throug...","S2-1,G1-2","S2-1,G1-2",S2,G1,0.980325,0


In [39]:
test_set_MJ.at[53,"text"]

'Ensuring that our suppliers adhere to BMW Brilliance’s\nstandards and expectations for social responsibility is\na core aspect of our supply chain management framework.\nSince 2020, we have had in place the BMW\nBrilliance Supplier Social Responsibility Code of Conduct\nwhile continuing to promote and integrate social responsibility\nmeasures across our entire supply chain. In\n2021, we made key progress in our Supply Chain Social\nResponsibility Initiatives.'