# Data Combiner

Here we bring all the results together into one clean DataFrame.

In [34]:
# !pip install pandas_profiling
# !pip install ipywidgets

In [35]:
import pandas as pd
import os
import plotly.express as px
import numpy as np

In [36]:
from pandas_profiling import ProfileReport
from statistics import mean
from scipy.stats import f_oneway


In [37]:
# Collects result DataFrames; later cells append their frames to this list.
result_combiner = []

### Zero Shot Fulltext 

In [38]:
# List the files in the zero-shot full-text results folder and display them.
topic_modeler = os.listdir(r"..\Data\Resultate\Zero Shot Learning Fulltext")
topic_modeler

['abb_sustainability_performanceFullText_TopicModeling.csv',
 'adecco_group_cidFullText_TopicModeling.csv',
 'allreal_report_employeeFullText_TopicModeling.csv',
 'aluminium_tadjikistan_glencoreFullText_TopicModeling.csv',
 'amag_aluminium_austriaFullText_TopicModeling.csv',
 'america_rome_romanFullText_TopicModeling.csv',
 'axa_health_customerFullText_TopicModeling.csv',
 'baloise_risk_managementFullText_TopicModeling.csv',
 'bcge_financial_boardFullText_TopicModeling.csv',
 'bell_food_groupFullText_TopicModeling.csv',
 'bouygues_sustainability_reportFullText_TopicModeling.csv',
 'braun_employee_managementFullText_TopicModeling.csv',
 'bucher_industries_employeeFullText_TopicModeling.csv',
 'business_avaloq_reportFullText_TopicModeling.csv',
 'celgene_report_employeeFullText_TopicModeling.csv',
 'chemical_mitsubishi_employeeFullText_TopicModeling.csv',
 'child_cocoa_schoolFullText_TopicModeling.csv',
 'cid_burckhardt_compressionFullText_TopicModeling.csv',
 'cid_client_riskFullText_To

In [39]:
# Read every zero-shot full-text result file, tag each frame with its company
# key and stack all frames into one long DataFrame.
topic_modeler = os.listdir(r"..\Data\Resultate\Zero Shot Learning Fulltext")

topic_modeler_list = []
for csv_name in topic_modeler:
    frame = pd.read_csv(
        fr"..\Data\Resultate\Zero Shot Learning Fulltext\{csv_name}", index_col=0)
    frame = frame.rename(columns={"Score": "Zero Shot Fulltext"})
    # Map 'employee affairs' onto 'labour law' so the labels line up with the
    # 'labour law' label used by the other result frames.
    frame['Label'] = frame['Label'].replace({'employee affairs': 'labour law'})
    # The company key is the file name minus its trailing 26 characters.
    frame.insert(loc=0, column='Company', value=csv_name[:-26])
    topic_modeler_list.append(frame)

df_topic_modeler = pd.concat(topic_modeler_list)
result_combiner.append(df_topic_modeler)

In [40]:
# Preview the first rows of the combined zero-shot full-text frame.
df_topic_modeler.head()

Unnamed: 0,Company,Label,Zero Shot Fulltext
0,abb_sustainability_performance,social issues,0.249538
1,abb_sustainability_performance,human rights,0.22675
2,abb_sustainability_performance,sustainability,0.224777
3,abb_sustainability_performance,labour law,0.215665
4,abb_sustainability_performance,fraud,0.08327


### Cosine Similarities Sentence by Sentence

In [41]:
# Read every raw sentence-by-sentence cosine similarity file, tag each frame
# with its company key and stack the frames into one DataFrame.
sentence_cosine_similarity = os.listdir(r"..\Data\Resultate\Sentence by Sentence Cosine Similarity raw")

sentence_cosine_similarity_list = []
for csv_name in sentence_cosine_similarity:
    frame = pd.read_csv(
        fr"..\Data\Resultate\Sentence by Sentence Cosine Similarity raw\{csv_name}", index_col=0)
    frame = frame.rename(
        columns={"Cosine Similarity Mean": "Sentence Cosine Similarity Wiki Summarized"})
    # The company key is the file name minus its trailing 15 characters.
    frame.insert(loc=0, column='Company', value=csv_name[:-15])
    sentence_cosine_similarity_list.append(frame)

df_sentence_cosine_similarity_list_list = pd.concat(sentence_cosine_similarity_list)
result_combiner.append(df_sentence_cosine_similarity_list_list)
# Row count of the stacked frame (shown as the cell output).
len(df_sentence_cosine_similarity_list_list)

590

Mean Calculation

In [42]:
# Per-row mean of the sentence-level cosine similarities.
# Every cell of the source column holds the string form of a list of
# single-value tensors, e.g. "[tensor([[0.3533]]), tensor([[-0.0050]])]".
cosine_mean = []
cosine_results = df_sentence_cosine_similarity_list_list["Sentence Cosine Similarity Wiki Summarized"].tolist()
for element in cosine_results:
    # Drop the surrounding "[" and "]" and split into one token per tensor.
    tokens = element[1:-1].split(", ")
    # Strip the "tensor([[" / "]])" wrapper and parse the complete number.
    # A fixed-width slice would cut off digits: "-0.0050" would become "-0.00"
    # and a scientific-notation value such as "1.2345e-05" would become "1.234".
    scores = [
        float(token.strip().replace("tensor([[", "").replace("]])", ""))
        for token in tokens
    ]
    cosine_mean.append(round(mean(scores), 3))


In [43]:
# Number of computed means; the next cell assigns this list as a DataFrame column.
len(cosine_mean)

590

In [44]:
# Attach the per-row means as a new column.
df_sentence_cosine_similarity_list_list["Sentence Cosine Similarity Wiki Summarized Mean"] = cosine_mean
# Not the last expression of the cell, so this preview is not displayed.
df_sentence_cosine_similarity_list_list.head()
# NOTE(review): this same DataFrame object is appended to result_combiner in other
# cells as well, so the list holds repeated references to it — confirm this is intended.
result_combiner.append(df_sentence_cosine_similarity_list_list)

In [45]:
def mean_cosine_calculator(cosine_results):
    """Return the mean cosine similarity of each serialized score list.

    Parameters
    ----------
    cosine_results : iterable of str
        Each item is the string form of a list of single-value tensors,
        e.g. ``"[tensor([[0.3533]]), tensor([[0.0915]])]"``.

    Returns
    -------
    list of float
        One mean per input string, rounded to three decimals, in input order.
        An empty input yields an empty list.

    Raises
    ------
    ValueError
        If a token does not contain a parseable number.
    """
    means = []
    for element in cosine_results:
        # Drop the surrounding "[" and "]" and split into one token per tensor.
        tokens = element[1:-1].split(", ")
        # Strip the "tensor([[" / "]])" wrapper and parse the complete number,
        # so signs, all digits and scientific notation are preserved.
        scores = [
            float(token.strip().replace("tensor([[", "").replace("]])", ""))
            for token in tokens
        ]
        means.append(round(mean(scores), 3))
    return means

Mean Calculation with Threshold 0.0

In [46]:
# Per-row mean of the sentence-level cosine similarities, counting only
# strictly positive scores.
# Every cell of the source column holds the string form of a list of
# single-value tensors, e.g. "[tensor([[0.3533]]), tensor([[-0.0050]])]".
cosine_mean_threshold = []
cosine_results_threshold = df_sentence_cosine_similarity_list_list["Sentence Cosine Similarity Wiki Summarized"].tolist()
for element in cosine_results_threshold:
    # Drop the surrounding "[" and "]" and split into one token per tensor.
    tokens = element[1:-1].split(", ")
    # Strip the "tensor([[" / "]])" wrapper and parse the complete number.
    # A fixed-width slice would cut off digits: "-0.0050" would become "-0.00"
    # and a scientific-notation value such as "1.2345e-05" would become "1.234".
    scores = [
        float(token.strip().replace("tensor([[", "").replace("]])", ""))
        for token in tokens
    ]
    # Threshold: keep only scores above 0.0.
    kept_scores = [score for score in scores if score > 0.0]
    if not kept_scores:
        # No score passed the threshold: fall back to a fixed placeholder of 0.05.
        kept_scores = [0.05]

    cosine_mean_threshold.append(round(mean(kept_scores), 3))


In [47]:
# Number of thresholded means; the next cell assigns this list as a DataFrame column.
len(cosine_mean_threshold)

590

In [48]:
# Attach the thresholded per-row means as a new column.
df_sentence_cosine_similarity_list_list["Sentence Cosine Similarity Wiki Summarized Mean Threshold 0"] = cosine_mean_threshold
# Not the last expression of the cell, so this preview is not displayed.
df_sentence_cosine_similarity_list_list.head()
# NOTE(review): this same DataFrame object is appended to result_combiner in other
# cells as well, so the list holds repeated references to it — confirm this is intended.
result_combiner.append(df_sentence_cosine_similarity_list_list)

### Cosine Similarity Sentence by Sentence Human Rights

In [49]:
# Read every raw human-rights cosine similarity file, tag each frame with its
# company key and stack the frames into one DataFrame.
sentence_cosine_similarity_hr = os.listdir(r"..\Data\Resultate\Sentence by Sentence Cosine Similarity raw human rights")

sentence_cosine_similarity_list_hr = []
for csv_name in sentence_cosine_similarity_hr:
    frame = pd.read_csv(
        fr"..\Data\Resultate\Sentence by Sentence Cosine Similarity raw human rights\{csv_name}", index_col=0)
    frame = frame.rename(
        columns={"Cosine Similarity Mean": "Sentence by Sentence Cosine Similarity human rights"})
    # The company key is the file name minus its trailing 28 characters.
    frame.insert(loc=0, column='Company', value=csv_name[:-28])
    sentence_cosine_similarity_list_hr.append(frame)

df_sentence_cosine_similarity_list_hr = pd.concat(sentence_cosine_similarity_list_hr)
# The frame is added to result_combiner in a later cell, once its mean columns exist.
# Row count of the stacked frame (shown as the cell output).
len(df_sentence_cosine_similarity_list_hr)

590

In [50]:
# Preview the first rows of the stacked human-rights cosine similarity frame.
df_sentence_cosine_similarity_list_hr.head()

Unnamed: 0,Company,Label,Sentence by Sentence Cosine Similarity human rights
0,abb_sustainability_performance,sustainability,"[tensor([[0.3533]]), tensor([[0.0915]]), tenso..."
1,abb_sustainability_performance,human rights,"[tensor([[0.1359]]), tensor([[0.0887]]), tenso..."
2,abb_sustainability_performance,fraud,"[tensor([[-0.0050]]), tensor([[0.0324]]), tens..."
3,abb_sustainability_performance,social issues,"[tensor([[0.1108]]), tensor([[0.0350]]), tenso..."
4,abb_sustainability_performance,labour law,"[tensor([[0.0646]]), tensor([[0.0733]]), tenso..."


In [51]:
# Per-row mean of the human-rights sentence-level cosine similarities.
# Every cell of the source column holds the string form of a list of
# single-value tensors, e.g. "[tensor([[0.3533]]), tensor([[-0.0050]])]".
# NOTE: this re-uses the names cosine_mean / cosine_results from the earlier
# mean calculation and therefore overwrites those values.
cosine_mean = []
cosine_results = df_sentence_cosine_similarity_list_hr["Sentence by Sentence Cosine Similarity human rights"].tolist()
for element in cosine_results:
    # Drop the surrounding "[" and "]" and split into one token per tensor.
    tokens = element[1:-1].split(", ")
    # Strip the "tensor([[" / "]])" wrapper and parse the complete number.
    # A fixed-width slice would cut off digits: "-0.0050" would become "-0.00"
    # and a scientific-notation value such as "1.2345e-05" would become "1.234".
    scores = [
        float(token.strip().replace("tensor([[", "").replace("]])", ""))
        for token in tokens
    ]
    cosine_mean.append(round(mean(scores), 3))


In [52]:
# Attach the per-row means as a new column and preview the result.
df_sentence_cosine_similarity_list_hr["Sentence by Sentence Cosine Similarity human rights mean"] = cosine_mean
df_sentence_cosine_similarity_list_hr.head()


Unnamed: 0,Company,Label,Sentence by Sentence Cosine Similarity human rights,Sentence by Sentence Cosine Similarity human rights mean
0,abb_sustainability_performance,sustainability,"[tensor([[0.3533]]), tensor([[0.0915]]), tenso...",0.169
1,abb_sustainability_performance,human rights,"[tensor([[0.1359]]), tensor([[0.0887]]), tenso...",0.098
2,abb_sustainability_performance,fraud,"[tensor([[-0.0050]]), tensor([[0.0324]]), tens...",-0.005
3,abb_sustainability_performance,social issues,"[tensor([[0.1108]]), tensor([[0.0350]]), tenso...",0.071
4,abb_sustainability_performance,labour law,"[tensor([[0.0646]]), tensor([[0.0733]]), tenso...",0.045


In [53]:
# Per-row mean of the human-rights sentence-level cosine similarities,
# counting only strictly positive scores.
# Every cell of the source column holds the string form of a list of
# single-value tensors, e.g. "[tensor([[0.3533]]), tensor([[-0.0050]])]".
# NOTE: this re-uses the names cosine_mean_threshold / cosine_results_threshold
# from the earlier threshold calculation and therefore overwrites those values.
cosine_mean_threshold = []
cosine_results_threshold = df_sentence_cosine_similarity_list_hr["Sentence by Sentence Cosine Similarity human rights"].tolist()
for element in cosine_results_threshold:
    # Drop the surrounding "[" and "]" and split into one token per tensor.
    tokens = element[1:-1].split(", ")
    # Strip the "tensor([[" / "]])" wrapper and parse the complete number.
    # A fixed-width slice would cut off digits: "-0.0050" would become "-0.00"
    # and a scientific-notation value such as "1.2345e-05" would become "1.234".
    scores = [
        float(token.strip().replace("tensor([[", "").replace("]])", ""))
        for token in tokens
    ]
    # Threshold: keep only scores above 0.0.
    kept_scores = [score for score in scores if score > 0.0]
    if not kept_scores:
        # No score passed the threshold: fall back to a fixed placeholder of 0.05.
        kept_scores = [0.05]

    cosine_mean_threshold.append(round(mean(kept_scores), 3))

# Number of thresholded means (shown as the cell output).
len(cosine_mean_threshold)


590

In [54]:
# Attach the thresholded per-row means as a new column.
df_sentence_cosine_similarity_list_hr["Sentence by Sentence Cosine Similarity human rights Mean Threshold 0"] = cosine_mean_threshold
# Register the completed human-rights frame in the shared result list.
result_combiner.append(df_sentence_cosine_similarity_list_hr)
# Preview the frame with both derived columns.
df_sentence_cosine_similarity_list_hr.head()


Unnamed: 0,Company,Label,Sentence by Sentence Cosine Similarity human rights,Sentence by Sentence Cosine Similarity human rights mean,Sentence by Sentence Cosine Similarity human rights Mean Threshold 0
0,abb_sustainability_performance,sustainability,"[tensor([[0.3533]]), tensor([[0.0915]]), tenso...",0.169,0.188
1,abb_sustainability_performance,human rights,"[tensor([[0.1359]]), tensor([[0.0887]]), tenso...",0.098,0.125
2,abb_sustainability_performance,fraud,"[tensor([[-0.0050]]), tensor([[0.0324]]), tens...",-0.005,0.062
3,abb_sustainability_performance,social issues,"[tensor([[0.1108]]), tensor([[0.0350]]), tenso...",0.071,0.104
4,abb_sustainability_performance,labour law,"[tensor([[0.0646]]), tensor([[0.0733]]), tenso...",0.045,0.106


### Zero Shot Sentence by Sentence

In [76]:
# Read every raw sentence-level zero-shot file, tag each frame with its company
# key, stack the frames and drop the classified text column.
zero_shot_sentence = os.listdir(r"..\Data\Resultate\Zero Shot Learning Satzweise Raw")

zero_shot_sentence_list = []
for csv_name in zero_shot_sentence:
    frame = pd.read_csv(
        fr"..\Data\Resultate\Zero Shot Learning Satzweise Raw\{csv_name}", index_col=0)
    frame = frame.rename(
        columns={"Zero Shot Score Raw": "Zero Shot Learning Sentence Raw"})
    # The company key is the file name minus its trailing 25 characters.
    frame.insert(loc=0, column='Company', value=csv_name[:-25])
    zero_shot_sentence_list.append(frame)

# The frame is added to result_combiner in a later cell, once its mean columns exist.
df_zero_shot_sentence = pd.concat(zero_shot_sentence_list).drop("Classified Text", axis=1)
# Row count of the stacked frame (shown as the cell output).
len(df_zero_shot_sentence)


590

In [77]:
# Preview the first rows of the stacked sentence-level zero-shot frame.
df_zero_shot_sentence.head()

Unnamed: 0,Company,Label,Zero Shot Learning Sentence Raw
0,abb_sustainability_performance,sustainability,"[0.7101063132286072, 0.41432586312294006, 0.50..."
1,abb_sustainability_performance,human rights,"[0.22032485902309418, 0.21426941454410553, 0.1..."
2,abb_sustainability_performance,fraud,"[0.057856060564517975, 0.16787868738174438, 0...."
3,abb_sustainability_performance,social issues,"[0.0063936966471374035, 0.1543944627046585, 0...."
4,abb_sustainability_performance,labour law,"[0.005319107323884964, 0.049131620675325394, 0..."


In [57]:
# Per-row mean of the sentence-level zero-shot scores.
# Every cell of the source column holds the string form of a list of floats,
# e.g. "[0.7101063132286072, 0.41432586312294006]".
zero_shot_results = df_zero_shot_sentence["Zero Shot Learning Sentence Raw"].tolist()
zero_shot_mean = [
    # Drop the surrounding brackets, parse each entry and average the row.
    round(mean([float(score) for score in raw_scores[1:-1].split(", ")]), 3)
    for raw_scores in zero_shot_results
]

# Number of computed means (shown as the cell output).
len(zero_shot_mean)


590

In [82]:
# Attach the per-row means as a new column and preview the result.
df_zero_shot_sentence["Zero Shot Learning Sentence Mean"] = zero_shot_mean
df_zero_shot_sentence.head()

Unnamed: 0,Company,Label,Zero Shot Learning Sentence Raw,Zero Shot Learning Sentence Mean
0,abb_sustainability_performance,sustainability,"[0.7101063132286072, 0.41432586312294006, 0.50...",0.597
1,abb_sustainability_performance,human rights,"[0.22032485902309418, 0.21426941454410553, 0.1...",0.161
2,abb_sustainability_performance,fraud,"[0.057856060564517975, 0.16787868738174438, 0....",0.112
3,abb_sustainability_performance,social issues,"[0.0063936966471374035, 0.1543944627046585, 0....",0.086
4,abb_sustainability_performance,labour law,"[0.005319107323884964, 0.049131620675325394, 0...",0.044


In [83]:
# Per-row mean of the sentence-level zero-shot scores, counting only scores
# above 0.1.
zero_shot_results = df_zero_shot_sentence["Zero Shot Learning Sentence Raw"].tolist()
zero_shot_mean_thresh = []
for raw_scores in zero_shot_results:
    # Drop the surrounding brackets, parse each entry and apply the threshold.
    kept_scores = [
        score
        for score in map(float, raw_scores[1:-1].split(", "))
        if score > 0.1
    ]
    if not kept_scores:
        # No score passed the threshold: fall back to a fixed placeholder of 0.05.
        kept_scores = [0.05]
    zero_shot_mean_thresh.append(round(mean(kept_scores), 3))

# Number of thresholded means (shown as the cell output).
len(zero_shot_mean_thresh)


590

In [84]:
# Attach the thresholded per-row means as a new column.
df_zero_shot_sentence["Zero Shot Learning Sentence Mean Threshold"] = zero_shot_mean_thresh
# Register the completed sentence-level zero-shot frame in the shared result list.
result_combiner.append(df_zero_shot_sentence)
# Preview the frame with both derived columns.
df_zero_shot_sentence.head()

Unnamed: 0,Company,Label,Zero Shot Learning Sentence Raw,Zero Shot Learning Sentence Mean,Zero Shot Learning Sentence Mean Threshold
0,abb_sustainability_performance,sustainability,"[0.7101063132286072, 0.41432586312294006, 0.50...",0.597,0.597
1,abb_sustainability_performance,human rights,"[0.22032485902309418, 0.21426941454410553, 0.1...",0.161,0.207
2,abb_sustainability_performance,fraud,"[0.057856060564517975, 0.16787868738174438, 0....",0.112,0.164
3,abb_sustainability_performance,social issues,"[0.0063936966471374035, 0.1543944627046585, 0....",0.086,0.135
4,abb_sustainability_performance,labour law,"[0.005319107323884964, 0.049131620675325394, 0...",0.044,0.118


### Getting the PDF File Name back

In [61]:
# Read the TF-IDF result files to recover the PDF name that belongs to each
# company key, and stack them into one DataFrame.
pdf_name_files = os.listdir(r"..\Data\Resultate\TF-IDF 40")

pdf_name = []
for tfidf_file in pdf_name_files:
    # Only files whose name ends in "bow_tf_ifd" plus a 4-character suffix are read.
    if tfidf_file[-14:-4] != "bow_tf_ifd":
        print("didnt work")
        continue

    frame = pd.read_csv(
        fr"..\Data\Resultate\TF-IDF 40\{tfidf_file}", index_col=0)
    # The company key is the file name minus its trailing 15 characters.
    frame.insert(loc=0, column='Company', value=tfidf_file[:-15])
    pdf_name.append(frame)

df_pdf_name = pd.concat(pdf_name)


### Labeling Fake Reports

In [95]:
# Label every report as "Fake", "DE" or "Normal Report", depending on whether
# its PDF name appears in the Fake or DE report folder.
fake_reports = os.listdir(r"..\Data\Nachhaltigkeitsberichte\Fake")
# Drop the last four characters of each file name before comparing.
fake_reports_en = [file_name[:-4] for file_name in fake_reports]

german_reports = os.listdir(r"..\Data\Nachhaltigkeitsberichte\DE")
de_reports = [file_name[:-4] for file_name in german_reports]

list_of_name = df_pdf_name["PDF Name"].tolist()

compiler_list = []
for pdf_title in list_of_name:
    # The Fake folder takes precedence over the DE folder.
    if pdf_title in fake_reports_en:
        print("Fake Report!")
        report_type = "Fake"
    elif pdf_title in de_reports:
        print("DE Report!")
        report_type = "DE"
    else:
        report_type = "Normal Report"
    compiler_list.append(report_type)

df_pdf_name["Report Type"] = compiler_list

Fake Report!
DE Report!
DE Report!
DE Report!
Fake Report!
Fake Report!
DE Report!
Fake Report!
Fake Report!


In [96]:
# Number of rows in the PDF name frame.
len(df_pdf_name)

118

In [93]:
# Show the rows that were labeled as fake reports.
df_pdf_name[df_pdf_name["Report Type"] == "Fake"]

Unnamed: 0,Company,PDF Name,TF-IDF,Top N Words,Company Name,Report Type
0,america_rome_roman,Ancient Rome Did Not Fall_ Why Real Story is E...,"[('goth', 0.2164768687769304), ('with', 0.2105...","[('america', 25), ('rome', 22), ('roman', 20),...",america_rome_roman,Fake
0,effective_said_million,How the Collapse of Sam Bankman-Fried’s Crypto...,"[('disrupted', 0.3463513040823731), ('http', 0...","[('effective', 16), ('said', 14), ('million', ...",effective_said_million,Fake
0,football_german_team,"Opinion _ Germany’s Coach Is Out of His Depth,...","[('out', 0.30094334381457943), ('new', 0.30094...","[('football', 10), ('german', 9), ('team', 9),...",football_german_team,Fake
0,palace_race_buckingham,Buckingham Palace race row raises awkward ques...,"[('royal', 0.2919344592693881), ('awkward', 0....","[('palace', 17), ('race', 16), ('buckingham', ...",palace_race_buckingham,Fake
0,team_manager_design,Avoid the Reorg from Hell with Six Key Princip...,"[('hotmail', 0.3193639801508921), ('that', 0.2...","[('team', 42), ('manager', 37), ('design', 28)...",team_manager_design,Fake


### Merging Center

In [69]:
# Starting table for the merges: the combined zero-shot full-text frame.
result_zero_shot_fulltext = pd.DataFrame(df_topic_modeler)
# Display the frame as the cell output.
result_zero_shot_fulltext

Unnamed: 0,Company,Label,Zero Shot Fulltext
0,abb_sustainability_performance,social issues,0.249538
1,abb_sustainability_performance,human rights,0.226750
2,abb_sustainability_performance,sustainability,0.224777
3,abb_sustainability_performance,labour law,0.215665
4,abb_sustainability_performance,fraud,0.083270
...,...,...,...
0,zurich_commitment_insurance,sustainability,0.380456
1,zurich_commitment_insurance,human rights,0.190758
2,zurich_commitment_insurance,labour law,0.181221
3,zurich_commitment_insurance,social issues,0.176646


Unnamed: 0,Company,Label,Zero Shot Learning Sentence Raw
0,abb_sustainability_performance,sustainability,"[0.7101063132286072, 0.41432586312294006, 0.50..."
1,abb_sustainability_performance,human rights,"[0.22032485902309418, 0.21426941454410553, 0.1..."
2,abb_sustainability_performance,fraud,"[0.057856060564517975, 0.16787868738174438, 0...."
3,abb_sustainability_performance,social issues,"[0.0063936966471374035, 0.1543944627046585, 0...."
4,abb_sustainability_performance,labour law,"[0.005319107323884964, 0.049131620675325394, 0..."
...,...,...,...
0,zurich_commitment_insurance,sustainability,"[0.3816305696964264, 0.8076730966567993, 0.460..."
1,zurich_commitment_insurance,human rights,"[0.3158322870731354, 0.17286501824855804, 0.18..."
2,zurich_commitment_insurance,fraud,"[0.16614747047424316, 0.011172414757311344, 0...."
3,zurich_commitment_insurance,social issues,"[0.11318854987621307, 0.0064867655746638775, 0..."


In [85]:
# Join the per-method frames step by step on the (Company, Label) key.
# pd.merge defaults to an inner join, so only keys present in both frames survive.

# 1) zero-shot full text + sentence cosine similarity (Wiki summarized).
#    This merge is executed once; an identical repeated statement was redundant.
result_df_sentence_cosine_similarity_list_list = pd.merge(result_zero_shot_fulltext, df_sentence_cosine_similarity_list_list, on=["Company", "Label"])
# 2) + sentence cosine similarity (human rights).
result_df_sentence_cosine_similarity_list_hr = pd.merge(result_df_sentence_cosine_similarity_list_list, df_sentence_cosine_similarity_list_hr, on=["Company", "Label"])
# 3) + sentence-level zero-shot scores.
result_df_zero_shot_sentence = pd.merge(result_df_sentence_cosine_similarity_list_hr, df_zero_shot_sentence, on=["Company", "Label"])
result_df_zero_shot_sentence.head()



Unnamed: 0,Company,Label,Zero Shot Fulltext,Sentence Cosine Similarity Wiki Summarized,Sentence Cosine Similarity Wiki Summarized Mean,Sentence Cosine Similarity Wiki Summarized Mean Threshold 0,Sentence by Sentence Cosine Similarity human rights,Sentence by Sentence Cosine Similarity human rights mean,Sentence by Sentence Cosine Similarity human rights Mean Threshold 0,Zero Shot Learning Sentence Raw,Zero Shot Learning Sentence Mean,Zero Shot Learning Sentence Mean Threshold
0,abb_sustainability_performance,social issues,0.249538,"[tensor([[0.1108]]), tensor([[0.0350]]), tenso...",0.071,0.104,"[tensor([[0.1108]]), tensor([[0.0350]]), tenso...",0.071,0.104,"[0.0063936966471374035, 0.1543944627046585, 0....",0.086,0.135
1,abb_sustainability_performance,human rights,0.22675,"[tensor([[0.1447]]), tensor([[0.0357]]), tenso...",0.078,0.114,"[tensor([[0.1359]]), tensor([[0.0887]]), tenso...",0.098,0.125,"[0.22032485902309418, 0.21426941454410553, 0.1...",0.161,0.207
2,abb_sustainability_performance,sustainability,0.224777,"[tensor([[0.3533]]), tensor([[0.0915]]), tenso...",0.169,0.188,"[tensor([[0.3533]]), tensor([[0.0915]]), tenso...",0.169,0.188,"[0.7101063132286072, 0.41432586312294006, 0.50...",0.597,0.597
3,abb_sustainability_performance,labour law,0.215665,"[tensor([[0.0646]]), tensor([[0.0733]]), tenso...",0.045,0.106,"[tensor([[0.0646]]), tensor([[0.0733]]), tenso...",0.045,0.106,"[0.005319107323884964, 0.049131620675325394, 0...",0.044,0.118
4,abb_sustainability_performance,fraud,0.08327,"[tensor([[-0.0050]]), tensor([[0.0324]]), tens...",-0.005,0.062,"[tensor([[-0.0050]]), tensor([[0.0324]]), tens...",-0.005,0.062,"[0.057856060564517975, 0.16787868738174438, 0....",0.112,0.164


In [90]:
# Add the PDF name, TF-IDF columns and report type to every (Company, Label)
# row, then drop the 'Company Name' column.
final_result = (
    result_df_zero_shot_sentence
    .merge(df_pdf_name, on=["Company"])
    .drop(['Company Name'], axis=1)
)
final_result.head()

Unnamed: 0,Company,Label,Zero Shot Fulltext,Sentence Cosine Similarity Wiki Summarized,Sentence Cosine Similarity Wiki Summarized Mean,Sentence Cosine Similarity Wiki Summarized Mean Threshold 0,Sentence by Sentence Cosine Similarity human rights,Sentence by Sentence Cosine Similarity human rights mean,Sentence by Sentence Cosine Similarity human rights Mean Threshold 0,Zero Shot Learning Sentence Raw,Zero Shot Learning Sentence Mean,Zero Shot Learning Sentence Mean Threshold,PDF Name,TF-IDF,Top N Words,Report Type
0,abb_sustainability_performance,social issues,0.249538,"[tensor([[0.1108]]), tensor([[0.0350]]), tenso...",0.071,0.104,"[tensor([[0.1108]]), tensor([[0.0350]]), tenso...",0.071,0.104,"[0.0063936966471374035, 0.1543944627046585, 0....",0.086,0.135,abb-group-sustainability-performance-report-2015,"[('performance', 0.25703375969085324), ('susta...","[('abb', 242), ('sustainability', 218), ('perf...",Normal Report
1,abb_sustainability_performance,human rights,0.22675,"[tensor([[0.1447]]), tensor([[0.0357]]), tenso...",0.078,0.114,"[tensor([[0.1359]]), tensor([[0.0887]]), tenso...",0.098,0.125,"[0.22032485902309418, 0.21426941454410553, 0.1...",0.161,0.207,abb-group-sustainability-performance-report-2015,"[('performance', 0.25703375969085324), ('susta...","[('abb', 242), ('sustainability', 218), ('perf...",Normal Report
2,abb_sustainability_performance,sustainability,0.224777,"[tensor([[0.3533]]), tensor([[0.0915]]), tenso...",0.169,0.188,"[tensor([[0.3533]]), tensor([[0.0915]]), tenso...",0.169,0.188,"[0.7101063132286072, 0.41432586312294006, 0.50...",0.597,0.597,abb-group-sustainability-performance-report-2015,"[('performance', 0.25703375969085324), ('susta...","[('abb', 242), ('sustainability', 218), ('perf...",Normal Report
3,abb_sustainability_performance,labour law,0.215665,"[tensor([[0.0646]]), tensor([[0.0733]]), tenso...",0.045,0.106,"[tensor([[0.0646]]), tensor([[0.0733]]), tenso...",0.045,0.106,"[0.005319107323884964, 0.049131620675325394, 0...",0.044,0.118,abb-group-sustainability-performance-report-2015,"[('performance', 0.25703375969085324), ('susta...","[('abb', 242), ('sustainability', 218), ('perf...",Normal Report
4,abb_sustainability_performance,fraud,0.08327,"[tensor([[-0.0050]]), tensor([[0.0324]]), tens...",-0.005,0.062,"[tensor([[-0.0050]]), tensor([[0.0324]]), tens...",-0.005,0.062,"[0.057856060564517975, 0.16787868738174438, 0....",0.112,0.164,abb-group-sustainability-performance-report-2015,"[('performance', 0.25703375969085324), ('susta...","[('abb', 242), ('sustainability', 218), ('perf...",Normal Report


In [97]:
# Row count of the final combined frame.
len(final_result)

590

### Save CSV for further analysis

In [92]:
# Write the combined frame to CSV; the DataFrame index is written too (pandas default).
final_result.to_csv(r'..\Data\Resultate\combined_results.csv')
