In [3]:
from functools import lru_cache

import pandas as pd
import yaml
from transformers import pipeline

def load_yaml_file(file_path):
    """Parse a YAML file and return its content.

    The topic files used in this notebook are read as a dictionary in which
    each topic is a key and the value is a list of elements.

    Args:
        file_path: Path of the YAML file; it is opened as UTF-8 text.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file content.
    """
    with open(file_path, mode="r", encoding="UTF-8") as yaml_stream:
        return yaml.safe_load(yaml_stream)




@lru_cache(maxsize=None)
def _load_zeroshot_pipeline(model_name):
    """Build the zero-shot classification pipeline once per model name."""
    return pipeline("zero-shot-classification", model=model_name)


def zeroshotNLP(text, topics_path='data/topic_g.yml',
                model_name="valhalla/distilbart-mnli-12-1", top_n=3):
    """Classify ``text`` against the topics of a YAML topic file.

    The candidate labels are the lower-cased top-level keys of the topic file
    plus the extra label ``'none'``.

    Args:
        text: Input passed unchanged to the zero-shot pipeline.
        topics_path: YAML file whose top-level keys are the topics.
        model_name: Model identifier handed to ``pipeline``; must be hashable
            (a string) because the built pipeline is cached per model name.
        top_n: Number of leading rows of the result to keep.

    Returns:
        pandas.DataFrame built from the pipeline output, without the
        ``"sequence"`` column and limited to the first ``top_n`` rows.
        (Rows are presumably ordered by descending score - confirm against
        the pipeline documentation.)
    """
    topics = load_yaml_file(topics_path)
    topic_list = [label.lower() for label in [*topics, 'None']]

    # Building the pipeline loads the model, so reuse it across calls instead
    # of re-creating it every time the function is invoked.
    zeroshot = _load_zeroshot_pipeline(model_name)
    preddict = zeroshot(text, topic_list)

    return pd.DataFrame(preddict).drop(columns="sequence").head(top_n)

In [4]:
# Run the zero-shot topic classifier on a German sample sentence.
# NOTE(review): "en" looks like a typo for "denn" (the sentiment cell uses "denn") - confirm.
preddict = zeroshotNLP("Ich mag politische Diskussionen am meisten en die sind toll!")

In [15]:
# zeroshotNLP already returns the trimmed DataFrame (no "sequence" column, first
# rows only), so wrapping it again and dropping "sequence" would raise a KeyError.
preddict.index

RangeIndex(start=0, stop=3, step=1)

In [16]:

@lru_cache(maxsize=None)
def _load_hate_pipeline(model_path):
    """Build the hate-speech text-classification pipeline once per model path."""
    return pipeline("text-classification", model=model_path, tokenizer=model_path)


def hatespeachNLP(text, hate_model_path="Hate-speech-CNERG/dehatebert-mono-german"):
    """Classify ``text`` with the hate-speech model.

    Args:
        text: Input passed unchanged to the text-classification pipeline.
        hate_model_path: Identifier used for both model and tokenizer; must be
            hashable (a string) because the built pipeline is cached per path.

    Returns:
        The first element of the pipeline result. In the recorded run this is
        a dict with the keys ``'label'`` and ``'score'``.
    """
    # Building the pipeline loads the model, so reuse it across calls instead
    # of re-creating it on every invocation.
    hate_task = _load_hate_pipeline(hate_model_path)
    # Only the first prediction is returned, as before.
    return hate_task(text)[0]

In [17]:
# Classify the German sample sentence with the hate-speech model; this rebinds
# `preddict` to the single prediction returned by hatespeachNLP.
# NOTE(review): "en" looks like a typo for "denn" (the sentiment cell uses "denn") - confirm.
preddict = hatespeachNLP("Ich mag politische Diskussionen am meisten en die sind toll!")

In [18]:
# Display the current prediction held in `preddict`.
preddict

{'label': 'NON_HATE', 'score': 0.9873091578483582}

In [36]:
# Scale to percent *before* rounding: rounding first and multiplying afterwards
# re-introduces float noise (e.g. round(0.58, 3) * 100 == 57.99999999999999).
x = str(round(preddict["score"] * 100, 1))  # score as a percentage string, one decimal
y = preddict["label"]  # predicted label

In [37]:
# The ANSI codes \033[1m / \033[0m (bold on / reset) are only interpreted when the
# text is written to the output stream; as a bare expression the cell shows the
# repr with the raw escape characters, so print the message instead.
# NOTE(review): "vorraus" is a misspelling of "voraus" - left unchanged here, confirm before fixing.
print(f"Mit einer Wahrscheinlichkeit von \033[1m{x}\033[0m % sagt das Modell {y} vorraus.")

'Mit einer Wahrscheinlichkeit von \x1b[1m98.7\x1b[0m % sagt das Modell NON_HATE vorraus.'

In [35]:
# Display the formatted percentage.
# NOTE(review): the recorded output shows a bare number although `x` is assigned a
# str above; this cell's execution count precedes that cell's, so the output is
# presumably stale - re-run to confirm.
x

98.7

In [4]:

@lru_cache(maxsize=None)
def _load_sentiment_pipeline(model_path):
    """Build the sentiment-analysis pipeline once per model path."""
    return pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)


def sentimentNLP(text, sentiment_model_path="cardiffnlp/twitter-xlm-roberta-base-sentiment"):
    """Run sentiment analysis on ``text``.

    Args:
        text: Input passed unchanged to the sentiment-analysis pipeline.
        sentiment_model_path: Identifier used for both model and tokenizer;
            must be hashable (a string) because the built pipeline is cached
            per path.

    Returns:
        The unmodified pipeline result. In the recorded run this is a list
        holding one dict with the keys ``'label'`` and ``'score'``.
    """
    # Building the pipeline loads the model, so reuse it across calls instead
    # of re-creating it on every invocation.
    sentiment_task = _load_sentiment_pipeline(sentiment_model_path)
    return sentiment_task(text)

In [5]:
# Run sentiment analysis on the German sample sentence and display the raw
# pipeline result (a list with one label/score dict in the recorded run).
preddict = sentimentNLP("Ich mag politische Diskussionen am meisten denn die sind toll!")
preddict

[{'label': 'positive', 'score': 0.941289484500885}]

In [4]:
import plotly.express as px
import pandas as pd

# Load the stored predictions from test.csv, using its "Unnamed: 0" column as the
# index. In the recorded output each row holds three label/score column pairs
# ("1 Pred" / "1 Pred Scores", ...).
df = pd.read_csv("test.csv",index_col="Unnamed: 0")

df

Unnamed: 0,1 Pred,1 Pred Scores,2 Pred,2 Pred Scores,3 Pred,3 Pred Scores
0,bildung,0.234879,gesundheit,0.159915,soziale ursachen,0.117303
1,bildung,0.411706,gesundheit,0.215966,wissenschaft,0.066933
2,bildung,0.373872,gesundheit,0.281383,klima,0.08369
3,bildung,0.372408,gesundheit,0.282481,soziale ursachen,0.085872


In [18]:
# Load the long-format predictions, turn the stored index into a regular "Pred"
# column and replace its numeric values 0/1/2 with the names "Pred 1".."Pred 3".
df2 = (
    pd.read_csv("test2.csv", index_col="Unnamed: 0")
    .reset_index()
    .rename(columns={"index": "Pred"})
)
df2["Pred"] = df2["Pred"].replace({0: "Pred 1", 1: "Pred 2", 2: "Pred 3"})

df2

Unnamed: 0,Pred,labels,scores
0,Pred 1,bildung,0.234879
1,Pred 2,gesundheit,0.159915
2,Pred 3,soziale ursachen,0.117303
3,Pred 1,bildung,0.411706
4,Pred 2,gesundheit,0.215966
5,Pred 3,wissenschaft,0.066933
6,Pred 1,bildung,0.373872
7,Pred 2,gesundheit,0.281383
8,Pred 3,klima,0.08369
9,Pred 1,bildung,0.372408


In [20]:
# Bar chart of the long-format predictions: one bar group per "Pred" value,
# bar height from "scores", colored by "labels".
fig = px.bar(df2, x="Pred", y="scores", color="labels", title="Long-Form Input")
fig.show()