# Klassifikation

In [1]:
# pip install transformers
# pip install tensorflow

In [2]:
#pip install pandas

In [3]:
from transformers import pipeline
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sample customer-complaint letter, kept as one multi-line string.
# NOTE(review): no cell visible in this section reads `text` — confirm it is still needed.
text = """Dear Amazon, last week I ordered an Optimus Prime action figure
from your online store in Germany. Unfortunately, when I opened the package,
I discovered to my horror that I had been sent an action figure of Megatron
instead! As a lifelong enemy of the Decepticons, I hope you can understand my
dilemma. To resolve the issue, I demand an exchange of Megatron for the
Optimus Prime figure I ordered. Enclosed are copies of my records concerning
this purchase. I expect to hear from you soon. Sincerely, Bumblebee."""

# Short review-style sentences with positive, negative and mixed tone, plus a
# non-committal reply and one policy statement.
# NOTE(review): no cell visible in this section reads `example_sentences` — confirm it is still needed.
example_sentences = [
    "I love this product! It has changed my life for the better.",
    "This is the worst purchase I have ever made. Completely dissatisfied.",
    "The quality is okay, but the price is too high.",
    "Excellent service and fast shipping. Highly recommend!",
    "The item arrived broken and customer service was unhelpful.",
    "I dont know.",
    "The government should fund renewable energy. It's environmentally friendly, and fossil fuels are running out."
]

# Ten probe sentences for the argument classifier: the first five are
# opinionated claims, the last five are neutral everyday statements.
# The order is significant: the hand-written `labels` list in the BERT section
# (five 1s followed by five 0s) is aligned with this list by position.
argument_sentences = [
    "The government should increase the minimum wage to help low-income workers.",
    "Climate change is the most significant threat facing humanity today.",
    "We need stricter gun control laws to reduce violence.",
    "Renewable energy sources are essential for a sustainable future.",
    "Healthcare should be a universal right, not a privilege.",
    "I went to the store to buy some groceries.",
    "The sky is blue and the grass is green.",
    "She enjoys reading books in her free time.",
    "He is a software engineer at a tech company.",
    "They are planning a vacation to Europe next summer."
]

In [14]:
# Model identifier handed to the pipeline for argument detection.
model = "chkla/roberta-argument"

# Wrap that model in a ready-to-call text-classification pipeline.
classifier = pipeline(task="text-classification", model=model)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [15]:
# Show the class-id -> label-name mapping stored in the model config
# (the recorded output lists 0 -> 'NON-ARGUMENT' and 1 -> 'ARGUMENT').
classifier.model.config.id2label

{0: 'NON-ARGUMENT', 1: 'ARGUMENT'}

In [19]:
# Classify all probe sentences in a single call; the recorded output shows one
# row per sentence with `label` and `score` columns.
output = classifier(argument_sentences)

# Build a dataframe with the predictions and the sentence each one belongs to.
# The frame is constructed once (the earlier throw-away `pd.DataFrame(output)`
# expression was never displayed or stored) and the sentence column is attached
# with `assign`, so re-running the cell never mutates an existing frame.
df = pd.DataFrame(output).assign(sentence=argument_sentences)
df

Unnamed: 0,label,score,sentence
0,ARGUMENT,0.949186,The government should increase the minimum wag...
1,ARGUMENT,0.718152,Climate change is the most significant threat ...
2,ARGUMENT,0.96335,We need stricter gun control laws to reduce vi...
3,ARGUMENT,0.878091,Renewable energy sources are essential for a s...
4,ARGUMENT,0.808245,"Healthcare should be a universal right, not a ..."
5,NON-ARGUMENT,0.969055,I went to the store to buy some groceries.
6,NON-ARGUMENT,0.894958,The sky is blue and the grass is green.
7,NON-ARGUMENT,0.914584,She enjoys reading books in her free time.
8,NON-ARGUMENT,0.991769,He is a software engineer at a tech company.
9,NON-ARGUMENT,0.961836,They are planning a vacation to Europe next su...


# Text2Text

In [1]:
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Text-to-text generation pipeline; prompts are sent to it as plain strings.
pipe = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)






In [3]:
# Prompt the model with plain text; the task ("translate English to Spanish")
# is stated inside the prompt itself.
pipe("translate English to Spanish: Hello, how are you?")
# Recorded output: [{'generated_text': 'Hi, cómo estás?'}]



[{'generated_text': 'Hi, cómo estás?'}]

In [4]:
# Ask a factual question with no task prefix.
# NOTE: the recorded answer is 'london', which is incorrect — treat this cell
# as a demonstration of the model's limits, not as a reliable lookup.
pipe("What is the capital of France?")

[{'generated_text': 'london'}]

In [27]:
#!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp312-cp312-win_amd64.whl.metadata (8.3 kB)
Downloading sentencepiece-0.2.0-cp312-cp312-win_amd64.whl (991 kB)
   ---------------------------------------- 0.0/992.0 kB ? eta -:--:--
   --------------------- ------------------ 524.3/992.0 kB 2.8 MB/s eta 0:00:01
   ---------------------------------------- 992.0/992.0 kB 3.3 MB/s eta 0:00:00
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.2.0


In [5]:
from transformers import T5Tokenizer, T5ForConditionalGeneration


# Load tokenizer and model explicitly instead of going through `pipeline`.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

# The task is stated as an instruction prefix inside the prompt.
input_text = "translate English to German: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

outputs = model.generate(input_ids)
# Strip special tokens when decoding; without this the printed text carries
# the generation markers (the recorded output was "<pad> Wie old sind Sie?</s>").
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


<pad> Wie old sind Sie?</s>


# BERT

In [3]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load the tokenizer for the 'bert-base-uncased' checkpoint — the same
# checkpoint name the classification model is loaded from further down.
# `tokenize_function` below reads this module-level object.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Batch-encode sentences with the module-level tokenizer
def tokenize_function(sentences):
    """Encode `sentences` with the module-level `tokenizer`.

    The tokenizer is asked for max-length padding, truncation and PyTorch
    tensors; whatever it returns is handed back unchanged.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "return_tensors": "pt",
    }
    return tokenizer(sentences, **encode_options)

# Prepare the dataset
class ArgumentDataset(torch.utils.data.Dataset):
    """Dataset pairing per-sentence encodings with their labels.

    Args:
        encodings: mapping from field name to an indexable container with one
            entry per example (tensors or nested lists both work).
        labels: indexable container with one label per example.

    Indexing returns a dict with one tensor per encoding field plus a
    'labels' tensor. The length of the dataset is the number of labels.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _as_new_tensor(value):
        """Return `value` as a tensor that is a copy of the source data."""
        if isinstance(value, torch.Tensor):
            # torch.tensor(<tensor>) emits a copy-construct UserWarning on
            # every item; clone().detach() is the copy PyTorch recommends.
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        item = {key: self._as_new_tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self._as_new_tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Gold labels, aligned by position with argument_sentences
# (1 = argument, 0 = non-argument).
labels = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

# Encode the whole batch in one call.
encodings = tokenize_function(argument_sentences)

# Bundle encodings and labels into a dataset object.
# NOTE(review): `dataset` is not read by any cell visible in this section.
dataset = ArgumentDataset(encodings, labels)

# Load BERT with a two-class classification head.
# NOTE(review): the recorded output warns that the classifier weights are
# newly initialized and the model should be trained first; the recorded
# predictions are 1 for every sentence, so they carry no information yet.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Inference only: evaluation mode, no gradient tracking.
model.eval()
with torch.no_grad():
    outputs = model(**encodings)
predictions = outputs.logits.argmax(dim=-1)

# Report the predicted class id next to each sentence.
for sentence, predicted_class in zip(argument_sentences, predictions.tolist()):
    print(f"Sentence: {sentence}\nPrediction: {predicted_class}\n")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Sentence: The government should increase the minimum wage to help low-income workers.
Prediction: 1

Sentence: Climate change is the most significant threat facing humanity today.
Prediction: 1

Sentence: We need stricter gun control laws to reduce violence.
Prediction: 1

Sentence: Renewable energy sources are essential for a sustainable future.
Prediction: 1

Sentence: Healthcare should be a universal right, not a privilege.
Prediction: 1

Sentence: I went to the store to buy some groceries.
Prediction: 1

Sentence: The sky is blue and the grass is green.
Prediction: 1

Sentence: She enjoys reading books in her free time.
Prediction: 1

Sentence: He is a software engineer at a tech company.
Prediction: 1

Sentence: They are planning a vacation to Europe next summer.
Prediction: 1



# langchain

In [1]:
#pip install langchain

Collecting langchain
  Downloading langchain-0.3.2-py3-none-any.whl.metadata (7.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.35-cp312-cp312-win_amd64.whl.metadata (9.9 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.10.9-cp312-cp312-win_amd64.whl.metadata (7.8 kB)
Collecting langchain-core<0.4.0,>=0.3.8 (from langchain)
  Downloading langchain_core-0.3.9-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.131-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp<4.0.0,>=3.8.3->langchain)
  Downloading aiohappyeyeballs-2.4.3-py3-none-any.whl.metadata (6.1 kB)
Colle