In [1]:
pip install dspy-ai

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install anthropic

Note: you may need to restart the kernel to use updated packages.


# Installing Packages

In [3]:
import os
import anthropic
import re
import pandas as pd
import numpy as np
import torch
import sklearn
from sklearn.model_selection import train_test_split

In [4]:
import litellm

In [5]:
import dspy

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
litellm.drop_params = True

# Loading Data

In [7]:
df_anno_train = pd.read_csv('plasticity_focalization_trainset.csv')

In [8]:
df_anno_train['Fokalisierung'] = df_anno_train['Fokalisierung'].replace({
    'intern': 'internal',
    'extern': 'external',
    'null': 'zero',
    None: 'zero',
    np.nan: 'zero'
})

In [9]:
df_anno_test = pd.read_csv('plasticity_2025_Anno_DEU_test_2nd_run.csv')

In [10]:
df_anno_test['Fokalisierung'] = df_anno_test['Fokalisierung'].replace({
    'intern': 'internal',
    'extern': 'external',
    'null': 'zero',
    None: 'zero',
    np.nan: 'zero'
})

In [11]:
df_anno_train.head()

Unnamed: 0,Autor,Titel,Absatz,Fokalisierung,Kommentar,Link
0,Goethe,Die Sängerin Antonelli,"Als ich mich in Neapel aufhielt, begegnete das...",internal,,https://www.projekt-gutenberg.org/goethe/anton...
1,Goethe,Die Sängerin Antonelli,"Eine Sängerin, Antonelli genannt, war zu meine...",internal,,https://www.projekt-gutenberg.org/goethe/anton...
2,Goethe,Die Sängerin Antonelli,Bei ihren bisherigen Verbindungen war ihr Geis...,internal,,https://www.projekt-gutenberg.org/goethe/anton...
3,Goethe,Die Sängerin Antonelli,"Es war ein Genueser, der sich um diese Zeit ei...",internal,,https://www.projekt-gutenberg.org/goethe/anton...
4,Tieck,Das grüne Band,Durch die Thäler und über die Wiesen wandelte ...,external,,https://www.projekt-gutenberg.org/tieck/grueba...


In [12]:
def balanced_sample_by_category(df, category_column, n_per_category=8, random_state=42):
    """
    Gibt ein balanciertes Sample aus dem DataFrame zurück mit n_per_category Einträgen pro Kategorie.

    Args:
        df (pd.DataFrame): Eingabedaten.
        category_column (str): Spaltenname, nach dem kategorisiert werden soll.
        n_per_category (int): Anzahl von Einträgen pro Kategorie.
        random_state (int): Seed für Reproduzierbarkeit.

    Returns:
        pd.DataFrame: Balanciertes Sample.
    """
    # Fehlende Kategorien ausschließen
    df_clean = df.dropna(subset=[category_column])

    # Alle eindeutigen Kategorien abrufen
    categories = df_clean[category_column].unique()

    # Sampling durchführen
    balanced_df = pd.concat([
        df_clean[df_clean[category_column] == cat].sample(
            n=min(n_per_category, len(df_clean[df_clean[category_column] == cat])),
            random_state=random_state
        )
        for cat in categories
    ])

    # Index zurücksetzen
    return balanced_df.reset_index(drop=True)

In [13]:
df_train_balanced = balanced_sample_by_category(df_anno_train, category_column="Fokalisierung", n_per_category=8)

In [14]:
df_train_balanced.describe()

Unnamed: 0,Autor,Titel,Absatz,Fokalisierung,Kommentar,Link
count,24,24,24,24,0.0,24
unique,14,14,24,3,0.0,14
top,Brentano,Baron Hüpfenstich,"Als ich mich in Neapel aufhielt, begegnete das...",internal,,https://www.projekt-gutenberg.org/brentano/hue...
freq,3,3,1,8,,3


# Preparing Dataset

Anleitung zur Erstellung eines Datensets: https://dspy-docs.vercel.app/docs/deep-dive/data-handling/loading-custom-data

In [15]:
from dspy.datasets.dataset import Dataset

In [16]:
train = df_train_balanced[["Absatz", "Fokalisierung"]].copy(deep=True)

In [17]:
train.head()

Unnamed: 0,Absatz,Fokalisierung
0,"Als ich mich in Neapel aufhielt, begegnete das...",internal
1,"Eine große Sorge hatte der gute König jetzt, d...",internal
2,"Es ist doch etwas Schönes, Herrliches, Erhaben...",internal
3,"Eine Sängerin, Antonelli genannt, war zu meine...",internal
4,Einen anderen Weg schlag ich ein; er ist aller...,internal


In [18]:
len(train)

24

In [19]:
class CSVDataset(Dataset):
    def __init__(self, df, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        
        df=df
        #self._train = df.iloc[0:35].to_dict(orient='records')
        self._dev = df.to_dict(orient='records')

In [20]:
dataset = CSVDataset(train)

In [21]:
len(dataset.dev)

24

In [22]:
#print(dataset.train[:3])

In [23]:
print(dataset.dev[:3])

[Example({'Absatz': "Als der Tag anbrach, noch ehe die Sonne aufgegangen war, kam schon die Frau und weckte die beiden Kinder: »Steht auf, ihr Faulenzer, wir wollen in den Wald gehen und Holz holen.« Dann gab sie jedem ein Stückchen Brot und sprach: »Da habt ihr etwas für den Mittag, aber eßt's nicht vorher auf, weiter kriegt ihr nichts.« Gretel nahm das Brot unter die Schürze, weil Hänsel die Steine in der Tasche hatte. Danach machten sie sich alle zusammen auf den Weg nach dem Wald. Als sie ein Weilchen gegangen waren, stand Hänsel still und guckte nach dem Haus zurück und tat das wieder und immer wieder. Der Vater sprach: »Hänsel, was guckst du da und bleibst zurück, hab acht und vergiß deine Beine nicht!« »Ach, Vater«, sagte Hänsel, »ich sehe nach meinem weißen Kätzchen, das sitzt oben auf dem Dach und will mir Ade sagen.« Die Frau sprach: »Narr, das ist dein Kätzchen nicht, das ist die Morgensonne, die auf den Schornstein scheint.« Hänsel aber hatte nicht nach dem Kätzchen gesehen

# Setting LLM

In [24]:
dspy.settings.configure(
    cache=None
)

In [25]:
api_key = os.getenv('MY_ANTHROPIC')

In [26]:
opus = dspy.LM('claude-3-5-haiku-20241022', api_key=api_key)

In [27]:
dspy.settings.configure(lm=opus)

# Setting Up Module + checking output

In [28]:
class Determinacy(dspy.Signature):
    """
    Your task is to classify the focalization of the following sentence 
    
    ### Labels
    There are three modes of focalization:
    - internal: A text passage is internally focalized precisely when a perceptual process is part of the depicted event and is presented from the perspective of a character.
    - external: A text passage is externally focalized precisely when a perceptual process is part of the depicted event and could be presented from the perspective of a character.
    - zero: A text passage is zero focalized precisely when circumstances of the narrated world are described as if they were independent of a particular perceptual process of a person or are not possible for a person to perceive synchronously.
    """
    #context = dspy.InputField(desc="contains annotation guidelines and scoring instructions")
    text_snippet = dspy.InputField(desc="contains a snippet of a narrative text")
    tag = dspy.OutputField(desc="contains only the **label** in lower case")

In [29]:
results = []
for text_snippet in train.Absatz[:5]:
    print(text_snippet)
    classify = dspy.ChainOfThought(Determinacy)
    response = classify(text_snippet=text_snippet)
    print(response)
    #print(response.tag)
    results.append(response.tag)

Als ich mich in Neapel aufhielt, begegnete daselbst eine Geschichte, die großes Aufsehen erregte und worüber die Urteile sehr verschieden waren. Die einen behaupteten, sie sei völlig ersonnen, die andern, sie sei wahr, aber es stecke ein Betrug dahinter. Diese Partei war wieder untereinander selbst uneinig; sie stritten, wer dabei betrogen haben könnte. Noch andere behaupteten, es sei keineswegs ausgemacht, daß geistige Naturen nicht sollten auf Elemente und Körper wirken können, und man müsse nicht jede wunderbare Begebenheit ausschließlich entweder für Lüge oder Trug erklären. Nun zur Geschichte selbst!
Prediction(
    reasoning='This text passage is zero focalized because it presents a narrative description of a situation and various perspectives without being anchored to a specific character\'s perceptual experience. The narrator is describing a story and the different opinions surrounding it in a detached, omniscient manner. The text provides an overview of multiple viewpoints abo

In [30]:
results

['zero', 'external', 'internal', 'external', 'internal']

# Setting Metric

Anleitung zu den Metriken in DSPY: https://dspy-docs.vercel.app/docs/building-blocks/metrics

In [31]:
from dspy.evaluate import Evaluate

In [32]:
def validate_tag(example, pred, trace=None):
    print(example.answer)
    print(pred.tag)
    return example.answer in pred.tag

# Trying out the Signature Optimizer

Anleitung zur Arbeit mit dem Optimizer bei zero-shot: https://dspy-docs.vercel.app/docs/deep-dive/teleprompter/signature-optimizer

In [33]:
class DeterminacyPipe(dspy.Module):
    def __init__(self):
        super().__init__()
        
        self.signature = Determinacy
        self.predictor = dspy.ChainOfThought(self.signature)
        
    def forward(self, text_snippet):
        result = self.predictor(text_snippet=text_snippet)
        return dspy.Prediction(
            tag = result.tag
        )      

In [34]:
devset = dataset.dev

In [35]:
evaluate = Evaluate(devset=devset, metric=validate_tag, num_threads=3, display_progress=True, display_table=True)

In [36]:
event_baseline = DeterminacyPipe()
devset_with_input = [dspy.Example({"text_snippet": r["Absatz"], "answer": r["Fokalisierung"]}).with_inputs("context", "text_snippet") for r in devset]

In [37]:
evaluate(event_baseline, devset=devset_with_input)

internal
zero
external
internal
zero
zero
external
zero
  0%|                                                    | 0/24 [00:00<?, ?it/s]
internal
internal
internal
Average Metric: 1.00 / 1 (100.0%):   0%|                 | 0/24 [00:00<?, ?it/s]
zero
internal
internal
internal
internal
external
zero
internal
external
zero
external
zero
internal
zero
zero
zeroage Metric: 2.00 / 2 (100.0%):   4%|▎       | 1/24 [00:00<00:00, 193.71it/s]
external
Average Metric: 2.00 / 3 (66.7%):   8%|▊        | 2/24 [00:00<00:00, 313.97it/s]zero
zero
internal
internal
external
internal
Average Metric: 2.00 / 4 (50.0%):  12%|█▏       | 3/24 [00:00<00:00, 298.92it/s]zero
zero
internal
external
external
zero
Average Metric: 3.00 / 5 (60.0%):  17%|█▌       | 4/24 [00:00<00:00, 282.60it/s]external
zero
Average Metric: 3.00 / 6 (50.0%):  21%|█▉       | 5/24 [00:00<00:00, 335.90it/s]external
zero
externalMetric: 3.00 / 7 (42.9%):  25%|██▎      | 6/24 [00:00<00:00, 372.83it/s]
internal
Average Metric: 10.00 / 24 (

2025/04/18 09:29:18 INFO dspy.evaluate.evaluate: Average Metric: 10 / 24 (41.7%)





Unnamed: 0,text_snippet,answer,tag,validate_tag
0,"Als der Tag anbrach, noch ehe die Sonne aufgegangen war, kam schon...",external,internal,
1,"Als ich mich in Neapel aufhielt, begegnete daselbst eine Geschicht...",internal,zero,
2,In den letzten Jahrzehnten ist das Interesse an Hungerkünstlern se...,zero,zero,✔️ [True]
3,"Wie gesagt, die Hand warf mich wieder zur Erde. Bald darauf erfaßt...",internal,internal,✔️ [True]
4,"Aber da keine Krankheit in ihm war, so war der Gedanke nicht graue...",internal,internal,✔️ [True]
5,"Es blieb daher nur noch die andere Seite neben dem Herrenkreuz, un...",external,zero,
6,"In M..., einer bedeutenden Stadt im oberen Italien, ließ die verwi...",zero,zero,✔️ [True]
7,"Die Jugend, welche die beiden Freunde Aeins und Azwei verband, war...",zero,external,
8,"Wenn man in jenen Tagen ein Ding durch die Fichtau bringen wollte,...",external,zero,
9,"Einen anderen Weg schlag ich ein; er ist allerdings etwas weit, ab...",internal,internal,✔️ [True]


41.67

# Using Copro

In [38]:
from dspy.teleprompt import COPRO, MIPROv2

In [40]:
teleprompter = dspy.teleprompt.COPRO(
    program_mode="basic",
    init_temperature=0.4,  
    breadth=4,
    metric=validate_tag,
)

In [None]:
#teleprompter = COPRO(
    #metric=validate_tag,
    #auto="light"
#)

In [41]:
kwargs = dict(num_threads=3, display_progress=True, display_table=0) # Used in Evaluate class in the optimization process
compiled_prompt_opt = teleprompter.compile(DeterminacyPipe(), trainset=devset_with_input, eval_kwargs=kwargs)

2025/04/18 09:29:58 INFO dspy.teleprompt.copro_optimizer: Iteration Depth: 1/3.
2025/04/18 09:29:58 INFO dspy.teleprompt.copro_optimizer: At Depth 1/3, Evaluating Prompt Candidate #1/2 for Predictor 1 of 1.


internal                                                 | 0/24 [00:00<?, ?it/s]
mixed focalization
externalMetric: 0.00 / 1 (0.0%):   4%|▍          | 1/24 [00:10<04:11, 10.95s/it]
internal
zeroage Metric: 0.00 / 2 (0.0%):   8%|▉          | 2/24 [00:12<01:58,  5.38s/it]
zero focalization
internalMetric: 1.00 / 3 (33.3%):  12%|█▎        | 3/24 [00:12<01:05,  3.13s/it]
internal focalization
internalMetric: 2.00 / 4 (50.0%):  17%|█▋        | 4/24 [00:19<01:30,  4.51s/it]
internal
externalMetric: 3.00 / 5 (60.0%):  21%|██        | 5/24 [00:20<01:02,  3.27s/it]
external
zeroage Metric: 4.00 / 6 (66.7%):  25%|██▌       | 6/24 [00:21<00:43,  2.43s/it]
zero focalization
externalMetric: 5.00 / 7 (71.4%):  29%|██▉       | 7/24 [00:30<01:19,  4.65s/it]
zero focalization
zeroage Metric: 5.00 / 8 (62.5%):  33%|███▎      | 8/24 [00:30<00:52,  3.28s/it]
zero focalization
internalMetric: 6.00 / 9 (66.7%):  38%|███▊      | 9/24 [00:33<00:45,  3.02s/it]
internal
internalMetric: 7.00 / 10 (70.0%):  42%|█

2025/04/18 09:31:39 INFO dspy.evaluate.evaluate: Average Metric: 16 / 24 (66.7%)
2025/04/18 09:31:39 INFO dspy.teleprompt.copro_optimizer: At Depth 1/3, Evaluating Prompt Candidate #2/2 for Predictor 1 of 1.



external
internal
internal
zero
zero
zero
internal
internal
internal
internal
external
zero
external
zero
zero
external
zero
zero
internal
internal
zero
internal
internal
external
zero
zero
internal
internal
external
internal
zero
zero
external
zero
zero
zero
external
zero
internal
external
external
internal
external
zero
internal
internal
zero
external
Average Metric: 10.00 / 24 (41.7%): 100%|█████| 24/24 [00:00<00:00, 3745.89it/s]

2025/04/18 09:31:39 INFO dspy.evaluate.evaluate: Average Metric: 10 / 24 (41.7%)





2025/04/18 09:31:52 INFO dspy.teleprompt.copro_optimizer: Iteration Depth: 2/3.
2025/04/18 09:31:52 INFO dspy.teleprompt.copro_optimizer: At Depth 2/3, Evaluating Prompt Candidate #1/1 for Predictor 1 of 1.


internal                                                 | 0/24 [00:00<?, ?it/s]
internal focalization
externalMetric: 1.00 / 1 (100.0%):   4%|▍        | 1/24 [00:12<04:49, 12.58s/it]
external focalization
zeroage Metric: 2.00 / 2 (100.0%):   8%|▊        | 2/24 [00:14<02:15,  6.15s/it]
zero focalization
internalMetric: 3.00 / 3 (100.0%):  12%|█▏       | 3/24 [00:14<01:17,  3.67s/it]
internal focalization
externalMetric: 4.00 / 4 (100.0%):  17%|█▌       | 4/24 [00:21<01:33,  4.66s/it]
external
internalMetric: 5.00 / 5 (100.0%):  21%|█▉       | 5/24 [00:23<01:09,  3.67s/it]
internal focalization
zeroage Metric: 6.00 / 6 (100.0%):  25%|██▎      | 6/24 [00:24<00:54,  3.01s/it]
zero focalization
zeroage Metric: 7.00 / 7 (100.0%):  29%|██▋      | 7/24 [00:29<01:01,  3.63s/it]
zero focalization
externalMetric: 8.00 / 8 (100.0%):  33%|███      | 8/24 [00:30<00:41,  2.60s/it]
zero focalization
internalMetric: 8.00 / 9 (88.9%):  38%|███▊      | 9/24 [00:31<00:33,  2.24s/it]
internal focalization

2025/04/18 09:33:09 INFO dspy.evaluate.evaluate: Average Metric: 15 / 24 (62.5%)





2025/04/18 09:33:21 INFO dspy.teleprompt.copro_optimizer: Iteration Depth: 3/3.
2025/04/18 09:33:21 INFO dspy.teleprompt.copro_optimizer: At Depth 3/3, Evaluating Prompt Candidate #1/1 for Predictor 1 of 1.


zero|                                                    | 0/24 [00:00<?, ?it/s]
external zero
internalMetric: 1.00 / 1 (100.0%):   4%|▍        | 1/24 [00:08<03:22,  8.80s/it]
zero focalization
externalMetric: 1.00 / 2 (50.0%):   8%|▊         | 2/24 [00:09<01:24,  3.84s/it]
zero focalization
internalMetric: 1.00 / 3 (33.3%):  12%|█▎        | 3/24 [00:09<00:51,  2.43s/it]
zero
internalMetric: 1.00 / 4 (25.0%):  17%|█▋        | 4/24 [00:17<01:31,  4.58s/it]
zero
externalMetric: 1.00 / 5 (20.0%):  21%|██        | 5/24 [00:18<01:00,  3.20s/it]
zero
externalMetric: 1.00 / 6 (16.7%):  25%|██▌       | 6/24 [00:18<00:38,  2.17s/it]
zero
zeroage Metric: 1.00 / 7 (14.3%):  29%|██▉       | 7/24 [00:27<01:10,  4.17s/it]
zero
zeroage Metric: 2.00 / 8 (25.0%):  33%|███▎      | 8/24 [00:28<00:50,  3.17s/it]
external
internalMetric: 2.00 / 9 (22.2%):  38%|███▊      | 9/24 [00:29<00:37,  2.53s/it]
internal
zeroage Metric: 3.00 / 10 (30.0%):  42%|███▎    | 10/24 [00:36<00:54,  3.90s/it]
external
interna

2025/04/18 09:34:44 INFO dspy.evaluate.evaluate: Average Metric: 10 / 24 (41.7%)





In [42]:
compiled_prompt_opt

predictor.predict = Predict(StringSignature(text_snippet -> reasoning, tag
    instructions="You are a literary narrative analysis expert specializing in narrative focalization. Carefully analyze the given sentence by examining its narrative perspective, perceptual characteristics, and descriptive approach. Consider the following criteria for each focalization type:\n\n- Internal Focalization: Is the perception explicitly tied to a character's subjective viewpoint?\n- External Focalization: Does the description suggest a potential character perspective without being fully internalized?\n- Zero Focalization: Are the circumstances described objectively, as if from an omniscient, detached standpoint?\n\nSystematically evaluate the sentence's narrative characteristics and determine the most appropriate focalization mode based on the nuanced definitions provided."
    text_snippet = Field(annotation=str required=True json_schema_extra={'desc': 'contains a snippet of a narrative text', '__ds

In [43]:
compiled_prompt_opt("Er geht auf der Strasse")

Prediction(
    tag='external'
)

# Test with new Prompt

In [44]:
results = []
for text_snippet in df_anno_test.Absatz:
    response = compiled_prompt_opt(text_snippet=text_snippet)
    print(response.tag)
    results.append(response.tag)

zero focalization
zero focalization
zero focalization
zero focalization
zero focalization
internal
internal
internal
internal
internal
internal focalization
internal focalization
internal focalization
internal
internal
internal
zero focalization
external
external
zero focalization
internal focalization
internal
internal
internal
zero focalization
internal
zero focalization
internal focalization
external focalization
external
internal
internal
internal
internal
internal
internal
external
external focalization
external
zero focalization
internal
internal
internal focalization
internal
internal
internal
internal
internal
zero focalization
zero focalization
external focalization
mixed focalization
internal
internal focalization
internal
internal focalization
external focalization
zero focalization
zero focalization
zero focalization
external
zero focalization
zero focalization
zero focalization
external
internal
external
internal focalization
internal focalization
external focalization
int

In [50]:
results_1 = []
for text in results:
    if "internal focalization" in text:
        results_1.append("internal")
    elif "internal" in text:
        results_1.append("internal")
    elif "external focalization" in text:
        results_1.append("external")
    elif "external" in text:
        results_1.append("external")
    elif "zero focalization" in text:
        results_1.append("zero")
    elif "zero" in text:
        results_1.append("zero")
    else:
        results_1.append("NaN")

In [51]:
predictions = pd.Series(results_1)

In [52]:
ground_truth = df_anno_test.Fokalisierung

In [53]:
ground_truth

0          zero
1          zero
2      internal
3          zero
4      internal
         ...   
101    internal
102    internal
103    internal
104    internal
105    internal
Name: Fokalisierung, Length: 106, dtype: object

In [54]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

In [55]:
pd.DataFrame(list(zip([f1_score(ground_truth, predictions, average="weighted")],
                      [recall_score(ground_truth, predictions,  average="weighted")],
                      [precision_score(ground_truth, predictions, average="weighted")],
                      [accuracy_score(ground_truth, predictions,)])),
                      columns = ["F1", "Recall", "Precision", "Accuracy"])

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,F1,Recall,Precision,Accuracy
0,0.646751,0.650943,0.653406,0.650943
