In [31]:
import getpass
import os

from datadreamer import DataDreamer
from datadreamer.embedders import SentenceTransformersEmbedder
from datadreamer.llms import OpenAI
from datadreamer.steps import DataFromPrompt, Embed, CosineSimilarity

In [25]:
# Read the OpenAI API key without echoing it: plain input() shows the typed
# secret on screen and the notebook stores it in the saved cell output.
# A key that is already exported is reused, so Restart & Run All does not
# block waiting for interactive input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OpenAI API Key")

In [32]:
# Values shared by both generation steps (tune here, not inside the calls).
SENTENCES_PER_STYLE = 5  # value of each DataFromPrompt step's "n" arg
SAMPLING_TEMPERATURE = 1.2  # value of each DataFromPrompt step's "temperature" arg


def _generate_affect_sentences(llm, affect):
    """Create the DataFromPrompt step asking `llm` for sentences rich in `affect` words.

    This cell calls it inside the ``with DataDreamer(...)`` block below.

    Args:
        llm: Object passed through as the step's "llm" arg.
        affect: Affect word ("positive" / "negative"). It is capitalized into the
            step name and inserted verbatim into the instruction.

    Returns:
        The DataFromPrompt step; its "generations" output is renamed to "sentences".
    """
    return DataFromPrompt(
        f"Generate Data with {affect.capitalize()} Style",
        args={
            "llm": llm,
            "n": SENTENCES_PER_STYLE,
            "temperature": SAMPLING_TEMPERATURE,
            "instruction": (
                f"Generate a sentence where words with {affect} affect are frequently used."
            ),
        },
        outputs={"generations": "sentences"},
    )


with DataDreamer("./output"):
    gpt_4 = OpenAI(
        model_name="gpt-4",
        # NOTE(review): organization id is hardcoded in the notebook; consider
        # moving it to configuration / an environment variable before sharing.
        organization="org-bgAXfs8WdU5942SLngg0OGpd",
    )

    # One step per style; both share the same LLM object and sampling settings.
    pos_data = _generate_affect_sentences(gpt_4, "positive")
    neg_data = _generate_affect_sentences(gpt_4, "negative")


[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: ./output
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'Generate Data with Positive Style' results loaded from disk. 🙌 It was previously run and saved.
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'Generate Data with Negative Style' results loaded from disk. 🙌 It was previously run and saved.
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Done. ✨ Results in folder: ./output


In [35]:
# model_name passed to SentenceTransformersEmbedder for both sentence sets.
STYLE_EMBEDDING_MODEL = "AnnaWegmann/Style-Embedding"

with DataDreamer("./output"):
    # Build the embedder once and hand the same instance to both Embed steps
    # (the same way the single `gpt_4` object is shared by the generation steps)
    # instead of constructing two identical embedders.
    style_embedder = SentenceTransformersEmbedder(model_name=STYLE_EMBEDDING_MODEL)

    # Embed the "sentences" column produced by the positive-style generation step.
    pos_embed = Embed(
        name="Positive Data Embeddings",
        inputs={"texts": pos_data.output["sentences"]},
        args={"embedder": style_embedder},
        outputs={"texts": "sentences", "embeddings": "embeddings"},
    )

    # Embed the "sentences" column produced by the negative-style generation step.
    neg_embed = Embed(
        name="Negative Data Embeddings",
        inputs={"texts": neg_data.output["sentences"]},
        args={"embedder": style_embedder},
        outputs={"texts": "sentences", "embeddings": "embeddings"},
    )

[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: ./output
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'Positive Data Embeddings' results loaded from disk. 🙌 It was previously run and saved.
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'Negative Data Embeddings' results loaded from disk. 🙌 It was previously run and saved.
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Done. ✨ Results in folder: ./output


In [37]:
with DataDreamer("./output"):
    # Cosine similarity between the "embeddings" columns of the positive and
    # negative Embed steps.
    # NOTE(review): the inputs are already embeddings, so the "embedder" arg is
    # presumably only needed for text inputs — confirm against the
    # CosineSimilarity docs before removing it.
    similarity_embedder = SentenceTransformersEmbedder(
        model_name="AnnaWegmann/Style-Embedding"
    )

    # NOTE(review): outputs "a"/"b" are renamed to "*_sentences" although the
    # inputs are embedding columns; verify what these columns actually hold.
    cos_sim = CosineSimilarity(
        name="Cosine Similarities",
        inputs={
            "a": pos_embed.output["embeddings"],
            "b": neg_embed.output["embeddings"],
        },
        args={"embedder": similarity_embedder},
        outputs={
            "a": "pos_sentences",
            "b": "neg_sentences",
            "similarities": "cos_sim",
        },
    )

[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: ./output
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'Cosine Similarities' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'Cosine Similarities' finished and is saved to disk. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Done. ✨ Results in folder: ./output


In [41]:
# Report how many rows the "Cosine Similarities" step output contains.
similarity_row_count = cos_sim.output.num_rows
print(similarity_row_count)

5
