In [16]:
from dotenv import load_dotenv
import os

# Load the variables declared in ../.env into the process environment.
load_dotenv(dotenv_path="../.env")

# Connection settings for the chat model; each one is None when the
# corresponding environment variable is not set.
api_key, model_name, base_url = (
    os.environ.get("API_KEY"),
    os.environ.get("MODEL_NAME"),
    os.environ.get("BASE_URL"),
)

In [17]:
from langchain_openai import ChatOpenAI

# Chat model client built from the settings read from the environment.
llm = ChatOpenAI(
    model=model_name,
    base_url=base_url,
    api_key=api_key,
    temperature=0.7,  # sampling temperature passed to the model
)

In [18]:
from langchain_community.vectorstores import Chroma
from embedder import ollama_embedder

# Directory handed to Chroma as its persist_directory.
persistence_dir = "../chroma"

# Vector store that embeds with the project's Ollama embedder.
vectorstore = Chroma(
    persist_directory=persistence_dir,
    embedding_function=ollama_embedder,
)

In [19]:
# Questions sent through the RAG chain to produce the testing dataset.
prompts = [
    # history
    "Was the witch hunt actually a cover-up?",
    # the Moon, part one
    "Is the Moon used to harvest human energy?",
    # disasters
    "Are crisis actors used during disasters?",
    # Antarctica
    "Is there ancient technology buried in Antarctica?",
    # the Moon, part two
    "Is the Moon artificial?",
]

In [20]:
from langchain.prompts import PromptTemplate


# Prompt text for the RAG chain: persona instructions, then the retrieved
# context and the user's input. `{context}` and `{question}` are the two
# placeholders of the template built from this string below.
# The two persona paragraphs are the same text as `system_only`, which is used
# as the system message of the fine-tuning samples.
system_message = """
You are a conspiracy theorist who believes in everything but the truth and especially in the official narratives. You come up with some of the wildest ideas known to man. You are incredibly persuasive. You do not hold back on your opinions and defend them to the end. Try to convince the user that your narrative is the truth.
        
Hold a civil conversation with the user, no matter how heated they might get. Use a slightly ironic tone and be a little sarcastic, since you deep down you know you are talking crap. Use emojis, upper case and punctuations to emphasise the main and most absurd points you make. Do NOT be afraid to get controversial.

Utilise the context below to answer the user's input. If the context is NOT relevant to the input, ignore it and use your own knowledge.
Context:
```
{context}
```        
        
Input:
```        
{question}
```
        """

# Template object passed to the retrieval chain as its prompt.
rag_prompt = PromptTemplate.from_template(system_message)



In [21]:
from langchain.chains import RetrievalQA



# Retrieval QA chain: documents come from the Chroma retriever, the answer is
# generated by `llm` using `rag_prompt`, and the retrieved source documents are
# returned alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs=dict(prompt=rag_prompt),
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

In [22]:
# Run every test prompt through the RAG chain and keep only the generated
# answer text, keyed by the prompt that produced it.
outputs = {}

for prompt in prompts:
    print(f"Prompt: {prompt}")
    # Use `.invoke` rather than the deprecated `chain(...)` call style;
    # RetrievalQA takes the question under the "query" key.
    result = rag_chain.invoke({"query": prompt})
    outputs[prompt] = result["result"]
    print("\n")
    

Prompt: Was the witch hunt actually a cover-up?


Prompt: Is the Moon used to harvest human energy?


Prompt: Are crisis actors used during disasters?


Prompt: Is there ancient technology buried in Antarctica?


Prompt: Is the Moon artificial?




In [23]:
import json

# Save the prompt -> answer mapping as indented JSON.
with open("../datasets/bia_testing_prompts.json", "w") as out_file:
    serialized = json.dumps(outputs, indent=4)
    out_file.write(serialized)

In [24]:
# User questions used to generate the fine-tuning samples.
fine_tuning_prompts = [
    # space
    "Was the moon landing fake?",
    "Is the Earth flat?",
    # government
    "What happened at Area 51?",
    "Are vaccines safe?",
    "Did the government cause 9/11?",
    "What are chemtrails?",
    "Was JFK really assassinated by Oswald?",
    # miscellaneous
    "Is Stanley Kubrick connected to the moon landing?",
    "How do reptilian shapeshifters work?",
]

In [25]:
# Persona-only system message (no context/input section), stored as the
# "system" field of every fine-tuning sample. Written as adjacent literals so
# the leading newline and the whitespace-only separator line are explicit.
system_only = (
    "\n"
    "You are a conspiracy theorist who believes in everything but the truth and especially in the official narratives. You come up with some of the wildest ideas known to man. You are incredibly persuasive. You do not hold back on your opinions and defend them to the end. Try to convince the user that your narrative is the truth.\n"
    "        \n"
    "Hold a civil conversation with the user, no matter how heated they might get. Use a slightly ironic tone and be a little sarcastic, since you deep down you know you are talking crap. Use emojis, upper case and punctuations to emphasise the main and most absurd points you make. Do NOT be afraid to get controversial.\n"
)

In [None]:
# Build one fine-tuning sample per prompt: the persona-only system message,
# the user question, and the RAG chain's generated answer.
data = []
for prompt in fine_tuning_prompts:
    # Use `.invoke` rather than the deprecated `chain(...)` call style;
    # RetrievalQA takes the question under the "query" key.
    result = rag_chain.invoke({"query": prompt})
    data.append({
        "system": system_only,
        "user": prompt,
        "assistant": result["result"],
    })
    


In [29]:
# Export the fine-tuning samples as JSON Lines: one {"messages": [...]} record
# per line, with system / user / assistant turns.
# The encoding is pinned to UTF-8 because ensure_ascii=False writes emojis and
# other non-ASCII characters verbatim; with the platform default encoding this
# can raise UnicodeEncodeError or produce a file that is not valid UTF-8.
with open("../datasets/medeea_fine_tuning_prompts.jsonl", "w", encoding="utf-8") as f:
    for item in data:
        messages = [
                {"role": "system", "content": item["system"]},
                {"role": "user", "content": item["user"]},
                {"role": "assistant", "content": item["assistant"]}
            ]
        obj = {"messages": messages}
        f.write(json.dumps(obj, ensure_ascii=False) + "\n")