# Installation

In [None]:
# Need this to get access to models/data
# make sure to use gpu runtime
!pip install huggingface_hub
from huggingface_hub import notebook_login
notebook_login()

In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# Unsloth models make inference and training fast by using 4 bit models. Check the git location for updates.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.26" trl peft accelerate bitsandbytes

In [None]:
!pip install transformers
!pip install datasets
# After installing or upgrading packages with pip, restart the runtime so the kernel imports the newly installed versions.

In [None]:
!pip install unsloth
!pip install wandb

In [None]:
import wandb
wandb.login() #weights and biases - create account and get api to track training metrics

In [None]:
import os, sys
from google.colab import drive


In [None]:
# Mount Google Drive so files and models persist across sessions; packages,
# trained models, and generated data all take disk space.
import os, sys
from google.colab import drive

drive.mount('/content/drive')
colab_path = '/content/notebooks'
# os.symlink raises FileExistsError when the link already exists, so create it
# only once; this keeps the cell safe to re-run. The target uses the same
# "MyDrive" spelling as every other Drive path in this notebook.
if not os.path.lexists(colab_path):
    os.symlink('/content/drive/MyDrive/Colab Notebooks', colab_path)
# Avoid stacking duplicate entries on sys.path when the cell is run again.
if colab_path not in sys.path:
    sys.path.insert(0, colab_path)
# Synchronous CUDA launches make errors surface at the failing call (slower, easier to debug).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

Mounted at /content/drive


# Data Preparation

In [None]:
import email
import getpass
import imaplib
from email.header import decode_header, make_header

import pandas as pd

# Set up the IMAP server connection
imap_server = 'imap.gmail.com'
port = 993
# Credentials are requested at run time so they are never stored in the notebook.
# The password is NOT your normal Gmail password: Gmail needs an app password
# for IMAP access (search for "Gmail app password" for instructions).
username = input('Gmail address: ')
password = getpass.getpass('Gmail app password: ')

# Upper bound on how many subject lines are collected.
MAX_SUBJECT_LINES = 1000


def _decode_subject(raw_header):
    """Return the decoded Subject text from raw ``Subject: ...`` header bytes.

    RFC 2047 encoded words (``=?UTF-8?...?=``) are decoded and runs of
    whitespace (including folded header lines) are collapsed to single spaces.
    """
    value = email.message_from_bytes(raw_header).get('Subject', '')
    try:
        text = str(make_header(decode_header(value)))
    except (LookupError, UnicodeError):
        # Unknown or broken charset: keep the undecoded header text.
        text = str(value)
    return ' '.join(text.split())


# Create an IMAP4_SSL connection
connection = imaplib.IMAP4_SSL(imap_server, port)
subject_lines = []
try:
    # Login to the server
    connection.login(username, password)

    # Open the inbox read-only so fetching headers does not mark messages as read.
    connection.select('Inbox', readonly=True)

    # Search for emails from Banana Republic Factory
    result, data = connection.search(None, '(FROM "bananarepublicfactory.com" SUBJECT "% off")')

    # Fetch the email subject lines
    for num in data[0].split():
        if len(subject_lines) >= MAX_SUBJECT_LINES:
            break
        result, fetched = connection.fetch(num, '(BODY[HEADER.FIELDS (SUBJECT)])')
        # A successful fetch yields a (envelope, payload) tuple first; skip anything else.
        if not fetched or not isinstance(fetched[0], tuple):
            continue
        subject = _decode_subject(fetched[0][1])
        if subject:
            subject_lines.append(subject)
finally:
    # Close the mailbox (if one was selected) and always log out.
    try:
        connection.close()
    except imaplib.IMAP4.error:
        pass  # no mailbox was selected, e.g. because login failed
    connection.logout()

# Create a DataFrame with the subject lines
df = pd.DataFrame({'subject_line': subject_lines})

In [None]:
df1 = df.reset_index(drop=True)

In [None]:
# str(bytes) renders as b'...' or, when the text contains a single quote, b"...";
# strip the leading "Subject: " prefix in either form. The pattern is anchored so
# only a prefix at the start of the value is removed.
df1['subject_line'] = df1['subject_line'].str.replace(r"""^b["']Subject: """, "", regex=True)


In [None]:
df1 = df1[df1['subject_line'].str.len() > 0]


In [None]:
df1.head()

In [None]:
# use this cell if you are running the gmail subject line collection multiple times
# NOTE(review): df2..df5 are not defined anywhere in the visible notebook — presumably
# they are results kept from earlier collection runs; confirm before running.
# NOTE(review): the next cell reassigns dfp from df1, which discards this concatenation.
dfp = pd.concat([df2,df3,df4, df5], ignore_index=True) #

In [None]:
dfp = df1.drop_duplicates(subset='subject_line', keep='first')
dfp['subject_line'].count()

In [None]:
# The DataFrame index is written as the first CSV column (index=False is not passed);
# the prompt-building cell reads the subject line from row[1], so it relies on that layout.
dfp.to_csv('/content/drive/MyDrive/Colab Notebooks/subject_lines.csv')

## Training data preparation

Turn the collected subject lines into training data: generate a label for each subject line, then decide which field serves as the instruction (prompt), the input, and the output (label).

In [None]:
# uncomment line below to remove model and tokenizer and start over fresh without disconnecting runtime
# del model, tokenizer
from unsloth import FastLanguageModel
import torch

max_seq_length = 1024  # maximum sequence length handed to the loader
dtype = None           # None lets the loader choose the dtype
load_in_4bit = True    # load the 4-bit quantized weights

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit", # check on all the models that are compatible. many models work with unsloth
    max_seq_length = max_seq_length,
    device_map = "auto",
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # NOTE(review): Drive is mounted at /content/drive, so this /content/gdrive path is
    # presumably local runtime storage rather than Drive — confirm which is intended.
    cache_dir='/content/gdrive/MyDrive/model_cache/',
    max_length = 2048
)
# Put the model in Unsloth's inference mode for the labelling step that follows.
FastLanguageModel.for_inference(model)


In [None]:
# saving models to preserve original model and tokenizer to compare results after tuning
# NOTE(review): these assignments bind new names to the same objects; they do not copy them.
# If a later step modifies the model object in place, model_orig will reflect that change
# too — confirm whether an independent copy is needed for the before/after comparison.
model_orig=model
tokenizer_orig=tokenizer

In [None]:
# this is to access the app folder.
# `sys` is imported directly instead of being reached through `os.sys`, and the
# path is appended only when missing so re-running the cell adds no duplicates.
import sys

_dist_packages = '/usr/local/lib/python3.10/dist-packages'
if _dist_packages not in sys.path:
    sys.path.append(_dist_packages)

In [None]:
import csv
import json

# The message list opens with the system prompt; one user message per subject line follows.
data = [{"role": "system", "content": "you are a email marketing bot that can read an email subject line and provide the purpose, season, product, and promotion amount in a concise matter."}]

with open('/content/drive/MyDrive/Colab Notebooks/subject_lines.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # discard the header row
    # The subject line is read from the second column of each row.
    data.extend({"role": "user", "content": row[1]} for row in reader)

with open('all_prompts.json', 'w') as outfile:
    outfile.write(json.dumps(data))

In [None]:

import transformers
from unsloth import FastLanguageModel
import torch
import pandas as pd
import jinja2


alpaca_prompt = """

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Build one training text per example from a batch of columns.

    Args:
        examples: mapping with "instruction", "input" and "output" entries,
            each an iterable of column values.

    Returns:
        dict with a single "text" key holding the formatted prompts.
    """
    columns = zip(examples["instruction"], examples["input"], examples["output"])
    # EOS_TOKEN is appended to every text; without it generation would not know where to stop.
    return {"text": [alpaca_prompt.format(*fields) + EOS_TOKEN for fields in columns]}

In [None]:
# Load the JSON file
with open('all_prompts.json', 'r') as file:
    messages = json.load(file)

# The instruction is identical for every message, so it is built once outside the loop.
mna_news_instruction = """what is the purpose, season, product, of this email subject line? provide an answer with less than 25 words:
                              Responde in this format: purpose: {purpose}, season: {season}, product: {product}, promotion amount: {amount}"""

# Collect plain records and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows each time.
records = []
for message in messages:
    # Get the content from the message
    content = message['content']
    mna_news_input = content

    # Tokenize the prompt and get the response from the LLaMA model
    prompt_text = alpaca_prompt.format(mna_news_instruction, mna_news_input, "")
    inputs = tokenizer([prompt_text], return_tensors = "pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens = 64)
    response = tokenizer.batch_decode(outputs)[0]
    print(response)
    records.append({'content': content, 'response': response})

# Explicit columns keep the CSV header stable even when there are no messages.
df = pd.DataFrame(records, columns=['content', 'response'])

# Save the DataFrame to a CSV file
df.to_csv('content_response.csv', index=False)


#### turn new csv into correct format.

In [None]:

!cp all_prompts.json "/content/drive/MyDrive/Colab Notebooks"
!cp content_response.csv "/content/drive/MyDrive/Colab Notebooks"

In [None]:
pd.set_option('display.max_colwidth', None)

# Display the dataframe
print(df.head())


In [None]:

import pandas as pd

# Read in the content_response.csv file
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/content_response.csv')

# clean up responses
# Drop the first row (presumably the system-prompt message rather than a subject
# line) and take a copy so the cleaned column is not written into a slice.
df = df.iloc[1:].copy()
cleaned_response = (
    df['response']
    .str.split('### Response:').str[1]     # keep the text after the response marker
    .str.replace('\n', '', regex=False)    # literal replacements: '(' and ')' are regex metacharacters
    .str.replace(')', '', regex=False)
    .str.replace('(', '', regex=False)
    .str.split('###').str[0]               # cut anything after a further '###' section
    .str.split('<').str[0]                 # cut at the first '<'
)

# Keep only the content and response columns
df = df.assign(response=cleaned_response)[['content', 'response']]

# Print the DataFrame
print(df.head())


In [None]:
#final clean up to remove responses that are same as input
# Compare the first 25 non-space characters of each pair. Vectorised string
# operations are used so a missing (NaN) value compares as "different" and the
# row is kept, instead of the comparison raising AttributeError.
content_key = df['content'].str.replace(' ', '', regex=False).str[:25]
response_key = df['response'].str.replace(' ', '', regex=False).str[:25]
df = df[content_key != response_key]

In [None]:
df.nunique()

In [None]:

# Build the instruction-tuning table in one chain that returns a new frame:
# this avoids assigning into a filtered slice and avoids an in-place rename.
# The cleaned response becomes the model input and the original subject line
# becomes the expected output.
df = (
    df[['content', 'response']]
    .rename(columns={'content': 'output', 'response': 'input'})
    .assign(instruction="Generate a marketing subject line specifically for my purpose")
    [['instruction', 'input', 'output']]
)

# Store the DataFrame into a new CSV file
df.to_csv('/content/drive/MyDrive/Colab Notebooks/training_data_sl.csv', index=False)


In [None]:
import csv
import json

# Read the training CSV into a list with one dict per row (keys come from the header row).
with open('/content/drive/MyDrive/Colab Notebooks/training_data_sl.csv', 'r') as csvfile:
    reader = csv.DictReader(csvfile)
    data = list(reader)

# Serialize the rows as an indented JSON array next to the CSV.
with open('/content/drive/MyDrive/Colab Notebooks/training_data_sl.json', 'w') as jsonfile:
    jsonfile.write(json.dumps(data, indent=4))

## Fine-tune the LLM with LoRA

In [None]:
import os
import sys
from google.colab import drive

# Mount Google Drive and expose the Colab Notebooks folder under /content/notebooks.
drive.mount('/content/drive')
colab_path = '/content/notebooks'
# os.symlink raises FileExistsError when the link already exists, so create it
# only once; this keeps the cell safe to re-run. The target uses the same
# "MyDrive" spelling as the other Drive paths in this notebook.
if not os.path.lexists(colab_path):
    os.symlink('/content/drive/MyDrive/Colab Notebooks', colab_path)
# Avoid stacking duplicate entries on sys.path when the cell is run again.
if colab_path not in sys.path:
    sys.path.insert(0, colab_path)
# Synchronous CUDA launches make errors surface at the failing call (slower, easier to debug).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# del model, tokenizer
from unsloth import FastLanguageModel
import torch

# Loader settings (notes carried over from the template this cell is based on)
max_seq_length = 1024   # any value; RoPE scaling is handled internally
dtype = None            # None = auto detection; Float16 for Tesla T4/V100, Bfloat16 for Ampere+
load_in_4bit = True     # 4-bit quantization to reduce memory usage; can be False

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",   # the model used for training
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    device_map = "auto",
    cache_dir='/content/gdrive/MyDrive/model_cache/',
    max_length = 2048
)
# Switch the freshly loaded model into Unsloth's inference mode.
FastLanguageModel.for_inference(model)


In [None]:
alpaca_prompt = """

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Format a batch of examples into prompt texts.

    Reads the "instruction", "input" and "output" columns of ``examples``,
    fills them into ``alpaca_prompt`` row by row, and appends ``EOS_TOKEN``
    to each result. Returns ``{"text": [...]}`` with one string per row.
    """
    instruction_col = examples["instruction"]
    input_col = examples["input"]
    output_col = examples["output"]

    def render(instruction, context, answer):
        # The trailing EOS token marks where generation should stop.
        return alpaca_prompt.format(instruction, context, answer) + EOS_TOKEN

    return {"text": list(map(render, instruction_col, input_col, output_col))}

from datasets import load_dataset
dataset = load_dataset("jeffsan730/email_subject_line", split = "train")


In [None]:
from datasets import load_dataset
from datasets import Dataset

# Load the dataset and add the formatted "text" column used for training.
dataset = load_dataset('jeffsan730/email_subject_line', split='train')
dataset = dataset.map(formatting_prompts_func, batched=True)

# Hold out 20% of the rows for evaluation. The datasets-native splitter returns
# Dataset objects directly, so no round trip through sklearn and
# Dataset.from_dict is needed. The seed keeps the split reproducible.
split_datasets = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split_datasets["train"]
test_dataset = split_datasets["test"]

In [None]:
dataset

In [None]:
# Point the working names back at the objects saved in model_orig / tokenizer_orig.
# NOTE(review): model_orig and tokenizer_orig are only assigned in a cell of the
# data-preparation section; on a kernel that starts at the fine-tuning section this
# raises NameError, and it also replaces the model loaded a few cells above —
# confirm this cell is still wanted.
model=model_orig
tokenizer=tokenizer_orig

In [None]:
# LoRA adapter settings, gathered in one mapping and unpacked into get_peft_model.
lora_settings = dict(
    r = 8,                                   # rank; must be > 0 (suggested 8, 16, 32, 64, 128)
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 32,
    lora_dropout = 0,                        # any value is supported; 0 is the optimized setting
    bias = "none",                           # any value is supported; "none" is the optimized setting
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = True,                       # rank-stabilized LoRA
    loftq_config = None,                     # LoftQ not used
)
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
# del trainer

# `args` is the single training configuration and is the object handed to the
# trainer below, so inspecting `args` shows exactly what the run uses.
args = TrainingArguments(
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    # NOTE(review): no eval_steps is given; together with logging_steps=1 this
    # presumably evaluates after every step — confirm that is intended.
    evaluation_strategy = "steps",
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    max_steps = 100,
    learning_rate = 2e-4,
    # Use bf16 where the GPU supports it, otherwise fall back to fp16.
    fp16 = not torch.cuda.is_bf16_supported(),
    bf16 = torch.cuda.is_bf16_supported(),
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
    report_to = "wandb",                       # enable logging to W&B
    run_name = "batch_size_2_max_steps100",    # name of the W&B run (optional)
    logging_steps = 1,                         # how often to log to W&B
    logging_strategy = 'steps',
    save_total_limit = 2,
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = args,
)

In [None]:
%env WANDB_WATCH=all
%env WANDB_SILENT=true

In [None]:

from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging
import wandb

# Raise the transformers log level to INFO for the training run.
logging.set_verbosity_info()
project_name = "genai-poc"
# NOTE: `entity` is assigned here but is not passed to wandb.init below.
entity = "wandb"
# os.environ["WANDB_LOG_MODEL"] = "checkpoint"

# Start the W&B run under `project_name` with a fixed run name.
wandb.init(project=project_name, name = "llama3-8b-email_sl")

In [None]:
args = args

In [None]:
#@title Show current memory stats
# Snapshot the device capacity and the peak reserved memory before training.
# Values are converted from bytes to GB (2**30 bytes) and rounded to 3 decimals.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 2**30, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 2**30, 3)
print(
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
    f"{start_gpu_memory} GB of memory reserved.",
    sep="\n",
)

In [None]:
trainer_stats = trainer.train()
evaluation_stats = trainer.evaluate()
wandb.finish()

In [None]:
###@title Show final memory and time stats
# Peak reserved memory after training, in GB (2**30 bytes), plus the share
# attributable to training (peak minus the pre-training snapshot).
used_memory = round(torch.cuda.max_memory_reserved() / 2**30, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

train_seconds = trainer_stats.metrics['train_runtime']
report_lines = (
    f"{train_seconds} seconds used for training.",
    f"{round(train_seconds/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
)
for report_line in report_lines:
    print(report_line)

Code to generate multiple outputs

In [None]:

FastLanguageModel.for_inference(model)

# The prompt is the same for every sample, so it is formatted and tokenized
# once instead of on every loop iteration.
inputs = tokenizer(
    [
        alpaca_prompt.format(
            "Generate marketing subject lines specifically for my purpose",
            """purpose: Early Access, season: Black Friday, product: Tablets, promotion amount: Up to 70% OFF!   """,
            "",
        )
    ], return_tensors="pt"
).to("cuda")

appended_output = []
for _ in range(5):
    # do_sample=True draws a new completion on each pass.
    outputs = model.generate(**inputs, max_new_tokens=1028, use_cache=False, do_sample=True)
    output = tokenizer.batch_decode(outputs)[0]
    appended_output.append(output)

print(appended_output)

In [None]:
import re

# Remove each prompt (instruction + input) and keep only the "### Response:" parts.
# The match is non-greedy: with several generations joined together, a greedy `.*`
# under DOTALL would span from the first "### Instruction:" to the last
# "### Response:" and discard every response except the final one.
re.sub(r'### Instruction:.*?### Response:', '### Response:', str('\n'.join(appended_output)), flags=re.DOTALL)

In [None]:
import re

# Join the generations into one string before flattening. str() of the list
# would contain escaped "\\n" sequences (plus brackets and quotes) rather than
# real newlines, so the newline substitution below would never match.
output2 = ' '.join(appended_output).replace("'", "")
output2 = re.sub(r'\n', ' ', output2)
print(output2)

In [None]:
import re

# NOTE(review): the pattern expects the response to be terminated by the literal
# "&&&"; nothing in the visible notebook emits that marker — confirm which
# terminator the fine-tuned model actually produces.
match = re.search(r'### Response:(.*?)&&&', output2)
if match:
    print("Response: " + match.group(1).strip())
else:
    # Say so explicitly instead of printing nothing when the pattern is absent.
    print("No '### Response: ... &&&' span found in output2")

In [None]:
print(match)

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Generate marketing subject lines specifically for my purpose",
        """purpose: Sale, season: Labor Day, product: AI Appliances, promotion amount: Up to 50% OFF!   """,
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# Returning several sequences needs sampling (or beam search): plain greedy
# decoding is deterministic and rejects num_return_sequences > 1.
outputs = model.generate(
    **inputs,
    max_new_tokens = 1028,
    use_cache = False,
    do_sample = True,
    num_return_sequences = 5,
)
tokenizer.batch_decode(outputs)

In [None]:
model.save_pretrained('/content/drive/MyDrive/Colab Notebooks/subjectline_model_unslothllama8b')
tokenizer.save_pretrained('/content/drive/MyDrive/Colab Notebooks/subjectline_model_unslothllama8b')

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# NOTE: AutoModelForCausalLM and AutoTokenizer are imported here but not used in this cell.

# Assuming your model and tokenizer are stored in the variables "model" and "tokenizer"

# Define the model architecture and tokenizer type
# NOTE(review): these two strings are only forwarded as extra keyword arguments
# below; presumably save_pretrained does not use them — confirm against the library docs.
model_architecture = "llama"
tokenizer_type = "LLaMATokenizer"

# Save the model and tokenizer to the directory
# (the same target directory as the save_pretrained calls in the previous cell)
model.save_pretrained(
    '/content/drive/MyDrive/Colab Notebooks/subjectline_model_unslothllama8b',
    model_architecture=model_architecture,
    tokenizer_type=tokenizer_type
)

tokenizer.save_pretrained(
    '/content/drive/MyDrive/Colab Notebooks/subjectline_model_unslothllama8b',
    tokenizer_type=tokenizer_type
)

In [None]:
import transformers
print(transformers.__version__)

In [None]:
!rm -rf "/content/gdrive"

In [None]:
print(type(tokenizer))

In [None]:
print(dir(tokenizer))

In [None]:


!mv "/content/drive/MyDrive/Colab Notebooks/lora_model" "/content/drive/MyDrive/h2ogpt"


#### Original model output

In [None]:
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model)

# Fill the template: instruction, free-text description of the campaign, and an
# empty response slot that the model completes.
prompt_text = alpaca_prompt.format(
    "Generate a marketing subject line specifically for my purpose",
    """The purpose of this email subject line is to inform the customer that the sale has started
        and that they can get 60% off everything. The season is the holiday season, and the product all producst are on sale.
        The subject line should be short and to the point, and be clear that the customer can get a discount on everything
        in the store.""",
    "",
)
inputs = tokenizer([prompt_text], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 1028, use_cache = False)
tokenizer.batch_decode(outputs)

In [None]:
 FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Generate a marketing subject line specifically for my purpose", # instruction
        """The purpose of this email subject line is to inform the customer that the sale has started
        and that they can get 60% off denim jeans. The season is the fall sean, and the only jeans are on sale.
        The subject line should be short and to the point, be clear, and create urgency.
        """,
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 1028, use_cache = False)
tokenizer.batch_decode(outputs)

In [None]:
# Candidate values for three prompt attributes: product, season and purpose.
keywords = {
    "Product": ["All/No specific product", "denim jeans", "outerwear", "shoes", "sunglasses", "shirts"],
    "Season": ["Summer", "Winter", "Spring", "Fall"],
    "Purpose": ["Fashion accessory", "Gift", "Everyday use"]
}

# NOTE(review): `input_ids` and `template` are not defined anywhere in the visible
# notebook, and `format` / `keywords` are presumably not arguments that
# model.generate accepts — confirm the intent before running this cell.
output = model.generate(input_ids=input_ids, format=template, keywords=keywords)

# Building Local Agentic RAG


1.   One way to build an agentic system and reduce errors
2.   Uses LangGraph (many resources are available online)





*   Routing: Adaptive RAG ([paper](https://arxiv.org/abs/2403.14403)). Route questions to different retrieval approaches
*   Fallback: Corrective RAG ([paper](https://arxiv.org/pdf/2401.15884.pdf)). Fall back to web search if the docs are not relevant to the query
*   Self-correction: Self-RAG ([paper](https://arxiv.org/abs/2310.11511)). Fix answers that contain hallucinations or don’t address the question



In [None]:
!pip install -U langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python gpt4all fastembed langchain-groq

Collecting langchain-nomic
  Downloading langchain_nomic-0.1.0-py3-none-any.whl (3.4 kB)
Collecting langchain_community
  Downloading langchain_community-0.2.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchainhub
  Downloading langchainhub-0.1.15-py3-none-any.whl (4.6 kB)
Collecting chromadb
  Downloading chromadb-0.5.0-py3-none-any.whl (526 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m526.8/526.8 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.2.0-py3-none-any.whl (973 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m973.7/973.7 kB[0m

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

ModuleNotFoundError: No module named 'langchain'

In [None]:
embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

In [None]:
from groq import Groq
from langchain_groq import ChatGroq
from google.colab import userdata

llm = ChatGroq(temperature=0,
                      model_name="Llama3-8b-8192",
                      api_key=userdata.get("GROQ_API_KEY"),)

In [None]:

urls = [
    "https://www.omnisend.com/blog/best-email-subject-lines/",
    "https://www.omnisend.com/blog/product-launch-email/",
    "https://fluentcrm.com/product-launch-email-subject-lines/",
    "https://www.localdigital.com.au/blog/28-best-email-subject-lines-that-boost-open-rates",
    "https://www.klientboost.com/email/email-subject-lines/",
]

# Load each URL (one list of documents per URL), then flatten into a single list.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = []
for loaded in docs:
    docs_list.extend(loaded)
print("len of documents :" + str(len(docs_list)))

len of documents :5


In [None]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)
print(f"length of document chunks generated :{len(doc_splits)}")

length of document chunks generated :32


In [None]:
vectorstore = Chroma.from_documents(documents=doc_splits,
                                    embedding=embed_model,
                                    collection_name="local-rag")

In [None]:
#Instantiate the retriever
retriever = vectorstore.as_retriever(search_kwargs={"k":2})

In [None]:
#Implement the Router
import time
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.output_parsers import StrOutputParser

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an expert at routing a
    user question to a vectorstore or web search. Use the vectorstore for questions on email subject lines for
    product launches, seasonal sales, specific product sales, and event sales. You do not need to be stringent with the keywords
    in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search'
    or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and
    no premable or explaination. Question to route: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question"],
)
# The timer starts before the chain is composed, so the reported time covers
# building the chain plus one routing call.
start = time.time()
# Pipe the router prompt into the LLM and parse the reply as JSON.
question_router = prompt | llm | JsonOutputParser()
#
# `question` is reused by the generation and grader cells that follow.
question = "create email subject line for holiday season, product is tablet, 50% off is the discount"
print(question_router.invoke({"question": question}))
end = time.time()
print(f"The time required to generate response by Router Chain in seconds:{end - start}")



{'datasource': 'vectorstore'}
The time required to generate response by Router Chain in seconds:0.33595848083496094


In [None]:
#Implement the Generate Chain
# input_variables must name the placeholders the template actually contains:
# {question} and {context}.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for creating email subject lines for marketing campaigns.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use best practices for subject lines and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"],
)

# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Chain
start = time.time()
rag_chain = prompt | llm | StrOutputParser()
docs = retriever.invoke(question)
# The retrieved documents are passed as-is for the {context} placeholder.
generation = rag_chain.invoke({"context": docs, "question": question})
# Close the timer in the cell that started it, so a stale `start` does not
# leak into later timing reports.
end = time.time()
print(f"The time required to generate response by the generation chain in seconds:{end - start}")
print(generation)

Here are a few subject line options for a holiday season promotion with a 50% off discount on a tablet:

* "Holiday Gift Alert: 50% Off Our Best-Selling Tablet!"
* "Deck the Halls with Savings: 50% Off Our Tablet"
* "Give the Gift of Tech: 50% Off Our Tablet"
* "Holiday Cheer: 50% Off Our Top-Rated Tablet"
* "Unwrap the Savings: 50% Off Our Tablet"

These subject lines aim to create a sense of urgency and excitement around the holiday season, while also highlighting the significant discount being offered.


In [None]:
# Retrieval Grader
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance
    of documents to a user's question. If the documents contains keywords related to the user's question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous results. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
     <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>

    """,
    input_variables=["question", "document"],
)
# Time chain construction, retrieval, and one grading call together.
start = time.time()
retrieval_grader = prompt | llm | JsonOutputParser()

docs = retriever.invoke(question)
# Grades the second retrieved document; this assumes the retriever returned at least two.
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
end = time.time()
print(f"The time required to generate response by the retrieval grader in seconds:{end - start}")


{'score': 'yes'}
The time required to generate response by the retrieval grader in seconds:0.7381274700164795


In [None]:
# Hallucination grader
# Prompt
prompt = PromptTemplate(
    template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether
    the response subject lines are grounded in the documents. Give a binary 'yes' or 'no' score to indicate
    whether the response subject lines are grounded in the documents. Provide the binary score as a JSON with a
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents}
    \n ------- \n
    Here is the subject lines: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)



# Restart the timer for this cell; without this the elapsed time is measured
# from a `start` set in an earlier cell and includes the idle time in between.
start = time.time()
hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader_response = hallucination_grader.invoke({"documents": docs, "generation": generation})
end = time.time()
print(f"The time required to generate response by the hallucination grader in seconds:{end - start}")
print(hallucination_grader_response)

The time required to generate response by the generation chain in seconds:273.652304649353
{'score': 'yes'}


In [None]:
#answer grader
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an
    answer contains effective subject lines. Give a binary score 'yes' or 'no' to indicate whether the answer is
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n ------- \n
    {generation}
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "question"],
)
# Time chain construction plus one grading call.
start = time.time()
answer_grader = prompt | llm | JsonOutputParser()
# Grade the `generation` produced earlier against the original `question`.
answer_grader_response = answer_grader.invoke({"question": question,"generation": generation})
end = time.time()
print(f"The time required to generate response by the answer grader in seconds:{end - start}")
print(answer_grader_response)



The time required to generate response by the answer grader in seconds:0.45705437660217285
{'score': 'yes'}


In [None]:
import os
from google.colab import userdata
from langchain_community.tools.tavily_search import TavilySearchResults

# Read the Tavily key from Colab's secret store (the same mechanism this notebook
# uses for GROQ_API_KEY) so the key is never saved with the notebook.
# NOTE: a literal key was previously embedded in this cell; treat it as exposed and rotate it.
os.environ['TAVILY_API_KEY'] = userdata.get("TAVILY_API_KEY")
web_search_tool = TavilySearchResults(k=3)

In [None]:
### State
from typing import List, TypedDict


class GraphState(TypedDict):
    """State dictionary passed between the graph's node functions.

    Keys:
        question: the user question
        generation: the LLM generation
        web_search: "Yes"/"No" flag set by the document grader
        documents: the retrieved documents
    """
    question : str
    generation : str
    web_search : str
    # NOTE(review): annotated as List[str], but the node functions store document
    # objects here (they read `.page_content`) — confirm the intended type.
    documents : List[str]

In [None]:
from langchain.schema import Document
def retrieve(state):
    """
    Fetch documents for the current question from the vectorstore retriever.

    Args:
        state (dict): The current graph state; "question" is read from it

    Returns:
        dict: "documents" holding the retriever results, and "question" unchanged
    """
    print("---RETRIEVE---")
    query = state["question"]
    return {"documents": retriever.invoke(query), "question": query}
#
def generate(state):
    """
    Run the RAG chain on the question and the documents held in the state.

    Args:
        state (dict): The current graph state; "question" and "documents" are read from it

    Returns:
        dict: "documents" and "question" unchanged, plus "generation" with the chain output
    """
    print("---GENERATE---")
    result = {"documents": state["documents"], "question": state["question"]}
    # The documents are passed to the chain as its "context" input.
    result["generation"] = rag_chain.invoke(
        {"context": result["documents"], "question": result["question"]}
    )
    return result
#
def grade_documents(state):
    """
    Grade every retrieved document against the question and keep the relevant ones.

    As soon as one document is graded not relevant, the web-search flag is
    switched to "Yes" so the graph can supplement the context.

    Args:
        state (dict): The current graph state; must contain "question" and "documents"

    Returns:
        dict: State update with the relevant "documents", the "question",
        and the "web_search" flag ("Yes" or "No")
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    candidates = state["documents"]

    relevant_docs = []
    search_flag = "No"
    for doc in candidates:
        verdict = retrieval_grader.invoke({"question": question, "document": doc.page_content})
        if verdict['score'].lower() != "yes":
            # Irrelevant: leave the document out and request a web search
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            search_flag = "Yes"
            continue
        print("---GRADE: DOCUMENT RELEVANT---")
        relevant_docs.append(doc)
    return {"documents": relevant_docs, "question": question, "web_search": search_flag}
#
def web_search(state):
    """
    Run a web search for the question and add the results to the documents.

    All hits are joined into one Document whose page_content is the
    newline-separated "content" of each hit.

    Args:
        state (dict): The current graph state; must contain "question".
            "documents" may be missing or None, e.g. when the router sends
            the question straight to web search without a retrieval step.

    Returns:
        dict: State update with "documents" (existing documents followed by
        the web-result document) and the unchanged "question"
    """

    print("---WEB SEARCH---")
    question = state["question"]
    # .get() instead of indexing: the key is not guaranteed to exist yet
    existing_docs = state.get("documents") or []

    # Web search
    hits = web_search_tool.invoke({"query": question})
    web_results = Document(page_content="\n".join(hit["content"] for hit in hits))

    # Build a new list instead of appending in place so the caller's list is not mutated
    return {"documents": [*existing_docs, web_results], "question": question}
#

In [None]:
def route_question(state):
    """
    Route question to web search or RAG.

    The router's "datasource" value is normalized (case, surrounding
    whitespace, spaces/hyphens) before matching. Anything that still is not
    recognized falls back to web search instead of returning None, which
    would not match any branch of the conditional entry point.

    Args:
        state (dict): The current graph state; must contain "question"

    Returns:
        str: Next node to call, either "websearch" or "vectorstore"
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    print(question)
    source = question_router.invoke({"question": question})
    print(source)
    datasource = source.get("datasource") if isinstance(source, dict) else None
    print(datasource)

    # LLM output is not guaranteed to use the exact expected spelling
    normalized = str(datasource).strip().lower().replace(" ", "_").replace("-", "_")
    if normalized == "vectorstore":
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"
    if normalized in ("web_search", "websearch"):
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"

    print(f"---UNRECOGNIZED DATASOURCE {datasource!r}, FALLING BACK TO WEB SEARCH---")
    return "websearch"

In [None]:
def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Web search is chosen when the grading step flagged at least one document
    as not relevant, or when no documents are left to generate from.

    Args:
        state (dict): The current graph state; must contain "web_search" and "documents"

    Returns:
        str: Binary decision for next node to call ("websearch" or "generate")
    """

    print("---ASSESS GRADED DOCUMENTS---")
    web_search_flag = state["web_search"]
    filtered_documents = state["documents"]

    if web_search_flag == "Yes":
        # At least one retrieved document was graded not relevant,
        # so supplement the context with web results
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")
        return "websearch"
    if not filtered_documents:
        # Nothing was retrieved at all: generating from an empty context would
        # only invite an ungrounded answer
        print("---DECISION: NO DOCUMENTS TO GENERATE FROM, INCLUDE WEB SEARCH---")
        return "websearch"

    # We have relevant documents, so generate answer
    print("---DECISION: GENERATE---")
    return "generate"

In [None]:
def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Grader verdicts are compared case-insensitively (and ignoring surrounding
    whitespace), matching how document grades are handled, so a "Yes" from
    the model is not mistaken for a failure.

    Args:
        state (dict): The current graph state; must contain "question",
            "documents" and "generation"

    Returns:
        str: Decision for next node to call: "useful", "not useful" or "not supported"
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    is_grounded = str(score['score']).strip().lower() == "yes"

    # Check hallucination
    if not is_grounded:
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check question-answering
    print("---GRADE GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question,"generation": generation})
    if str(score['score']).strip().lower() == "yes":
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"

In [None]:
from langgraph.graph import END, StateGraph

workflow = StateGraph(GraphState)

# Register every node under the name the edges and routers refer to
for node_name, node_fn in (
    ("websearch", web_search),              # web search
    ("retrieve", retrieve),                 # retrieve
    ("grade_documents", grade_documents),   # grade documents
    ("generate", generate),                 # generate
):
    workflow.add_node(node_name, node_fn)

In [None]:
# Entry point: route_question picks web search or vectorstore retrieval
workflow.set_conditional_entry_point(
    route_question,
    {"websearch": "websearch", "vectorstore": "retrieve"},
)

# Retrieved documents are always graded; grading decides between
# supplementing with web search and generating right away
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {"websearch": "websearch", "generate": "generate"},
)

# Web search always feeds into generation
workflow.add_edge("websearch", "generate")

# After generating: finish when useful, search the web when the answer misses
# the question, and generate again when it is not supported by the documents.
# NOTE(review): nothing visible here bounds the "not supported" -> "generate"
# retries; confirm a repeated ungrounded answer cannot loop indefinitely.
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {"not supported": "generate", "useful": END, "not useful": "websearch"},
)

In [None]:
# Compile the configured workflow graph into a runnable app
app = workflow.compile()

In [None]:
from pprint import pprint

app = workflow.compile()
inputs = {"question": "provide 5 good subject lines for a wedding."}

# Track the answer explicitly instead of relying on the loop variable that is
# left over after the loop (undefined when the stream yields nothing, and
# missing "generation" when the last update comes from another node).
final_generation = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
        if isinstance(value, dict) and "generation" in value:
            final_generation = value["generation"]

if final_generation is None:
    print("The workflow finished without producing a generation.")
else:
    pprint(final_generation)


---ROUTE QUESTION---
provide 5 good subject lines for a wedding.
{'datasource': 'vectorstore'}
vectorstore
---ROUTE QUESTION TO RAG---
---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---
'Finished running: grade_documents:'
---WEB SEARCH---
'Finished running: websearch:'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running: generate:'
('Here are five good subject line options for a wedding:\n'
 '\n'
 '1. "Join Us: [Bride\'s Name] & [Groom\'s Name] Say \'I Do!\'"\n'
 '2. "You\'re Invited: [Bride\'s Name] & [Groom\'s Name]\'s Special Day"\n'
 '3. "The Countdown Begins: [Bride\'s Name] & [Groom\'s Name]\'s Wedding '
 'Details Inside"\n'
 '4

In [None]:
from pprint import pprint

app = workflow.compile()
inputs = {"question": "provide 5 great subject lines for our new customers."}

# Track the answer explicitly instead of relying on the loop variable that is
# left over after the loop (undefined when the stream yields nothing, and
# missing "generation" when the last update comes from another node).
final_generation = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
        if isinstance(value, dict) and "generation" in value:
            final_generation = value["generation"]

if final_generation is None:
    print("The workflow finished without producing a generation.")
else:
    pprint(final_generation)


---ROUTE QUESTION---
provide 5 great subject lines for our new customers.
{'datasource': 'vectorstore'}
vectorstore
---ROUTE QUESTION TO RAG---
---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
'Finished running: grade_documents:'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running: generate:'
('Here are five great subject lines for your new customers:\n'
 '\n'
 '1. Welcome to [community]! Your welcome offer is inside.\n'
 "2. We're so excited you're here, [name]! Thanks for signing up - here's 10% "
 'off.\n'
 "3. Hooray! You're on our email list. Your welcome offer is waiting.\n"
 "4. Thanks for joining us, [name]! We're thrilled to have you on board.\n"
 '5. Your welcome offer is here! Thanks

# Body Content Generation Using Multi-Agent CrewAI


1.   CrewAI is an easier way to create and coordinate agents.
2.   It can be buggy and prone to hallucinations, so use it together with LangGraph.



In [None]:
# Packages for the CrewAI section: CrewAI and its tools, DuckDuckGo search,
# the LangChain Google Gemini integration, and the Google API client
!pip install --q crewai
!pip install --q crewai-tools
!pip install --q -U duckduckgo-search
!pip install --q langchain_google_genai
!pip install google-api-python-client

In [None]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from crewai import Agent, Task, Crew, Process
from crewai_tools import tool

In [None]:
# NOTE(review): gemini_webapi is not imported in the nearby cells - confirm this install is still needed
!pip install -U gemini_webapi

Collecting gemini_webapi
  Downloading gemini_webapi-1.2.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.7/44.7 kB[0m [31m709.7 kB/s[0m eta [36m0:00:00[0m
Collecting loguru>=0.7.2 (from gemini_webapi)
  Downloading loguru-0.7.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: loguru, gemini_webapi
Successfully installed gemini_webapi-1.2.0 loguru-0.7.2


In [None]:
from langchain.tools import DuckDuckGoSearchRun
from googleapiclient.discovery import build
from crewai_tools import (
    DirectoryReadTool,
    FileReadTool,
    SerperDevTool,
    WebsiteSearchTool,
    BaseTool
)
from google.colab import userdata

In [None]:
@tool('DuckDuckGoSearchRun')
def search_tool(search_query: str):
  "search the web on a given topic"
  # A fresh DuckDuckGo search runner is created for every query
  searcher = DuckDuckGoSearchRun()
  return searcher.run(search_query)

In [None]:
# Set gemini pro as llm
# The API key must not live in the notebook: it is read from the GOOGLE_API_KEY
# environment variable, falling back to the Colab secret of the same name.
# NOTE(review): this rebinds `llm`, the same name the grader chains above were
# built with - confirm re-running those cells afterwards is intended to use Gemini.
llm = ChatGoogleGenerativeAI(model="gemini-pro",
                             verbose = True,
                             temperature = 0.3,
                             google_api_key=os.environ.get("GOOGLE_API_KEY") or userdata.get("GOOGLE_API_KEY"))

In [None]:
llm

ChatGoogleGenerativeAI(verbose=True, model='models/gemini-pro', google_api_key=SecretStr('**********'), temperature=0.3, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x791e54527eb0>, async_client=<google.ai.generativelanguage_v1beta.services.generative_service.async_client.GenerativeServiceAsyncClient object at 0x791e5463df30>, default_metadata=())

In [None]:
# docs_tool = DirectoryReadTool(directory='/content/drive/MyDrive/Colab Notebooks/html_code')

In [None]:
# Subject-line agent: researches examples with the web search tool and writes
# the subject lines that the later agents build on
sl_creator = Agent(
  role='Email Subject Line creator that sticks to the task at hand',
  goal='Uncover best examples in creating email subject lines and produce best subject lines that are creative and engaging',
  backstory="""You work at a leading phone company. Find the information on best examples for a research topic on the internet,
  then create subject lines that follow the best examples for the user input.
  Your expertise lies in best examples in creating email subject lines based on product type, campaign type, offer, and segment type. Be sure to follow
  all of the user's instructions.
  You have a knack for collecting information on best examples on subject line that generates the
  most engagement and conversions. Your output is 20 concise different subject lines, nothing else and no premable or explaination.
  Very important: when searching the internet, the format is just one string 'query'.
  Keep searching until you get a response.
  Must include some information about the campaign, segment, and offer that the user has given to you.
  Do NOT use information about campaign, segment, and offer that is not given by the user.""",
  verbose=True,
  allow_delegation=False,
  llm = llm,  # the Gemini Pro chat model bound to `llm`
  tools=[
        search_tool
      ]
  # The llm attribute is optional and selects which model the agent uses.
  # It can be a local model through Ollama / LM Studio or a remote
  # model like OpenAI, Mistral, Anthropic or others (https://python.langchain.com/docs/integrations/llms/)
  #
  # Examples:
  # llm=ollama_llm # a model object defined elsewhere
  # llm=ChatOpenAI(model_name="gpt-3.5", temperature=0.7)
)

In [None]:
# Pre-header agent: receives the subject lines and writes a supporting
# pre-header for each one, using the web search tool for research
preheader_creator = Agent(
  role='Pre-header Content Creator that sticks to the task at hand',
  goal='Uncover best examples in creating email Pre-header that matches the subject lines that is captivating and engaging',
  backstory="""You work at a leading phone company.  Find the information on best practices for a research topic on the internet, then create preheaders that follow the best practices.
  If the search doesn't respond with a good answer, then create the best pre-headers you can.
  You will be given 20 different subject lines.
  Be sure to follow all of the user's instructions.
  Your expertise lies in best practices in creating email pre-header content based on subject line, segment type.
  You have a knack for collecting information on best practices on email pre-header that generates the
  most engagement and conversions. Pre-header should support the subject line, but not be the same as the subject line.
  You provide the best email preheaders based on each subject line and provide the subject line
  and preheader for each subject line, nothing else and no premable or explaination.
  Very important: when searching the internet, the format is just one string 'query'.
  Keep searching until you get a response.
  Must include some information about the campaign, segment, and offer that the user has given to you.
  Do NOT use information about campaign, segment, and offer that is not given by the user""",
  verbose=True,
  allow_delegation=False,
  llm = llm,  # the Gemini Pro chat model bound to `llm`
  tools=[
        search_tool
      ]
  # The llm attribute is optional and selects which model the agent uses.
  # It can be a local model through Ollama / LM Studio or a remote
  # model like OpenAI, Mistral, Anthropic or others (https://python.langchain.com/docs/integrations/llms/)
  #
  # Examples:
  # llm=ollama_llm # a model object defined elsewhere
  # llm=ChatOpenAI(model_name="gpt-3.5", temperature=0.7)
)

In [None]:
# Body-content agent: receives the subject line / pre-header pairs and writes
# one body per version, using the web search tool to look up product features
body_content_generator = Agent(
  role='Body Content Generator  that sticks to the task at hand',
  goal='Uncover the best practices in email body content generation and uncover best features of the product that matches the subject line and pre-header pairs by reseaching the internet.',
  backstory="""You work at a leading phone company. Find the best product features on the internet and base the body content around the best product features.
  Your expertise lies in best practices in creating email body content based on subject line, pre-header, and segment type.
  Be sure to follow all of the user's instructions.
  You have a knack for collecting information on best practices on email body content that generates the
  most engagement and conversions. Be creative and make sure that each version's body content is different from each other.
  You will receive 20 subject lines and pre-headers. Stick to those subject lines and pre-headers. each subject line and preheader is a version.
   Body content should support the subject line, pre-header, product's best features, and why they should buy now.
  You provide the best email body content based on each subject line, pre-headers, and product features, then provide the subject line
  , preheader, and body content, for each subject line preheader pairs and the corresponding version, nothing else and no premable or explaination.
  Very important: when searching the internet, the format is just one string 'query'. Keep searching until you get a response.
  Must include some information about the campaign, segment, and offer that the user has given to you.
  Do NOT use information about campaign, segment, and offer that is not given by the user.""",
  verbose=True,
  allow_delegation=False,
  llm = llm,  # the Gemini Pro chat model bound to `llm`
  tools=[
      search_tool
  ]
)


In [None]:
# hero_image_fetcher = Agent(
#   role='Hero Image Link Generator',
#   goal='Return the best image of the product being marketed to be used in a email campaign',
#   backstory="""You work at a leading phone company. Find the best hero-image for the product being marketed by searching the internet and getting links for product images to be used in a email campaign.
#   Your expertise lies in best practices in getting the best hero-image html link to the image only, not the entire page, be used in an email based on each version's content.
#   Do not use images with links that contain imgur.com or other image hosting sites.
#   You have a knack for collecting information on best practices on hero images for email campaigns that generates the
#   most engagement and conversions. Hero-image should support information in each version. You need to provide a link to the image that can directly be used in an html email optimized for mobile.
#   You provide the best hero-image link based on each subject line, pre-headers, and body content, then provide the subject line
#   , preheader, body content, and hero-image link, for each subject line preheader and body content pairs, nothing else and no premable or explaination.""",
#   verbose=True,
#   allow_delegation=False,
#   llm = llm,  #using google gemini pro API
#   tools=[
#       image_tool
#   ]
# )


In [None]:
# hero_image_force = Agent(
#   role='Hero Image Link attacher',
#   goal='Attach a link to all versions',
#   backstory="""all you do is attach the hero image link : 'https://images.samsung.com/is/image/samsung/p6pim/ca/2202/gallery/ca-galaxy-s22-ultra-s908-sm-s908wdraxac-530780086?$330_330_JPG$'
#   to all versions. You will be given 20 different versions of emails. attach the url to each result.""",
#   verbose=True,
#   allow_delegation=False,
#   llm = llm,  #using google gemini pro API
#   tools=[]
# )


In [None]:
# logo_image_force = Agent(
#   role='Hero Image Link attacher',
#   goal='Attach a link to all versions',
#   backstory="""all you do is attach the logo image link : 'https://images.samsung.com/is/image/samsung/assets/global/about-us/brand/logo/300_186_2.png?$568_N_PNG$'
#   to all versions. You will be given 20 different versions of emails. attach the logo url to each result.""",
#   verbose=True,
#   allow_delegation=False,
#   llm = llm,  #using google gemini pro API
#   tools=[]
# )

In [None]:
# html_generator = Agent(
#   role='Email HTML code Generator',
#   goal='Create a beautifully designed email in HTML and provide full code',
#   backstory="""You are a renowned email designer and coder, known for your HTML coding expertise and design knowledge. You thoroughly search online multiple times
#   for papers and design trends to get the knowledge on creating the best emails and you apply that knowledge to create the best emails. You resize images and call to action buttons to based on best practices.
#   You use Samsung Sans font for all the text. You resize text and images based on best practices.
#   You take each version of the email and create the most beautiful and engaging emails based on the subject line, pre-header, and body content.
#   You take the logo and image and place them in the email with the subject line, pre-header, and body content, all in the correct places based on best practices.
#   You center Call to Action buttons and match image color to make beautiful designs.
#   Based on best practices, you use beautiful background graphics and colors to create an effective email in HTML and provide the full code.
#   """,
#   verbose=True,
#   allow_delegation=False,
#   llm = llm,  #using google gemini pro API
#   tools=[
#       search_tool, docs_tool
#   ]
# )


In [None]:
# task1 = Task(
#   description="""Create 5 subject lines for the upcoming S22 Ultra Launch. This is a pre-order type campaign.
#   Product is Samsung Galaxy S22 Ultra and pre-order starts 1/4/2022. The segment type is loyal upgraders, people who upgrade
#   during the pre-order phase within 1 or 2 years, and they have a high trade in value. The offer is extra $150 off to pre-order.""",
#   agent=sl_creator,
#   expected_output="""Subject Line 1\nSubject Line 2\nSubject Line 3\nSubject Line 4\nSubject Line 5
#                   \nSubject Line 6\nSubject Line 7\nSubject Line 8\nSubject Line 9\nSubject Line 10
#                   \nSubject Line 11\nSubject Line 12\nSubject Line 13\nSubject Line 14\nSubject Line 15
#                   \nSubject Line 16\nSubject Line 17\nSubject Line 18\nSubject Line 19\nSubject Line 20
#                   """

# )

In [None]:

## Campaign parameters: these values are interpolated into the task prompts
segment = 'first_responders'
campaign_goal = 'GalaxyBook and Tablet Sales Event'
# A longer variant ('Sales event for first responder customers with Employee
# Pricing Program Accounts') used to be assigned first and was immediately
# overwritten, so only this value ever reached the prompts.
campaign_type = 'Sales event'
offer_description = 'giving up to 60% off all Samsung products'
segment_type = 'First responder members with Employee Pricing Program Accounts'
segment_description = 'Appreciating our First Responders for protecting all of us.'
offer = 'Additional 20% off when you buy a Galaxy Book and Tablet'
offer_expiration = 'The offer is only valid for one week'
product_type = 'All Galaxy Books and Tablets.'
must_include = 'You must include Thank you for your bravery in the subject line'
do_not_include = 'Do not include the segment name in the subject line'
additional_info = "You must include the features of best Samsung products and tailor the content to first responders."


In [None]:
# Task 1: subject lines. The description is assembled from the campaign
# parameters; trailing periods are stripped from the values before the template
# adds its own, and the must-include instruction is inserted as-is (it already
# says where the phrase has to appear), so the prompt no longer contains
# doubled periods or a duplicated "in the subject line".
task1 = Task(
  description=f"""Create 20 subject lines for the {campaign_goal}. This is a {campaign_type} campaign.
  We will be {offer_description.rstrip('.')}. The segment type is {segment_type}, who are {segment_description.rstrip('.')}.
  The offer is: {offer} on {product_type.rstrip('.')}. {must_include.rstrip('.')}. {offer_expiration.rstrip('.')}.
  {do_not_include.rstrip('.')}. Stick to the offers given to you by me, we don't have any other offers.""",
  agent=sl_creator,
  expected_output="""Subject Line 1\nSubject Line 2\nSubject Line 3\nSubject Line 4\nSubject Line 5
                  \nSubject Line 6\nSubject Line 7\nSubject Line 8\nSubject Line 9\nSubject Line 10
                  \nSubject Line 11\nSubject Line 12\nSubject Line 13\nSubject Line 14\nSubject Line 15
                  \nSubject Line 16\nSubject Line 17\nSubject Line 18\nSubject Line 19\nSubject Line 20
                  """)

In [None]:
# Task 2: one pre-header per subject line. The expected-output template is
# generated so all 20 rows share one format; the hand-written version carried
# a stray "# " in front of row 12, which was part of the string rather than a
# comment and therefore ended up in the prompt.
task2 = Task(
  description=f"""Using the subject line provided, create a concise pre-header for each subject line
  and give me a pair of subject line and paired preheader. The segment type is {segment_type}, {segment_description.rstrip('.')}. This is a {campaign_type}.
  The offer is: {offer.rstrip('.')}. We will be {offer_description.rstrip('.')}. {offer_expiration.rstrip('.')}.
  Do not include the segment name in the preheader. Stick to the offers given to you by me, we don't have any other offers.
  """,
  agent=preheader_creator,
  # Literal braces are kept around each placeholder, as in the original template
  expected_output="\n".join(
      f"{{Subject Line {i}}} -- {{Pre-Header {i}}}" for i in range(1, 21)
  ),
)


In [None]:
# Task 3: body content per version. The description now asks for all three
# parts of each version (it previously asked only for the subject line /
# pre-header pair, contradicting the expected output), and trailing periods are
# stripped from the parameters before the template adds its own.
task3 = Task(
  description=f"""Using the subject line and preheader provided, create a concise body content for each subject line
  and pre header, then give me the subject line, pre-header, and body content for each version.
  The segment type is {segment_type}, {segment_description.rstrip('.')}. {additional_info.rstrip('.')}. {offer_expiration.rstrip('.')}.
  Stick to the offers given to you by me, we don't have any other offers.""",
  agent=body_content_generator,
  # One row per version; the "--SL {...} --PH: {...} --B:{...}" layout is kept
  # exactly as in the hand-written template
  expected_output="\n".join(
      f"Version {i} : --SL {{Subject Line {i}}} --PH: {{Pre-Header {i}}} --B:{{Body Content {i}}}"
      for i in range(1, 21)
  ),
)


In [None]:
# Assemble the crew: three agents, each owning one of the three tasks
crew = Crew(
  tasks=[task1, task2, task3],
  agents=[sl_creator, preheader_creator, body_content_generator],
  verbose=2,  # logging level; 1 or 2 select different amounts of detail
)



In [None]:
crew

Crew(id=ea44ed5c-4f4b-46d2-b48a-fafae8bf1f1f, process=sequential, number_of_agents=3, number_of_tasks=3)

In [None]:
# Start the crew and keep whatever it returns in `result`
result = crew.kickoff()

[1m[95m [DEBUG]: == Working Agent: Email Subject Line creator that sticks to the task at hand[00m
[1m[95m [INFO]: == Starting Task: Create 20 subject lines GalaxyBook and Tablet Sales Event. This is a Sales event campaign.
  We will be giving up to 60% off all Samsung products. The segment type is First responder members with Employee Pricing Program Accounts, who are Appreciating our First Responders for protecting all of us.. 
  The offer is a Additional 20% off when you buy a Galaxy Book and Tablet on All Galaxy Books and Tablets.. You must include Thank you for your bravery in the subject line in the subject line. The offer is only valid for one week
  Do not include the segment name in the subject line. Stick to the offers given to you by me, we don't have any other offers.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mAction: DuckDuckGoSearchRun
Action Input: {"search_query": "best examples email subject lines"}[0m[95m 

Here are great examples to n

In [None]:
import csv
import re

# Input string
text = """
Version 1 : --SL {Thank you for your service! Up to 60% off Galaxy Books and Tablets} --PH: {Save big on Samsung Galaxy Books and Tablets, exclusive for Military members with Employee Pricing Program Accounts} --B:{Military members with Employee Pricing Program Accounts, save up to 60% on Samsung Galaxy Books and Tablets. Don't miss out on this limited-time offer. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Order now and take advantage of this exclusive offer!}

                      Version 2 : --SL {Military Exclusive: Save up to 80% on Samsung Galaxy Books and Tablets} --PH: {Exclusive savings for Military members with Employee Pricing Program Accounts, up to 80% off Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, take advantage of this exclusive offer and save up to 80% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Don't miss out on this limited-time offer. Order now!}

                      Version 3 : --SL {Calling all Military Members: Employee Pricing + Extra 20% off Galaxy Books and Tablets} --PH: {Military members with Employee Pricing Program Accounts, save an extra 20% on Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, save an extra 20% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. This offer is only valid for a limited time, so don't miss out. Order now!}

                      Version 4 : --SL {Thank you for your service! Limited-time offer: Up to 60% off Galaxy Books and Tablets} --PH: {Don't miss out, Military members with Employee Pricing Program Accounts, up to 60% off Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, don't miss out on this limited-time offer and save up to 60% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Order now!}

                      Version 5 : --SL {Military Appreciation Sale: Save up to 80% on Samsung Galaxy Books and Tablets} --PH: {Military Appreciation Sale, save up to 80% on Samsung Galaxy Books and Tablets, exclusive for Military members with Employee Pricing Program Accounts} --B:{Military members with Employee Pricing Program Accounts, take advantage of this exclusive Military Appreciation Sale and save up to 80% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Don't miss out on this limited-time offer. Order now!}

                      Version 6 : --SL {Exclusive for Military: Employee Pricing + Extra 20% off Galaxy Books and Tablets} --PH: {Exclusive offer for Military members with Employee Pricing Program Accounts, save an extra 20% on Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, take advantage of this exclusive offer and save an extra 20% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Don't miss out on this limited-time offer. Order now!}

                      Version 7 : --SL {Thank you for your service! Don't miss out on up to 60% off Galaxy Books and Tablets} --PH: {Time is running out, Military members with Employee Pricing Program Accounts, up to 60% off Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, don't miss out on this limited-time offer and save up to 60% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Order now!}

                      Version 8 : --SL {Military Members: Save up to 80% on Samsung Galaxy Books and Tablets} --PH: {Military members with Employee Pricing Program Accounts, save up to 80% on Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, take advantage of this exclusive offer and save up to 80% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Don't miss out on this limited-time offer. Order now!}

                      Version 9 : --SL {Calling all Military Members: Employee Pricing + Extra 20% off Galaxy Books and Tablets} --PH: {Calling all Military members with Employee Pricing Program Accounts, save an extra 20% on Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, save an extra 20% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. This offer is only valid for a limited time, so don't miss out. Order now!}

                      Version 10 : --SL {Thank you for your service! Limited-time offer: Up to 60% off Galaxy Books and Tablets} --PH: {Military members with Employee Pricing Program Accounts, don't miss out, up to 60% off Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, don't miss out on this limited-time offer and save up to 60% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Order now!}

                      Version 11 : --SL {Military Appreciation Sale: Save up to 80% on Samsung Galaxy Books and Tablets} --PH: {Military Appreciation Sale, save up to 80% on Samsung Galaxy Books and Tablets, exclusive for Military members with Employee Pricing Program Accounts} --B:{Military members with Employee Pricing Program Accounts, take advantage of this exclusive Military Appreciation Sale and save up to 80% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Don't miss out on this limited-time offer. Order now!}

                      Version 12 : --SL {Exclusive for Military: Employee Pricing + Extra 20% off Galaxy Books and Tablets} --PH: {Exclusive offer for Military members with Employee Pricing Program Accounts, save an extra 20% on Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, take advantage of this exclusive offer and save an extra 20% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Don't miss out on this limited-time offer. Order now!}

                      Version 13 : --SL {Thank you for your service! Don't miss out on up to 60% off Galaxy Books and Tablets} --PH: {Time is running out, Military members with Employee Pricing Program Accounts, up to 60% off Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, don't miss out on this limited-time offer and save up to 60% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Order now!}

                      Version 14 : --SL {Military Members: Save up to 80% on Samsung Galaxy Books and Tablets} --PH: {Military members with Employee Pricing Program Accounts, save up to 80% on Samsung Galaxy Books and Tablets} --B:{Military members with Employee Pricing Program Accounts, take advantage of this exclusive offer and save up to 80% on Samsung Galaxy Books and Tablets. With features like a large OLED touchscreen display, high-performance laptop with discrete GPU, fantastic battery life and efficiency, and useful Samsung features, you'll be able to stay connected and productive wherever you go. Don't miss out on this limited-time offer. Order now!}





"""
# Split the pasted crew output into one chunk per "Version N :" entry.
# The lookahead only splits where "Version" is followed by "<number> :", so the
# word "Version" inside a subject line or body does not start a new record.
versions = re.split(r'Version\s+(?=\d+\s*:)', text)[1:]

# Shape of one chunk (after the "Version " prefix has been split off):
#     N : --SL {subject line} --PH: {pre-header} --B:{body content}
# Capturing only what sits between the braces keeps the "--SL" / "--PH:" /
# "--B:" markers and the wrapping braces out of the CSV. A fixed-offset slice
# such as sl[5:-1] leaves "-SL {" (or "--SL {" for two-digit version numbers)
# in the data, because the prefix length depends on the number of digits.
version_pattern = re.compile(
    r'(\d+)\s*:\s*--SL\s*\{(.*)\}\s*--PH:\s*\{(.*)\}\s*--B:\s*\{(.*)\}',
    re.DOTALL,  # the copy inside the braces may span several lines
)

# Extract SL, PH, and Body from each version
data = []

for version in versions:
    parsed = version_pattern.match(version.strip())
    if parsed is None:
        # Fail with the offending text instead of an opaque unpacking error.
        raise ValueError(
            f'Version entry does not match the expected '
            f'"N : --SL {{..}} --PH: {{..}} --B:{{..}}" layout: {version.strip()[:80]!r}'
        )
    number, sl, ph, body = (field.strip() for field in parsed.groups())
    data.append({
        'segment_type': f'{segment}',  # `segment` must be set by an earlier cell
        'version': number,
        'SL': sl,
        'PH': ph,
        'Body': body,
    })

if not data:
    # Nothing parsed: stop here rather than failing later on data[0].
    raise ValueError('No "Version N :" entries found in text')

# Write to CSV file
# Append mode lets repeated runs accumulate rows in one file per campaign;
# `campaign` must be set by an earlier cell.
with open(f'/content/drive/MyDrive/Colab Notebooks/{campaign}.csv', 'a', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=data[0].keys())
    if csvfile.tell() == 0:  # Check if file is empty
        writer.writeheader()  # Write the header row
    writer.writerows(data)  # Write the data rows

In [None]:
import pandas as pd
# Read the campaign CSV back from Drive to inspect what has been saved so far.
df = pd.read_csv(
    f'/content/drive/MyDrive/Colab Notebooks/{campaign}.csv'
)

In [None]:
# Display the loaded rows.
# NOTE(review): the "Unnamed: 0" column in the output suggests the CSV on disk
# also contains a saved index column — confirm how that file was written.
df

Unnamed: 0,segment_type,version,SL,PH,Body
0,Students,1,-SL {Back to School Sales Event: Save up to 60...,{Get ready for school with the latest tech at...,{Get ready for school with the latest Samsung ...
1,Students,2,-SL {Back to School Savings: Get an Additional...,{Score an extra 20% off on top of our already...,{Get an additional 20% off on top of our alrea...
2,Students,3,-SL {Back to School Sale: Samsung Galaxy Books...,{Don't miss out on these incredible deals on ...,{Don't miss out on these incredible deals on S...
3,Students,4,-SL {Back to School Savings: Up to 60% off Sam...,{Save big on Samsung Galaxy Books and Tablets...,"{Save big on Samsung Galaxy Books and Tablets,..."
4,Students,5,-SL {Back to School Sale: Save on Samsung Gala...,{Get the latest Samsung Galaxy Books and Tabl...,{Get the latest Samsung Galaxy Books and Table...
5,Students,6,-SL {Back to School Savings: Get an Additional...,{Take advantage of our exclusive offer and sa...,{Take advantage of our exclusive offer and sav...
6,Students,7,-SL {Back to School Sale: Samsung Galaxy Books...,{Shop now for the best deals on Samsung Galax...,{Shop now for the best deals on Samsung Galaxy...
7,Students,8,-SL {Back to School Savings: Up to 60% off Sam...,"{Don't wait, these deals won't last forever!}","{Don't wait, these deals on Samsung Galaxy Boo..."
8,Students,9,-SL {Back to School Sale: Save on Samsung Gala...,{Get your hands on the latest Samsung Galaxy ...,{Get your hands on the latest Samsung Galaxy B...
9,Students,10,--SL {Back to School Savings: Get an Additiona...,{Upgrade your tech with our special offer!},{Upgrade your tech with our special offer on S...
