In [1]:
!pip install transformers peft bitsandbytes trl deepeval

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.8.6-py3-none-any.whl (245 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting deepeval
  Downloading deepeval-0.21.48-py3-none-any.whl (245 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.6/245.6 kB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-0.30.1-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB

In [2]:
# Mount Google Drive inside the Colab VM at /content/drive.
# This only works when the notebook runs on Google Colab (google.colab is not available elsewhere).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig
from trl import SFTTrainer

In [4]:
# Quantization settings used when loading the base model:
# 4-bit NF4 weights, single (not double) quantization, float16 compute.

compute_dtype = torch.float16
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

In [3]:
# from google.colab import userdata
# userdata.get('Llama3')

In [6]:
# Ensure local directory does not exist
!rm -rf meta-llama/Meta-Llama-3-8B


# Load base model with access token
base_model_name = 'meta-llama/Meta-Llama-3-8B'
access_token = "######"
llama_3 = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    token=access_token,
    quantization_config=quant_config,
    device_map={"": 0},
)

# Load tokenizer with access token
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    token=access_token,
    trust_remote_code=True,
)

# Set padding token and side
tokenizer.pad_token = tokenizer.eos_token # this model doesnt requires separate padding token
tokenizer.padding_side = "right" #padding should be added to the end (right side) of the sequences

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
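### TODO: need to add some guards around generation (e.g. validate the inputs and check that the generated email quotes the discount code verbatim).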

In [34]:
def _truncate_words(text, max_words):
    """Cut `text` after its first `max_words` whitespace-separated words, keeping its line breaks."""
    words = text.split()
    if len(words) <= max_words:
        return text
    end = 0
    for word in words[:max_words]:
        # split() yields the words in order, so searching from `end` always lands on the next word.
        end = text.index(word, end) + len(word)
    return text[:end] + "..."


def _strip_sign_off(text, sign_offs):
    """Drop everything from the first recognised closing phrase onwards."""
    cut = len(text)
    # Exact phrase anywhere in the text.
    for phrase in sign_offs:
        pos = text.find(phrase)
        if pos != -1:
            cut = min(cut, pos)
    # Same phrases in any capitalisation when they open a line, so "Best Regards," is removed
    # as a whole instead of being cut at "Regards," and leaving a dangling "Best".
    lowered = tuple(phrase.lower() for phrase in sign_offs)
    offset = 0
    for line in text.splitlines(keepends=True):
        if offset >= cut:
            break
        if line.lstrip().lower().startswith(lowered):
            cut = offset
            break
        offset += len(line)
    return text[:cut].strip()


def generate_email_auto(product_name, product_description, interest_level, discount_offers, discount_code, user_name,
                        max_words=200, model=None, text_tokenizer=None):
    """Generate a promotional email with the language model and append a fixed signature.

    Args:
        product_name: Product name; used in the prompt and in the subject line.
        product_description: Free-text description placed in the prompt.
        interest_level: "very interested", "interested", or anything else; selects the tone.
        discount_offers: Discount text placed in the prompt (e.g. "20%").
        discount_code: Discount code placed in the prompt.
        user_name: Recipient name used in the prompt and the greeting.
        max_words: Word limit applied to the text before the signature is appended.
        model: Object providing `generate` and `device`; defaults to the notebook's `llama_3`.
        text_tokenizer: Tokenizer to use; defaults to the notebook's `tokenizer`.

    Returns:
        The email text: subject line, greeting, generated body and the fixed signature block.

    Note:
        The body is model output and is not validated; the run recorded for this cell printed a
        discount code that differs from the one supplied, so check critical fields before sending.
    """
    # Fall back to the notebook-level model/tokenizer only when no override is supplied.
    if model is None:
        model = llama_3
    if text_tokenizer is None:
        text_tokenizer = tokenizer

    # Fixed template values
    sender_name = "Binit Sapkota"
    company_name = "MrBeerGod Technologies"
    website_url = "https://barahsinghe.com/"

    # Determine email tone and persuasion based on interest level
    tone = {
        "very interested": "excited and convincing",
        "interested": "enthusiastic",
    }.get(interest_level, "informative")

    # The prompt ends with the subject line and greeting so the model continues with the body.
    subject_line = f"Subject: Exclusive {product_name} Offer Just for You!"
    structured_input = (
        f"Generate a personalized email with the following details:\n"
        f"Product Name: {product_name}\n"
        f"Product Description: {product_description}\n"
        f"Interest Level: {interest_level}\n"
        f"Discount Offers: {discount_offers}\n"
        f"Discount Code: {discount_code}\n"
        f"User Name: {user_name}\n"
        f"Email Tone: {tone}\n"
        f"Email Content:\n{subject_line}\n"
        f"Dear {user_name},\n\n"
    )

    # Tokenize the prompt and put the tensors on the same device as the model.
    inputs = text_tokenizer(structured_input, return_tensors="pt").to(model.device)

    # Generate the output
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # explicit mask: pad and EOS share the same token id
        max_length=500,  # upper bound on prompt + generated tokens
        min_length=200,  # lower bound on prompt + generated tokens
        num_return_sequences=1,
        pad_token_id=text_tokenizer.eos_token_id,
        no_repeat_ngram_size=3,  # Prevent repetition
    )

    # Decode the generated text (the decoded sequence still starts with the prompt)
    email = text_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep everything from the subject line on. Look for the full subject line first so that a
    # "Subject:" inside the product description is not picked up; never slice from -1.
    start = email.find(subject_line)
    if start == -1:
        start = email.find("Subject:")
    generated_email = email[max(start, 0):].strip()

    # Limit the email to max_words words without flattening its paragraphs
    generated_email = _truncate_words(generated_email, max_words)

    # Remove any existing sign-offs
    sign_offs = ["Regards,", "Best regards,", "Sincerely,", "Yours truly,"]
    generated_email = _strip_sign_off(generated_email, sign_offs)

    # Append the fixed template values
    generated_email += (
        f"\n\nBest regards,\n"
        f"{sender_name}\n"
        f"{company_name}\n"
        f"Visit us: {website_url}\n"
    )

    return generated_email

# Sample request(s) used to exercise the generator; keys mirror the function's parameter names
inputs_list = [
    dict(
        product_name="Beer",
        product_description="Beer is a popular alcoholic beverage made from fermented grains, typically barley, and flavored with hops, offering a wide range of flavors and styles from light and crisp to dark and rich.",
        interest_level="very interested",
        discount_offers="20%",
        discount_code="Jadiya",
        user_name="Ms. Sriju",
    ),
]

# Render and print one email per request
for inputs in inputs_list:
    email_content = generate_email_auto(**inputs)
    print(email_content)


Subject: Exclusive Beer Offer Just for You!
Dear Ms. Sriju,

We've got a special offer just for you! Our beer is perfect for any occasion, whether you're hosting a party or just want to unwind after a long day. With a variety of flavors to choose from, you're sure to find the perfect beer to suit your taste.

Get 20% off your first order when you use discount code Jadija at checkout. Don't miss out on this limited-time offer!

Best regards,
Binit Sapkota
MrBeerGod Technologies
Visit us: https://barahsinghe.com/



In [36]:
def _truncate_words(text, max_words):
    """Cut `text` after its first `max_words` whitespace-separated words, keeping its line breaks."""
    words = text.split()
    if len(words) <= max_words:
        return text
    end = 0
    for word in words[:max_words]:
        # split() yields the words in order, so searching from `end` always lands on the next word.
        end = text.index(word, end) + len(word)
    return text[:end] + "..."


def _strip_sign_off(text, sign_offs):
    """Drop everything from the first recognised closing phrase onwards."""
    cut = len(text)
    # Exact phrase anywhere in the text.
    for phrase in sign_offs:
        pos = text.find(phrase)
        if pos != -1:
            cut = min(cut, pos)
    # Same phrases in any capitalisation when they open a line, so "Best Regards," is removed
    # as a whole instead of being cut at "Regards," and leaving a dangling "Best".
    lowered = tuple(phrase.lower() for phrase in sign_offs)
    offset = 0
    for line in text.splitlines(keepends=True):
        if offset >= cut:
            break
        if line.lstrip().lower().startswith(lowered):
            cut = offset
            break
        offset += len(line)
    return text[:cut].strip()


def generate_email_auto(product_name, product_description, interest_level, discount_offers, discount_code, user_name,
                        max_words=200, model=None, text_tokenizer=None):
    """Generate a promotional email in paragraph form and append a fixed signature.

    Args:
        product_name: Product name; used in the prompt and in the subject line.
        product_description: Free-text description placed in the prompt.
        interest_level: "very interested", "interested", or anything else; selects the tone.
        discount_offers: Discount text placed in the prompt (e.g. "20%").
        discount_code: Discount code placed in the prompt.
        user_name: Recipient name used in the prompt and the greeting.
        max_words: Word limit applied to the text before the signature is appended.
        model: Object providing `generate` and `device`; defaults to the notebook's `llama_3`.
        text_tokenizer: Tokenizer to use; defaults to the notebook's `tokenizer`.

    Returns:
        The email text: subject line, greeting, generated body and the fixed signature block.

    Note:
        The body is model output and is not validated; the run recorded for this cell contains an
        expiry date that was never supplied, so check the content before sending.
    """
    # Fall back to the notebook-level model/tokenizer only when no override is supplied.
    if model is None:
        model = llama_3
    if text_tokenizer is None:
        text_tokenizer = tokenizer

    # Fixed template values
    sender_name = "Binit Sapkota"
    company_name = "MrBeerGod Technologies"
    website_url = "https://barahsinghe.com/"

    # Determine email tone and persuasion based on interest level
    tone = {
        "very interested": "excited and convincing",
        "interested": "enthusiastic",
    }.get(interest_level, "informative")

    # The prompt ends with the subject line and greeting so the model continues with the body.
    subject_line = f"Subject: Exclusive {product_name} Offer Just for You!"
    structured_input = (
        f"Generate a personalized email in paragraph form with the following details:\n"
        f"Product Name: {product_name}\n"
        f"Product Description: {product_description}\n"
        f"Interest Level: {interest_level}\n"
        f"Discount Offers: {discount_offers}\n"
        f"Discount Code: {discount_code}\n"
        f"User Name: {user_name}\n"
        f"Email Tone: {tone}\n"
        f"Email Content in Paragraph:\n{subject_line}\n"
        f"Dear {user_name},\n\n"
    )

    # Tokenize the prompt and put the tensors on the same device as the model.
    inputs = text_tokenizer(structured_input, return_tensors="pt").to(model.device)

    # Generate the output
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # explicit mask: pad and EOS share the same token id
        max_length=500,  # upper bound on prompt + generated tokens
        min_length=200,  # lower bound on prompt + generated tokens
        num_return_sequences=1,
        pad_token_id=text_tokenizer.eos_token_id,
        no_repeat_ngram_size=3,  # Prevent repetition
    )

    # Decode the generated text (the decoded sequence still starts with the prompt)
    email = text_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep everything from the subject line on. Look for the full subject line first so that a
    # "Subject:" inside the product description is not picked up; never slice from -1.
    start = email.find(subject_line)
    if start == -1:
        start = email.find("Subject:")
    generated_email = email[max(start, 0):].strip()

    # Limit the email to max_words words (200 by default) without flattening its paragraphs
    generated_email = _truncate_words(generated_email, max_words)

    # Remove any existing sign-offs. The former catch-all "Best " entry cut the email at any
    # sentence starting with "Best"; closings such as "Best Regards," are now caught by the
    # case-insensitive line-start check, and the common "Best ..." closings are listed explicitly.
    sign_offs = ["Regards,", "Best regards,", "Sincerely,", "Yours truly,", "Best wishes,", "Best,"]
    generated_email = _strip_sign_off(generated_email, sign_offs)

    # Append the fixed template values
    generated_email += (
        f"\n\nBest regards,\n"
        f"{sender_name}\n"
        f"{company_name}\n"
        f"Visit us: {website_url}\n"
    )

    return generated_email

# Sample request(s) used to exercise the generator; keys mirror the function's parameter names
inputs_list = [
    dict(
        product_name="Beer",
        product_description="Beer is a popular alcoholic beverage made from fermented grains, typically barley, and flavored with hops, offering a wide range of flavors and styles from light and crisp to dark and rich.",
        interest_level="very interested",
        discount_offers="20%",
        discount_code="Jadiya",
        user_name="Ms. Sriju",
    ),
]

# Render and print one email per request
for inputs in inputs_list:
    email_content = generate_email_auto(**inputs)
    print(email_content)


Subject: Exclusive Beer Offer Just for You!
Dear Ms. Sriju,

We're excited to offer you an exclusive discount on our best-selling Beer! As a valued customer, we're offering a 20% discount on all orders, just for you.

To claim your discount, simply enter the code "Jadiya" at checkout. This code is valid for one-time use only and expires on March 15th, 2023.

Don't miss out on this limited-time offer! Order now and enjoy your favorite Beer at a discounted price.

Thank you for being a loyal customer!

Best regards,
Binit Sapkota
MrBeerGod Technologies
Visit us: https://barahsinghe.com/

