Setup reference: https://github.com/password123456/setup-selenium-with-chrome-driver-on-ubuntu_debian
1. Download Chrome: wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
2. Install Chrome: apt-get install -y ./google-chrome-stable_current_amd64.deb
3. Check the installed version: google-chrome --version   # e.g. Google Chrome 129.0.6668.100
4. Install Selenium: pip install selenium
5. Install webdriver-manager: pip install webdriver-manager
6. Install the Tesseract OCR engine: sudo apt-get install tesseract-ocr
7. Install the Python wrapper for Tesseract: pip install pytesseract
8. In Python, point pytesseract at the Tesseract binary: pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

Prerequisite: the required Python libraries (selenium, python-docx, docx2pdf) must be installed.


In [1]:
#https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/retrieval-augmented-generation/Document_QnA_using_gemini_and_vector_search.ipynb

# File system operations and displaying images
import os
import pandas as pd

# Service-account key file, exported through GOOGLE_APPLICATION_CREDENTIALS.
KEYFILE_PATH = "keyfile_other.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = KEYFILE_PATH
PROJECT_ID = "publicismedia-cortex-216901"
REGION = "europe-west1"
# Read the OpenAI key from the environment so it never has to be pasted into
# the notebook; falls back to the previous empty-string value when unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")


# Import LangChain components
import langchain

# Record the installed LangChain version in the cell output.
print("LangChain version:", langchain.__version__)


LangChain version: 0.3.11


In [2]:
from openai import OpenAI
# Module-level OpenAI client used by the chat-completion calls in this notebook;
# passing api_key explicitly is optional (it is the client's default source).
client = OpenAI(api_key=OPENAI_API_KEY)


In [27]:
# import streamlit as st
# Import the necessary libraries
import ast
import json
import os
import shutil
import time

import pytesseract
import regex as re
from docx import Document
from fpdf import FPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from PIL import Image
from PyPDF2 import PdfReader
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# from dotenv import load_dotenv

def get_pdf_text(pdf_docs):
    """Return the extracted text of every page of every PDF in ``pdf_docs``.

    Each item of ``pdf_docs`` is handed to ``PdfReader`` unchanged. Page texts
    are concatenated in document order, then page order, with no separator.
    """
    return "".join(
        page.extract_text()
        for document in pdf_docs
        for page in PdfReader(document).pages
    )


def get_text_chunks(text, chunk_size=5000, chunk_overlap=500):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Value passed to the splitter as ``chunk_size``
            (defaults to the previously hard-coded 5000).
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``
            (defaults to the previously hard-coded 500).

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (the list of chunks).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)


def get_vector_store(text_chunks, index_path="faiss_index"):
    """Embed ``text_chunks``, build a FAISS index from them and save it to disk.

    Args:
        text_chunks: The texts to embed and index.
        index_path: Folder passed to ``save_local``; defaults to the
            previously hard-coded ``"faiss_index"``.

    Returns:
        The in-memory vector store that was just saved, so callers may use it
        directly instead of reloading it from disk (previously nothing was
        returned).
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", openai_api_key=OPENAI_API_KEY)
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(index_path)
    return vector_store


# Function to capture screenshots of each page
def capture_screenshots(url, output_folder, scroll_step=250, scroll_delay=0.2):
    """Open ``url`` in headless Chrome and save one screenshot per scroll step.

    The page is scrolled from the top in increments of ``scroll_step`` pixels;
    after each scroll the visible window is saved as
    ``{output_folder}/screenshot_<n>.png`` (``n`` starts at 1) and then
    rewritten resized to a width of 1024 pixels, keeping the aspect ratio.

    Args:
        url: Page to load.
        output_folder: Folder for the PNG files. It is deleted and recreated
            on every call, so anything already in it is lost.
        scroll_step: Pixels scrolled between screenshots (default 250).
        scroll_delay: Seconds to wait after each scroll before capturing
            (default 0.2).

    Returns:
        The number of screenshots written.
    """
    # Start from an empty folder so screenshots of a previous page never mix in.
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    # makedirs (not mkdir) so a nested output path works as well.
    os.makedirs(output_folder)

    chrome_options = Options()
    chrome_options.binary_location = '/usr/bin/google-chrome'
    chrome_options.add_argument('--headless')  # Run Chrome in headless mode
    chrome_options.add_argument('--no-sandbox')  # Required for running as root user
    chrome_options.add_argument('--disable-dev-shm-usage')  # Required for running in Docker

    driver = webdriver.Chrome(options=chrome_options)
    screenshot_count = 0
    try:
        driver.get(url)

        # Wait (up to 30 s) until the <body> element is present.
        WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.TAG_NAME, 'body')))

        total_height = int(driver.execute_script("return document.body.scrollHeight"))
        for offset in range(0, total_height, scroll_step):
            driver.execute_script("window.scrollTo(0, {});".format(offset))
            time.sleep(scroll_delay)

            screenshot_path = f"{output_folder}/screenshot_{screenshot_count + 1}.png"
            driver.save_screenshot(screenshot_path)

            # resize() returns a new image, so the source file handle can be
            # closed before the resized copy overwrites the same path.
            with Image.open(screenshot_path) as image:
                resized = image.resize((1024, int(image.height * 1024 / image.width)))
            resized.save(screenshot_path)

            screenshot_count += 1
    finally:
        # Always shut the browser down, even when loading or capturing fails;
        # otherwise every failed call leaves a Chrome process behind.
        driver.quit()

    return screenshot_count


# Function to create Word document and insert screenshots
def create_word_document(output_folder, num_pages):
    """Write ``{output_folder}/output.docx`` containing the numbered screenshots.

    Pictures ``screenshot_1.png`` through ``screenshot_<num_pages>.png`` from
    ``output_folder`` are added to a new document in ascending order.
    """
    picture_paths = [
        f"{output_folder}/screenshot_{page_number}.png"
        for page_number in range(1, num_pages + 1)
    ]

    document = Document()
    for picture_path in picture_paths:
        document.add_picture(picture_path)

    document.save(f"{output_folder}/output.docx")


# Build a PDF from the PNG screenshots in a folder (one image per page)
def convert_to_pdf(output_folder):
    """Write ``{output_folder}/output.pdf`` with one page per PNG in the folder.

    Files ending in ``.png`` are ordered by the first number in their name
    (so ``screenshot_2`` comes before ``screenshot_10``). PNG files without any
    digit are kept and placed last, in name order, instead of crashing the
    sort. Each image is placed at x=10, y=10 with a width of 190.

    Args:
        output_folder: Folder that holds the PNG files and receives the PDF.
    """
    def _numeric_order(filename):
        # Numbered files first (by number), then digit-less files by name;
        # the filename tie-breaker keeps the order deterministic because
        # os.listdir() returns entries in arbitrary order.
        match = re.search(r'\d+', filename)
        if match is None:
            return (1, 0, filename)
        return (0, int(match.group()), filename)

    screenshots = sorted(
        (name for name in os.listdir(output_folder) if name.endswith('.png')),
        key=_numeric_order,
    )

    pdf = FPDF()
    for screenshot in screenshots:
        pdf.add_page()
        pdf.image(os.path.join(output_folder, screenshot), x=10, y=10, w=190)

    pdf.output(f"{output_folder}/output.pdf", "F")


def get_html_text(url='https://ecommerce.inclusivedesigntoolkit.com/retailers/overview.html', output_folder='screenshots'):
    """Return the OCR text of a web page, read from scrolling screenshots.

    The page is captured with ``capture_screenshots`` into ``output_folder``,
    then every ``.png`` in that folder is run through
    ``pytesseract.image_to_string`` and the results are concatenated.

    Args:
        url: Page to capture.
        output_folder: Folder used for the screenshots; defaults to the
            previously hard-coded ``'screenshots'``.

    Returns:
        The concatenated OCR text, in screenshot-number order.
    """
    n_pages = capture_screenshots(url, output_folder)
    print(n_pages, url)

    def _numeric_order(filename):
        # Numbered files first (by number); a PNG without digits sorts last
        # instead of raising on a failed regex match.
        match = re.search(r'\d+', filename)
        if match is None:
            return (1, 0, filename)
        return (0, int(match.group()), filename)

    screenshots = sorted(
        (name for name in os.listdir(output_folder) if name.endswith('.png')),
        key=_numeric_order,
    )

    page_texts = []
    for screenshot in screenshots:
        image_path = os.path.join(output_folder, screenshot)
        # Close each image once it has been read so file handles do not pile up.
        with Image.open(image_path) as image:
            page_texts.append(pytesseract.image_to_string(image))

    return "".join(page_texts)
 

def user_input(input_data, source_type, user_question, p=1, model='gemini', verbose=0):
    """Answer ``user_question`` from a PDF or web page via retrieval + chat completion.

    The source text is chunked, embedded and indexed with FAISS; the ``p``
    chunks most similar to the question become the context of a single
    chat-completion request.

    Args:
        input_data: Path of the PDF, or URL of the page.
        source_type: ``'pdf'`` or ``'html'``.
        user_question: Question used both for retrieval and in the prompt.
        p: Number of retrieved chunks to put in the context.
        model: Accepted for interface compatibility only; the request below
            always uses ``gpt-4-turbo``.
        verbose: Values above 1 print the chunk count and the context.

    Returns:
        The model's answer text, or ``""`` when the source produced no text.

    Raises:
        ValueError: If ``source_type`` is neither ``'pdf'`` nor ``'html'``.
    """
    if source_type == 'pdf':
        text = get_pdf_text([input_data])
    elif source_type == 'html':
        text = get_html_text(input_data)
    else:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError(f"Unsupported source_type {source_type!r}; expected 'pdf' or 'html'")

    chunks = get_text_chunks(text)
    if verbose > 1:
        print('number of chunks:', len(chunks))

    # Nothing to index: an empty chunk list cannot be embedded, and the prompt
    # contract for "answer not in context" is an empty string anyway.
    if not chunks:
        return ""

    # Embed the chunks and persist the index to "faiss_index".
    get_vector_store(chunks)

    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", openai_api_key=OPENAI_API_KEY)

    # NOTE(review): allow_dangerous_deserialization=True unpickles the index
    # file. That is acceptable only because the file was written by the call
    # just above; never point this at an index from an untrusted source.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

    # Ask for at least p results; the library default of 4 silently capped
    # the context whenever p was larger than 4.
    docs = new_db.similarity_search(user_question, k=max(p, 1))
    context = "".join(doc.page_content for doc in docs[:p])

    if verbose > 1:
        print("Context: ", context)

    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0,
        messages=[
            {"role": "user", "content": f"""
                Answer the question as precise as possible using the provided context. If the answer is
                not contained in the context, then return empty string and nothing else, Provide accurate answers and concise answers in form of points. \n\n
                Context: \n {context} \n
                Question: \n {user_question} \n
                Answer: 
                """}
        ]
    )

    return completion.choices[0].message.content


def get_guidelines(input_data='sample1.pdf', source_type='pdf', section='title', model='gemini', verbose=0):
    """Extract the writing guidelines for one content section from a source.

    Args:
        input_data: PDF path or URL forwarded to ``user_input``.
        source_type: ``'pdf'`` or ``'html'``, forwarded to ``user_input``.
        section: One of ``'title'``, ``'description'``, ``'bullets'``,
            ``'general'``; selects the retrieval question.
        model: Forwarded to ``user_input``.
        verbose: Forwarded to ``user_input``; values above 0 also print the
            guidelines.

    Returns:
        The guidelines text returned by ``user_input``.

    Raises:
        ValueError: If ``section`` is not one of the four known sections.
    """
    # section -> (retrieval question, number of chunks to use as context)
    queries = {
        'title': (
            "instructions and information to write good title, do not include information related to bullets",
            2,
        ),
        'description': (
            "instructions and information to write good product description ?",
            1,
        ),
        'bullets': (
            "instructions and information to write good bullets, do not include product features related information ?",
            1,
        ),
        'general': (
            "instructions and information to write good product content. Basically extract information that applies to all title, bullets and description and is not specific to any.",
            1,
        ),
    }
    if section not in queries:
        # Previously an unknown section failed later with UnboundLocalError.
        raise ValueError(f"Unknown section {section!r}; expected one of {sorted(queries)}")

    question, top_chunks = queries[section]
    guidelines = user_input(input_data, source_type, question, top_chunks, model=model, verbose=verbose)

    if verbose > 0:
        print('guidelines-\n', guidelines)
    return guidelines


def get_guidelines_from_excel(section='title', template_path='Guidelines Template.xlsx'):
    """Read one section's guidelines from the Excel template as a bullet list.

    Args:
        section: One of ``'title'``, ``'description'``, ``'bullets'``,
            ``'general'``; selects the template column.
        template_path: Workbook to read; defaults to the previously
            hard-coded ``'Guidelines Template.xlsx'``.

    Returns:
        A string with one ``- ``-prefixed line per non-empty cell of the
        selected column (just ``'- '`` when the column has no values).

    Raises:
        ValueError: If ``section`` is not one of the four known sections
            (previously the raw DataFrame was returned in that case).
    """
    columns = {
        'bullets': 'Bullet Guidelines',
        'title': 'Title Guidelines',
        'description': 'Description Guidelines',
        'general': 'General Guidelines',
    }
    if section not in columns:
        raise ValueError(f"Unknown section {section!r}; expected one of {sorted(columns)}")

    template = pd.read_excel(template_path)
    # astype(str) so numeric cells do not make str.join() raise TypeError.
    entries = template[columns[section]].dropna().astype(str).tolist()
    return '- ' + '\n- '.join(entries)


def _parse_issues_payload(response):
    """Turn the model's reply into a dict without executing it.

    Accepts the reply bare or wrapped in a Markdown code fence, and parses it
    as JSON first, then as a Python literal (single quotes, ``None``...).
    Raises ``ValueError``/``SyntaxError`` when it is neither.
    """
    payload = response.strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", payload, re.DOTALL)
    if fenced:
        payload = fenced.group(1)
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        # literal_eval only builds literals, so text coming back from the
        # model can never run code the way eval() could.
        return ast.literal_eval(payload)


def get_issues(guidelines, section='title', bullets_text='', title_text='', description_text='', model='gemini', verbose=0):
    """Ask the model which guidelines a piece of product content violates.

    Args:
        guidelines: Guideline text inserted into the prompt.
        section: ``'title'``, ``'description'``, ``'bullets'`` or
            ``'general'``; selects the prompt and which texts it includes.
        bullets_text: Product bullets (used for ``'bullets'``/``'general'``).
        title_text: Product title (used for ``'title'``/``'general'``).
        description_text: Product description (used for
            ``'description'``/``'general'``).
        model: Accepted for interface compatibility only; the request always
            uses ``gpt-4-turbo``.
        verbose: Values above 0 print the violated guidelines.

    Returns:
        The value of ``"Violated Guidelines"`` in the model's reply.

    Raises:
        ValueError: If ``section`` is unknown, or the reply cannot be parsed.
        KeyError: If the reply has no ``"Violated Guidelines"`` entry.
    """
    if section=='title':
        prompt = f"""I want to ensure my product listings comply with a specific retailer's content and style guidelines. I have added these guidelines. Please analyze my product title comparing them to the guidelines.
            Guidelines for Title:[{guidelines}]
            Product Title: {title_text}
            Evaluate my product title based on these guidelines and assign a score between 0 and 1, reflecting the level of compliance.
            For scoring less than 1, provide a list of specific missing or violated guidelines.
            Provide output as a dictionary containing:"""+"""
            {
            "Score":Score value, 
            "Violated Guidelines":[List of missing or violated guidelines],
            "Explanation":Include an explanation for the list of missing or violated guidelines
            }
            Always return answer in above format and no other information."""

    elif section=='description':
        prompt = f"""I want to ensure my product listings comply with a specific retailer's content and style guidelines. I have added these guidelines. Please analyze my product description comparing them to the guidelines.
                Guidelines for Description:[{guidelines}]
                Product Description: {description_text}
                Evaluate my product Description based on these guidelines and assign a score between 0 and 1, reflecting the level of compliance.
                For scoring less than 1, provide a list of specific missing or violated guidelines.
                Provide output as a dictionary containing:"""+"""
                {
                "Score":Score value, 
                "Violated Guidelines":[List of missing or violated guidelines],
                "Explanation":Include an explanation for the list of missing or violated guidelines
                }
                Always return answer in above format and no other information."""

    elif section=='bullets':
        prompt = f"""I want to ensure my product listings comply with a specific retailer's content and style guidelines. I have added these guidelines. Please analyze my product bullets comparing them to the guidelines.
            Guidelines for bullets:[{guidelines}]
            Product Bullets: {bullets_text}
            Evaluate my product bullets based on these guidelines and assign a score between 0 and 1, reflecting the level of compliance.
            For scoring less than 1, provide a list of specific missing or violated guidelines.
            Provide output as a dictionary containing:"""+"""
            {
            "Score":Score value, 
            "Violated Guidelines":[List of missing or violated guidelines],
            "Explanation":Include an explanation for the list of missing or violated guidelines
            }
            Always return answer in above format and no other information."""

    elif section=='general':
        prompt = f"""I want to ensure my product title, description and bullets comply with a specific retailer's content and style guidelines. I have added these guidelines. Please analyze title, bullets and description, comparing them to the guidelines relevant to them only.
            Guidelines for Product Content:[{guidelines}]
            Product Content Title: {title_text}
            Product Content Bullets: {bullets_text}
            Product Content Description: {description_text}
            Evaluate my product contents based on these guidelines and assign a score between 0 and 1, reflecting the level of compliance.
            For scoring less than 1, provide a list of specific missing or violated guidelines that are relevant to title, bullets and description only and not other fields.
            Provide output as a dictionary containing:"""+"""
            {
            "Score":Score value, 
            "Violated Guidelines":[List of missing or violated guidelines],
            "Explanation":Include an explanation for the list of missing or violated guidelines
            }
            Always return answer in above format and no other information."""

    else:
        # Previously an unknown section failed later with UnboundLocalError.
        raise ValueError(f"Unknown section {section!r}; expected 'title', 'description', 'bullets' or 'general'")

    # One request for whichever prompt was selected above.
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0,
        messages=[{"role": "user", "content": prompt}],
    )
    response = completion.choices[0].message.content

    violated_guidelines = _parse_issues_payload(response)["Violated Guidelines"]

    if verbose>0:
        print('violated guidelines-\n', violated_guidelines)

    return violated_guidelines


def _parse_suggestions_payload(response):
    """Turn the model's reply into a dict without executing it.

    Accepts the reply bare or wrapped in a Markdown code fence, and parses it
    as JSON first, then as a Python literal (single quotes, ``None``...).
    Raises ``ValueError``/``SyntaxError`` when it is neither.
    """
    payload = response.strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", payload, re.DOTALL)
    if fenced:
        payload = fenced.group(1)
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        # literal_eval only builds literals, so text coming back from the
        # model can never run code the way eval() could.
        return ast.literal_eval(payload)


def get_suggestions(issues, section='title', bullets_text='', title_text='', description_text='', model='gemini', verbose=0):
    """Ask the model how to resolve the listed issues for one content section.

    Args:
        issues: Issues inserted into the prompt.
        section: ``'title'``, ``'description'``, ``'bullets'`` or
            ``'general'``; selects the prompt and which texts it includes.
        bullets_text: Product bullets (used for ``'bullets'``/``'general'``).
        title_text: Product title (used for ``'title'``/``'general'``).
        description_text: Product description (used for
            ``'description'``/``'general'``).
        model: Accepted for interface compatibility only; the request always
            uses ``gpt-4-turbo``.
        verbose: Values above 0 print the suggestions.

    Returns:
        The value of ``"Suggestions"`` in the model's reply.

    Raises:
        ValueError: If ``section`` is unknown, or the reply cannot be parsed.
        KeyError: If the reply has no ``"Suggestions"`` entry.
    """
    if section=='title':
        prompt = f"""
            Compare the Product Title: {title_text} to the Issues with Title:[{issues}]. 
            Generate a list of bullets or points that provide the suggestions to resolve the discrepancies or areas where the issues exist. 
            The output should be a concise and clear list of specific points that need to be addressed or improved.
            Do not provide any suggestions if there are no issues.
            Provide output as a dictionary containing:"""+"""
            {
            "Suggestions":[List of resolutions to remove listed issues with proper explanation]
            }
            Always return answer in above format and no other information."""

    elif section=='description':
        prompt = f"""
            Compare the Product Description: {description_text} to the Issues with Description:[{issues}]. 
            Generate a list of bullets or points that provide the suggestions to resolve the discrepancies or areas where the issues exist. 
            The output should be a concise and clear list of specific points that need to be addressed or improved.
            Do not provide any suggestions if there are no issues.
            Provide output as a dictionary containing:"""+"""
            {
            "Suggestions":[List of resolutions to remove listed issues with proper explanation]
            }
            Always return answer in above format and no other information."""

    elif section=='bullets':
        prompt = f"""
            Compare the Product Bullets: [{bullets_text}] to the Issues with Bullets:[{issues}]. 
            Generate a thorough list of bullets or points that provide the suggestions to resolve the discrepancies or areas where the issues exist. 
            The output should be a concise and clear list of specific points that need to be addressed or improved.
            Do not provide any suggestions if there are no issues.
            Provide output as a dictionary containing:"""+"""
            {
            "Suggestions":[List of resolutions to remove listed issues with proper explanation]
            }
            Always return answer in above format and no other information."""

    elif section=='general':
        prompt = f"""
            Compare the Product Content Title: {title_text},
            Product Content Bullets: {bullets_text},
            Product Content Description: {description_text} to the Issues:[{issues}]. 
            Generate a list of bullets or points that provide the suggestions to resolve the discrepancies or areas where the issues exist. 
            The output should be a concise and clear list of specific points that need to be addressed or improved.
            Do not provide any suggestions if there are no issues.
            Provide output as a dictionary containing:"""+"""
            {
            "Suggestions":[List of resolutions to remove listed issues with proper explanation]
            }
            Always return answer in above format and no other information."""

    else:
        # Previously an unknown section failed later with UnboundLocalError.
        raise ValueError(f"Unknown section {section!r}; expected 'title', 'description', 'bullets' or 'general'")

    # One request for whichever prompt was selected above.
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0,
        messages=[{"role": "user", "content": prompt}],
    )
    response = completion.choices[0].message.content

    Suggestions = _parse_suggestions_payload(response)["Suggestions"]

    if verbose>0:
        print('Suggestions-\n', Suggestions)

    return Suggestions


def get_compliant_content(suggestions, section='title', bullets_text='', title_text='', description_text='', model='gemini', verbose=0):
    """Ask the model to rewrite product content according to ``suggestions``.

    Args:
        suggestions: Suggestions inserted into the prompt.
        section: ``'title'``, ``'description'``, ``'bullets'`` or
            ``'general'``; selects the prompt and which texts it includes.
        bullets_text: Product bullets (used for ``'bullets'``/``'general'``).
        title_text: Product title (used for ``'title'``/``'general'``).
        description_text: Product description (used for
            ``'description'``/``'general'``).
        model: Accepted for interface compatibility only; the request always
            uses ``gpt-4-turbo``.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        The raw reply text of the model (it is not parsed).

    Raises:
        ValueError: If ``section`` is not one of the four known sections.
    """
    if section=='title':
        prompt = f"""
            Product Title: {title_text}. 
            Suggestions: {suggestions}.
            Given input product title and a list of suggestions, update the title to reflect the suggested changes. 
            The suggestions should be applied in the order they are provided. If a suggestion is to replace a word, replace the exact word. If a suggestion is to add text, insert it at the specified location. If a suggestion is to remove text, delete the specified text. Provide the updated text as the output."
            
            Special instructions for the title section : Do not add new words to the very beginning of the Title, make sure that original title is modified for only provided suggestions and rest of the title remains exactly same as original
            Special instructions for the bullets : Do not create new bullet points for any new information. Instead, seamlessly integrate them into the existing content, ensuring that the overall text remains coherent and meaningful. Additionally, please ensure that the number of bullet points remains the same as the original text. Bullet points are indicated by "\\n" in the text, and the number of bullet points should be calculated as the number of "\\n" characters plus one. Don't remove any important information, add integrate them into the existing content.
            Please provide dictionary with the following structure:"""+"""
            {
            "old Title": "original title goes here",
            "Title": "Improved Title Goes Here.",
            "Explaination": "explaine concisely what changes have been done."
            }
            Return the response in above format only.
            Improve upon only on the points that are described above, otherwise return original answer.
            """

    elif section=='description':
        prompt = f"""
            Product Description: {description_text}. 
            Suggestions: {suggestions}.
            Given input product Description and a list of suggestions, update the Description to reflect the suggested changes. 
            The suggestions should be applied in the order they are provided. If a suggestion is to replace a word, replace the exact word. If a suggestion is to add text, insert it at the specified location. If a suggestion is to remove text, delete the specified text. Provide the updated text as the output."
            Do not add any information that is not already present. for example if sugggestions say that add dimension of product but we do not have the information then skip adding any false information.

            Special instructions for the title section : Do not add new words to the very beginning of the Title, make sure that original title is modified for only provided suggestions and rest of the title remains exactly same as original
            Special instructions for the bullets : Do not create new bullet points for any new information. Instead, seamlessly integrate them into the existing content, ensuring that the overall text remains coherent and meaningful. Additionally, please ensure that the number of bullet points remains the same as the original text. Bullet points are indicated by "\\n" in the text, and the number of bullet points should be calculated as the number of "\\n" characters plus one. Don't remove any important information, add integrate them into the existing content.
            Please provide dictionary with the following structure:"""+"""
            {
            "old Description": "original Description goes here",
            "Description": "Improved Description Goes Here.",
            "Explaination": "explaine concisely what changes have been done."
            }
            Return the response in above format only.
            Improve upon only on the points that are described above, otherwise return original answer.
            """

    elif section=='bullets':
        prompt = f"""
            Product Bullets: {bullets_text}. 
            Suggestions: {suggestions}.
            Given input product Bullets and a list of suggestions, update the Bullets to reflect the suggested changes. 
            The suggestions should be applied in the order they are provided. If a suggestion is to replace a word, replace the exact word. If a suggestion is to add text, insert it at the specified location. If a suggestion is to remove text, delete the specified text. Provide the updated text as the output."
            Do not add any information that is not already present. for example if sugggestions say that add dimension of product but we do not have the information then skip adding any false information.

            Special instructions : 
            Do not create new bullet points for any new information. Instead, seamlessly integrate them into the existing content, ensuring that the overall text remains coherent and meaningful. Additionally, please ensure that the number of bullet points remains the same. Don't remove any important information, add integrate them into the existing content.
            Make sure to resolved all violated guidelines and provide the updated bullets.
            
            Please provide dictionary with the following structure:"""+"""
            {
            "old Bullets": "original Bullets goes here",
            "Bullets": "Improved Bullets Goes Here.",
            "Explaination": "explaine concisely what changes have been done."
            }
            Return the response in above format only.
            Improve upon only on the points that are described above, otherwise return original answer.
            """

    elif section=='general':
        # NOTE(review): the requested structure below repeats the
        # "Explaination" key three times; a reply that follows it literally
        # cannot keep all three explanations once parsed as a dict.
        prompt = f"""
            Product Content Title: {title_text}.
            Product Content Description:{description_text}.
            Product Content Bullets: {bullets_text}.
            Suggestions: {suggestions}.
            Given input product content and a list of suggestions, update them to reflect the suggested changes. 
            The suggestions should be applied in the order they are provided. If a suggestion is to replace a word, replace the exact word. If a suggestion is to add text, insert it at the specified location. If a suggestion is to remove text, delete the specified text. Provide the updated text as the output."
            Do not add any information that is not already present. for example if sugggestions say that add dimension of product but we do not have the information then skip adding any false information.
            
            Special instructions : 
            Do not create new bullet points for any new information. Instead, seamlessly integrate them into the existing content, ensuring that the overall text remains coherent and meaningful. Additionally, please ensure that the number of bullet points remains the same. Don't remove any important information, add integrate them into the existing content.
            Make sure to resolved all violated guidelines and provide the updated bullets.
            
            Please provide dictionary with the following structure:"""+"""
            {
            "old Bullets": "original bullets goes here",
            "Bullets": "Suggested Improved Bullets Goes Here",
            "Explaination": "explaine concisely what changes have been done.",

            "Old Description": "original description goes here",
            "Description": "Suggested Improved Description Goes Here",
            "Explaination": "explaine concisely what changes have been done.",

            "old Title": "original title goes here",
            "Title": "Suggested Improved Title Goes Here.",
            "Explaination": "explaine concisely what changes have been done.",
            }
            
            Return the response in above format only.
            Improve upon only on the points that are described above and keep the number of bullets same as original bullets. 
            """

    else:
        # Previously an unknown section failed at `return` with UnboundLocalError.
        raise ValueError(f"Unknown section {section!r}; expected 'title', 'description', 'bullets' or 'general'")

    # One request for whichever prompt was selected above.
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0,
        messages=[{"role": "user", "content": prompt}],
    )

    response = completion.choices[0].message.content
    return response


def optimize_product_content(input_data='sample1.pdf', source_type='pdf', section='title', bullets_text='', title_text='', description_text='', verbose=0, model='openai'):
    """Run the guideline-compliance pipeline for a single product listing.

    Chains three notebook helpers: ``get_guidelines`` (read the guidelines for
    ``section`` from ``input_data``), ``get_issues`` (find which guidelines the
    listing text violates) and ``get_compliant_content`` (produce the rewritten
    listing).

    Args:
        input_data: Guideline source passed to ``get_guidelines``; the notebook
            uses a PDF file name or a URL.
        source_type: Kind of ``input_data``; the notebook uses ``'pdf'`` and
            ``'html'``.
        section: Listing section to check; the notebook uses ``'title'``,
            ``'bullets'`` and ``'general'``.
        bullets_text: Bullet-point text of the listing.
        title_text: Title text of the listing.
        description_text: Description text of the listing.
        verbose: Verbosity flag forwarded unchanged to every helper.
        model: Model identifier forwarded to every helper. It is declared last
            so existing positional callers are unaffected.

    Returns:
        The value returned by ``get_compliant_content``.
    """
    # Every driver cell in this notebook calls the helpers with `model`
    # immediately before `verbose`. The previous body omitted `model`, which
    # put `verbose` in the `model` position.
    guidelines = get_guidelines(input_data, source_type, section, model, verbose)
    violated_guidelines = get_issues(
        guidelines, section, bullets_text, title_text, description_text, model, verbose
    )
    # NOTE(review): the PDF driver cell runs get_suggestions() between these two
    # steps, while the URL and Excel cells do not. Confirm which flow this
    # wrapper is meant to mirror.
    return get_compliant_content(
        violated_guidelines, section, bullets_text, title_text, description_text, model, verbose
    )

# test data

In [6]:
# Sample product titles used as test input. The driver cells read `titles`,
# `descriptions` and `bullets` at the same index, so entry i of each list must
# describe the same product. The Glenlivet sample is commented out in all
# three lists, which keeps them aligned.
titles = [
    # "The Glenlivet Founder's Reserve Single Malt Scotch Whisky, 700ml with Giftbox, Perfect for Special Occasions",
    "Fresh Step Advanced Multi-Cat Clumping Cat Litter with Odor Control, 18.5 lb",
    "Burt's Bees Natural & Moisturizing Lip Balm, Vanilla Bean, 4 Pack",
    "BIC Break-Resistant Mechanical Pencils with Erasers, No. 2 Medium Point (0.7mm), 12-Count Pack, Perfect Teacher Appreciation Gifts",
    "Coconut Secret Coconut Amino Garlic Sauce, 10 fl. oz.",
    "Cortizone 10 Anti-Itch Lotion for Psoriasis 3.4 oz (96g) , Maximum Strength 1% Hydrocortisone",
    "Maxwell House House Blend Medium Roast K-Cup Coffee Pods (84 ct Box)",
]
# not identifying number of characters to include but can be modified

# Sample product descriptions, one string per product. The driver cells read
# this list at the same index as `titles` and `bullets`, so the order here
# must match those lists. The first (Glenlivet) sample is commented out.
descriptions = [
    # "Introducing The Glenlivet Founder’s Reserve, a distinguished Scotch whisky hailing from the Speyside region of Scotland. Crafted using a time-honored method by founder George Smith, this whisky is matured in a select blend of traditional and American first-fill oak casks. Further enhancing its complexity, it is then combined with American first-fill oak barrels that have never been used for maturing whisky. This selective use of casks imbues the single malt with a perfectly balanced sweet and smooth taste. Its flavor profile is accentuated with hints of citrus and fruity tones, a perfect complement to its alluring pale gold color. This exceptional quality earned it the 2019 International Wine & Spirits Competition (IWSC) Gold award. This classic whisky, presented in a 70 cl (700 ml) bottle at 40% ABV, is not just a drink but an experience. Enjoy it on the rocks or use it to craft exquisite cocktails such as the Hot Toddy, Carnivale, and The Last King. It's the perfect gift for special occasions or as an everyday treat to yourself.",
    "Fresh Step Advanced Multi Cat Extra Strength Formula clumping cat litter is effective in multi-cat homes. It destroys odors without the dust using a tough odor eliminating formula and the power of Febreze. Fresh Step Advanced Multi Cat Extra Strength Formula also uses Fresh Step Clumplock Technology, which locks in liquid and odor and creates tight clumps for easy scooping, and prevents stinky crumbles from being left behind. And, it is 99.9% dust free for cleaner surfaces, cleaner air and reduced tracking. Cleaning is fast and easyjust add more litter to your cats litter box after you scoop.",
    "Burt's Bees Natural & Moisturizing Lip Balm, Vanilla Bean, Burt's Bees Moisturizing Lip Balms nourish and make your lips feel luxurious. Infused with a sweet vanilla flavor and Beeswax to richly moisturize and soften lips, the nourishing oils and butter will make your lips juicy, happy, and healthy. With a matte finish and moisturizing balm texture, this tint-free tube of soothing lip balm glides on smoothly to nourish dry lips while keeping them revitalized and hydrated. Conveniently tuck a tube into a pocket or purse, to keep natural, nurturing lip care handy. This 100% natural beauty product is free of parabens, phthalates, petrolatum, and SLS and will beautify and revitalize your lips. Use these natural Burt's Bees Moisturizing Lip Balms to make lips feel their best.",
    "No more broken leads. Enjoy a smooth, uninterrupted writing experience with new BIC Break-Resistant mechanical pencils, thanks to the specially formulated 0.7mm lead and innovative shock-absorption mechanism has a 75% stronger lead. A 75% stronger lead means less breakage–so you can write with less time spent clicking for more lead. Crafted with thoughtful design from top to bottom, the new BIC Break-Resistant mechanical pencil is the tool for any type of writing from jotting down a simple shopping list to composing longform letters; drafting professional documents to taking standardized tests; and everything in-between. Made to last with high-quality materials, each package includes extra break-resistant leads and a replacement jumbo eraser for easy editing. The new BIC Break-Resistant mechanical pencils are as good looking as they are functional. Avoid the frustration of flimsy lead that snaps easily and requires constant replacement with the BIC Break-Resistant mechanical pencil to go the distance and last as long your inspiration. vs leading competitor.",
    "Savor the bold flavor of our soy-free and gluten-free Garlic Coconut Aminos with 39% less sodium than the leading garlic stir fry sauce brand. This delicious creation only exists thanks to our secret and sacred ingredient: the sap of organic coconut blossoms. Hand-harvested, naturally fermented, then blended with sea salt and a unique combination of organic spices, this sweet and savory Garlic Sauce is perfect for marinades dipping, stir-fries and dips. Always soy-free, gluten-free, and keto-friendly with no added sugar, it is our great joy to share this exceptionally clean and flavorful sauce with you.",
    "Psoriasis is an immune-mediated disease that causes raised, red, scaly patches to appear on the skin and itch. Psoriasis typically affects the outside of the elbows, knees or scalp, though it can appear on any location. Some people report that psoriasis is itchy. Anti-Itch Lotion for Psoriasis provides temporary itch relief for irritated & inflamed skin caused by or associated with psoriasis. It features 7 moisturizers & 2 vitamins to hydrate skin plus maximum strength hydrocortisone without a prescription to provide fast itch relief*. Stock up on 3.4-ounce tubes of Cortizone 10 Lotion for Psoriasis to give everyone in the family, ages two and up, itch relief from skin irritations associated with psoriasis. Cortizone 10: Stop the Itchsanity. *1% hydrocortisone is the maximum strength available without a prescription. DIRECTIONS: Adults and children 2 years of age and older: Apply to the affected area not more than 3 to 4 times daily. For children under 2 years of age: Ask a doctor.",
    "Take a break with the coffee that practically invented breaks. The day's been a whirlwind and it's far from over. But here's the thing — you're not stressing it. Because you've got it all figured out. You're gonna brew the medium roast made for kicking back and enjoying the aroma. Maxwell House House Blend Medium Roast K-Cup Coffee Pods. Just sit back and savor every sip from your steaming cup full of strong, full-bodied Maxwell House flavor. Reset with that consistently delicious taste that hits the spot from the very first sip to the very last drop. And when that cup's empty and you're finally fueled up, you can squint your eyes, gather your gear and roll up your sleeves. Now you're ready to tackle anything, and there’s plenty of coffee pods ready for you in your 84 count box of Maxwell House House Blend Medium Roast K-Cup Coffee Pods. Let's show 'em what you're made of. Maxwell House. To Your Max.",

]

# Sample bullet-point sets, one multi-line string per product. Within a string
# each bullet sits on its own line and starts with "-". The driver cells read
# this list at the same index as `titles` and `descriptions`, so the order must
# match those lists. The first (Glenlivet) sample is commented out.
bullets = [
# """
# 1. Speyside whisky matured in carefully selected traditional and American first-fill oak casks.\n
# 2. Delivers a well-balanced sweet, fruity taste with a smooth finish.\n
# 3. Versatile - Enjoy it with ice or use it to craft cocktails such as Hot Toddy, Carnivale, and The Last King.\n
# 4. Presented in a 70 cl (700 ml) bottle at 40% ABV.\n
# 5. Winner of the 2019 International Wine & Spirits Competition (IWSC) Gold award.\n
# 6. Perfect gift for occasions like Father's Day, Christmas, dinners, birthdays and other special events.
# """,
"""
-FIGHTS ODORS LONGER*: Fight litter box odors with Fresh Step Advanced Cat Litter that starts fighting odors on contact to control odor longer* vs. Fresh Step Multi-Cat; Packaging may vary 
-MULTI CAT LITTER: Extra strength formula is strong enough to handle multiple-cat litter boxes and multi-cat homes
-LOW DUST CAT LITTER: For cleaner surfaces and cleaner air, this litter is 99% dust free and a low tracking cat litter
-ODOR CONTROL: 10 days of odor control helps keep the litter box and your home smelling fresh 
-CLUMPING CAT LITTER: This high absorbent clumping formula prevents stinky crumbles from being left behind; Certain trademarks used under license from The Procter & Gamble Company or its affiliates
""",
"""
-LIP CARE: Bursting with sweet tropical flavor, refresh and nourish your lips with Coconut & Pear Burt's Bees Moisturizing Lip Balm 
-TOTAL HYDRATION: Hydrate and soften dry lips with beeswax and fruit extracts packed into every tube of Burt's Bees lip care 
-MOISTURIZING: Just one swipe of this conditioning lip moisturizer leaves your lips smooth and refreshed 
-ALL NATURAL: Enriched with 100% natural ingredients, this natural lip balm is formulated without parabens, phthalates, petrolatum or SLS 
-BURT'S BEES LIP BALM: Treat your lips to a tropical vacation with this fruity lip balm that naturally beautifies and revitalizes your lips
""",
"""
-One 12-count pack of BIC Break-Resistant No. 2 Mechanical Pencils With Erasers, Gray and Blue Colored Barrels Only
-BIC Break-Resistant mechanical pencils provide the perfect combination of innovative features and appealing design for a smooth, uninterrupted writing experience 
-No more broken leads. BIC Break-Resistant Mechanical Pencil has 75% stronger lead and an innovative shock-absorption mechanism that helps prevent lead breakage. vs leading competitor 
-Each BIC Break-Resistant mechanical pencil comes with specially formulated extra strong lead, as well as an easy to use twist-up jumbo-size eraser for clean corrections 
-Easily erase unwanted marks and make clean corrections with the jumbo, twist-up eraser that won't smudge the page 
-This box contains 12 Mechanical Pencils, Gray and Blue Colored Barrels Only
""",
"""
-GREAT TASTING SOY FREE ALTERNATIVE - Coconut Secret Coconut Aminos Garlic Sauce is a new innovation in healthy Asian cuisine. Its created by infusing our Original Coconut Aminos sauce with garlic and other spices to create an exceptional seasoning and stir-fry sauce. It is great brushed over fish, chicken, meat and veggies, or in sautés and marinades 
-FIVE INGREDIENTS - Sometimes simple is best. The delicious, garlic flavor of our garlic sauce is achieved with just five ingredients: organic coconut tree sap, organic garlic, sea salt, organic ginger, and organic cayenne pepper 
-SATISFY CRAVINGS WITHOUT COMPROMISING FLAVOR OR HEALTH - Coconut Secret Coconut Aminos Garlic Sauce is low sodium and contains no MSG. Plus, it is USDA Certified Organic, gluten-free, kosher and non-GMO verified 
-THE HEALTHY SECRET - Coconut sap is the healthy secret to all of Coconut Secret products. It is loaded with a wide range of minerals, vitamins, and 17 amino acids. Its also low-glycemic and has a nearly neutral pH. Plus, since it comes from the tree and not the fruit, it doesn't have a coconutty flavor 
-SUSTAINABLE PRACTICES - Agrochemicals and GMOs are strictly prohibited from our farms in the Philippines. We use environmentally sustainable organic farming methods and always follow fair trade practices so you can consume our products with confidence
""",
"""
-CONTAINS: One (1) 3.4-oz. tube of Cortizone 10 Maximum Strength Anti-Itch Lotion for Psoriasis 
-LASTING RELIEF: This lotion is for relief of itching associated with rashes, minor skin irritation and inflammation due to psoriasis 
-STOP THE ITCHSANITY: Cortizone 10 Psoriasis Lotion contains maximum strength hydrocortisone, the #1 itch medicine recommended by doctors 
-7 INTENSIVE MOISTURIZERS: Cortizone 10 Lotion for Psoriasis features 7 moisturizers 
-APPLICATION: Adults and children 2 years of age and older: Apply to the affected area not more than 3 to 4 times daily
""",
"""
-MAXWELL HOUSE COFFEE PODS: 84 ct box of Maxwell House House Blend Medium Roast K-Cup Coffee Pods 
-MEDIUM ROAST COFFEE: Maxwell House Medium Roast House Blend Coffee K-Cups are the perfect balance of strength and flavor 
-BREW FOR YOU: Enjoy a well balanced, lively medium roast coffee with a smooth taste that's good to the last drop 
-100% ARABICA BEANS: Made with 100% Arabica coffee 
-SINGLE SERVING: Packaged in individual K-Cups for single-serve use every morning 
-COMPATIBILITY: Perfect for use in Keurig 1.0 & 2.0 brewing systems 
-OUR STORY: Maxwell House coffee has been proudly roasted in the USA since 1892 
-KOSHER: Certified Kosher coffee 
-STORAGE: Store our box of coffee pods in a dry place
"""
]

# compliance using pdf

In [28]:
# Run configuration for the PDF-based compliance check.
input_data = 'sample2.pdf'
section = 'bullets'
model = 'openai'
source_type = 'pdf'  # alternative: 'html'
verbose = 1

# The sample lists are consumed in lockstep. Fail loudly if one was edited on
# its own, because zip() would otherwise silently drop the unmatched tail.
assert len(bullets) == len(titles) == len(descriptions), "sample lists are misaligned"

# The guideline arguments do not vary per product, so fetch the guidelines
# once up front rather than inside the loop.
guidelines = get_guidelines(input_data, source_type, section, model, verbose)

for bullets_text, title_text, description_text in zip(bullets, titles, descriptions):
    # Pipeline per product: violated guidelines -> suggestions -> rewritten content.
    issues = get_issues(guidelines, section, bullets_text, title_text, description_text, model, verbose)
    suggestions = get_suggestions(issues, section, bullets_text, title_text, description_text, model, verbose)
    response = get_compliant_content(suggestions, section, bullets_text, title_text, description_text, model, verbose)
    print(response)
    print('###########################################################################')

guidelines-
 - Capitalize the first letter of each word.
- Do not use ALL CAPS.
- Do not capitalize conjunctions, articles, or prepositions with fewer than five letters.
- Use numerals.
- State the number of items in a bundled product.
- Keep it under 200 characters.
- Use only standard text; avoid special characters or symbols.
- Do not include price and quantity.
- Do not include information about yourself or your company.
- Do not include promotional messages.
- Use your seller name as the Brand or Manufacturer only if your product is Private Label.
- Do not include subjective commentary.
violated guidelines-
 ['Do not use ALL CAPS', 'Use only standard text; avoid special characters or symbols', 'Do not include subjective commentary']
Suggestions-
 ["Replace 'FIGHTS ODORS LONGER*' with 'Fights odors longer' to avoid using all caps, which can seem aggressive and is less readable.", "Remove the asterisk (*) after 'longer' in 'FIGHTS ODORS LONGER*' as it is a special character. If refe

# compliance using url address

In [16]:
# Run configuration for the URL-based compliance check.
input_data = 'https://www.zonguru.com/blog/amazon-listing-optimization'
section = 'general'
model = 'openai'
source_type = 'html'
verbose = 1
n_samples = 2  # only the first two sample products are processed in this run

# The sample lists are consumed in lockstep. Fail loudly if one was edited on
# its own, because zip() would otherwise silently drop the unmatched tail.
assert len(bullets) == len(titles) == len(descriptions), "sample lists are misaligned"

# The guideline arguments do not vary per product, so fetch the guidelines
# once up front rather than inside the loop.
guidelines = get_guidelines(input_data, source_type, section, model, verbose)

for bullets_text, title_text, description_text in zip(bullets[:n_samples], titles, descriptions):
    violated_guidelines = get_issues(guidelines, section, bullets_text, title_text, description_text, model, verbose)
    # NOTE(review): unlike the PDF cell, this run passes the violated guidelines
    # straight to get_compliant_content() with no get_suggestions() step.
    # Confirm that this is intended.
    response = get_compliant_content(violated_guidelines, section, bullets_text, title_text, description_text, model, verbose)
    print(response)

64 https://www.zonguru.com/blog/amazon-listing-optimization
guidelines-
 - Use the brand and main keyword/product type at the start of the title.
- Include features and benefits in the product title.
- Write in a way that addresses customer pain points.
- Use one long-tail keyword in each bullet point.
- Ensure the content is descriptive enough to be ranked favorably.
text-
 
-FIGHTS ODORS LONGER*: Fight litter box odors with Fresh Step Advanced Cat Litter that starts fighting odors on contact to control odor longer* vs. Fresh Step Multi-Cat; Packaging may vary 
-MULTI CAT LITTER: Extra strength formula is strong enough to handle multiple-cat litter boxes and multi-cat homes
-LOW DUST CAT LITTER: For cleaner surfaces and cleaner air, this litter is 99% dust free and a low tracking cat litter
-ODOR CONTROL: 10 days of odor control helps keep the litter box and your home smelling fresh 
-CLUMPING CAT LITTER: This high absorbent clumping formula prevents stinky crumbles from being left be

# compliance using excel template

In [None]:
# Run configuration for the compliance check driven by the Excel template.
model = 'openai'
section = 'general'
verbose = 1
n_samples = 2  # only the first two sample products are processed in this run

# The sample lists are consumed in lockstep. Fail loudly if one was edited on
# its own, because zip() would otherwise silently drop the unmatched tail.
assert len(bullets) == len(titles) == len(descriptions), "sample lists are misaligned"

# The guideline lookup takes only `section`, which does not vary per product,
# so do it once up front rather than inside the loop.
guidelines = get_guidelines_from_excel(section)

for bullets_text, title_text, description_text in zip(bullets[:n_samples], titles, descriptions):
    violated_guidelines = get_issues(guidelines, section, bullets_text, title_text, description_text, model, verbose)
    # NOTE(review): unlike the PDF cell, this run passes the violated guidelines
    # straight to get_compliant_content() with no get_suggestions() step.
    # Confirm that this is intended.
    response = get_compliant_content(violated_guidelines, section, bullets_text, title_text, description_text, model, verbose)
    print(response)