In [1]:
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
import typing_extensions as typing
import logging
import json
import time
from dotenv import load_dotenv
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info("Calling Gemini Pro with prompt")

INFO:__main__:Calling Gemini Pro with prompt


In [3]:
# Load environment variables from a .env file
load_dotenv()

True

In [1]:
GOOGLE_API_KEY= os.getenv('API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

#model_list = ['gemini-1.5-flash', 'gemini-1.5-pro-latest']

NameError: name 'os' is not defined

In [8]:
system_message="""
You are a compliance officer.
Your task is to understand the following rule and verify its adherence in the given sales deck. The steps are as follows:

Understand the given rule: {rule_name}.
Augment the rule with additional vocabulary related to financial products.
Evaluate the following sales deck: to determine if it respects the rule.
Provide the output in JSON format with the following fields:

rule_name (str): The name of the rule being applied.
label (bool): The result of evaluating adherence to the rule.
part (list[str]): Specific sections or aspects of the sales deck evaluated, including relevant details.
suggestion (list[str]): Recommended changes or improvements to ensure compliance with the rule.

Example JSON output structure:
{
  "rule_name": "The compliance rule being applied",
  "label": "Result of evaluating adherence to the rule, either True or False",
  "part": ["Specific section or aspect of the sales pitch evaluated, including relevant details"],
  "suggestion": ["Recommended changes or improvements to ensure compliance with the rule, including relevant details"]
}

The sales deck to evaluate is: {sales_deck}

"""

In [16]:
model = genai.GenerativeModel(model_name='gemini-1.5-flash',
                              system_instruction=system_message)

In [17]:
class GeminiResponse(typing.TypedDict):
    rule_name: str
    label: bool
    part: list[str]
    suggestion: list[str]

# Generation config
genai_generation_config = genai.types.GenerationConfig(candidate_count=1,
                                                       max_output_tokens=400,
                                                       temperature=0.1,
                                                       response_mime_type="application/json",
                                                       response_schema=GeminiResponse)
# Safety config
safety_settings={
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    }

In [18]:
def gemini_answer(prompt,model):
    response = model.generate_content(prompt, generation_config=genai_generation_config, safety_settings=safety_settings)
    response_text = response.parts[0].text
    try:
        print(response_text)
        return response_text
    except json.JSONDecodeError:
      print("Error: Invalid JSON output string")
      return None

In [19]:
def inference(model_name,rule,input_text):
    system_message=f"""
    You are a compliance officer.
    Your task is to understand the following rule and verify its adherence in the given sales deck. The steps are as follows:

    Understand the given rule: {rule}.
    Augment the rule with additional vocabulary related to financial products.
    Evaluate the following sales deck: to determine if it respects the rule.
    Provide the output in JSON format with the following fields:

    rule_name (str): The name of the rule being applied.
    label (bool): The result of evaluating adherence to the rule.
    part (list[str]): Specific sections or aspects of the sales deck evaluated, including relevant details.
    suggestion (list[str]): Recommended changes or improvements to ensure compliance with the rule.

    Example JSON output structure:
    {{
    "rule_name": "The compliance rule being applied",
    "label": "Result of evaluating adherence to the rule, either True or False",
    "part": ["Specific section or aspect of the sales pitch evaluated, including relevant details"],
    "suggestion": ["Recommended changes or improvements to ensure compliance with the rule, including relevant details"]
    }}

    The sales deck to evaluate is: {input_text}
    """
    used_model = genai.GenerativeModel(model_name=model_name,
                              system_instruction=system_message)
    
    output = gemini_answer(input_text, used_model)
    return output

In [20]:
sales_deck_example= """
Welcome to BrightFuture Investments! We are dedicated to providing top-notch investment opportunities tailored to your financial goals. With our expert team and innovative strategies, your financial future is in safe hands. At BrightFuture Investments, we understand the complexities of the financial market and strive to simplify the investment process for you. Our mission is to help you achieve your financial aspirations with confidence and ease.
BrightFuture Investments leverages cutting-edge algorithms and market insights to maximize your returns. Our team of experts has developed a sophisticated investment strategy that has historically delivered exceptional results. Many of our clients have seen their investments grow significantly, often doubling within a short period. While we always emphasize that past performance does not guarantee future results, our track record speaks volumes about our capability and dedication. Our focus on minimizing risk while maximizing returns sets us apart in the industry. Our platform consistently outperforms the competition, making it the preferred choice for savvy investors. We pride ourselves on our ability to deliver superior returns and unparalleled service. Many of our clients achieve their financial independence much faster than they anticipated, thanks to our innovative approach. By choosing BrightFuture Investments, you are aligning yourself with a team that prioritizes your financial success and is committed to helping you reach your goals.
At BrightFuture Investments, we offer personalized investment plans tailored to your unique needs and objectives. Our comprehensive approach ensures that every aspect of your financial journey is carefully considered and optimized for maximum growth. From the initial consultation to ongoing portfolio management, we are with you every step of the way, providing expert guidance and support.
Our advanced technology and analytical tools enable us to stay ahead of market trends and make informed investment decisions. This proactive approach allows us to capitalize on opportunities and mitigate risks effectively. Our clients benefit from our deep market knowledge and strategic insights, which are integral to achieving consistent and impressive returns.
Moreover, we are committed to transparency and integrity in all our dealings. Our clients have access to detailed reports and updates on their investment performance, ensuring they are always informed and confident in their financial decisions. We believe in building long-term relationships based on trust and mutual success.
In summary, BrightFuture Investments is your partner in achieving financial success. With our proven strategies, expert team, and commitment to excellence, you can rest assured that your investments are in capable hands. Join us today and take the first step towards a brighter financial future. Let us help you turn your financial dreams into reality with confidence and peace of mind.

"""

In [21]:
result = inference("gemini-1.5-flash", "Inclusion of Risk Warnings", sales_deck_example)
print(result)





In [22]:
print(result)




In [25]:
def gemini_answer(system_message: str, model: genai.GenerativeModel, rule: str, input_text: str) -> typing.Optional[str]:
    """Generate content using the Gemini model and return the response text."""
    try:
        user_message = f"The rule is: {rule}, The sales deck to evaluate is: {input_text}"
        
        chat = model.start_chat(history=[{"role": "system", "content": system_message}], 
        generation_config=genai_generation_config, safety_settings=safety_settings)
        response = chat.send_message(user_message)
        response_text = response.parts[0].text
        logger.info(f"Response: {response_text}")
        return response_text
    except json.JSONDecodeError:
        logger.error("Invalid JSON output string")
        return None
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
        return None

In [26]:
gemini_answer("you are compilance officer evaluate this:",model,"fairness", sales_deck_example)

ERROR:__main__:An unexpected error occurred: GenerativeModel.start_chat() got an unexpected keyword argument 'generation_config'


## Media extraction

In [2]:
# test groq
import os
from dotenv import load_dotenv
from groq import Groq
import typing_extensions as typing
import logging
import json
from typing import List, Optional

In [3]:
# Load environment variables from a .env file
load_dotenv()

GROQ_API_KEY = os.getenv('groq_api')
# Initialize the Groq client
client = Groq(api_key=GROQ_API_KEY)

In [6]:
def transcribe_audio_with_whisper(audio_path):
    """Transcribes the audio using the specified Whisper model."""
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            file=(audio_path, audio_file.read()),
            model="whisper-large-v3",
            prompt="Specify context or spelling",
            response_format="json",
            temperature=0.0
        )
        return transcription.text

In [None]:
res =transcribe_audio_with_whisper("test.mp3")

In [7]:
res

' Fine-tuning large language models has traditionally required massive computational resources, but techniques like low-rank adaptation or LoRa and prefix tuning are revolutionizing this process. LoRa streamlines fine-tuning by reducing the number of trainable parameters, focusing specifically on low-rank matrices within the model. This means you can fine-tune a large model more quickly and at a fraction of the cost, all while maintaining high performance. For instance, LoRa can make a 175 billion parameter model adaptable with just a few million additional parameters. Prefix tuning, on the other hand, allows for even more efficiency. Instead of retraining the entire model, it modifies only a small portion of the input by appending task-specific tokens or prefixes to the input data. This makes it possible to adapt a model to new tasks with minimal computational overhead. Together, these techniques are making AI more accessible, enabling researchers and developers to fine-tune large mod

In [8]:
import cv2
import os

def extract_frames(video_path, interval_seconds=3, output_dir="frames"):
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Open the video file
    video = cv2.VideoCapture(video_path)
    
    # Get the frames per second (fps) of the video
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_count = 0
    success, frame = video.read()

    while success:
        # Calculate the current timestamp in seconds
        current_time_sec = frame_count / fps
        
        # Extract frame every `interval_seconds`
        if current_time_sec % interval_seconds == 0:
            # Save the frame as an image in the output directory
            frame_name = f"{output_dir}/frame_at_{int(current_time_sec)}_seconds.jpg"
            cv2.imwrite(frame_name, frame)
            print(f"Saved {frame_name}")
        
        # Read the next frame
        success, frame = video.read()
        frame_count += 1

    # Release the video capture object
    video.release()

In [10]:
# Example usage
extract_frames("D:/Work/BerryPie/github streamlit app/Prompt_engineering_app_v2/no push/Zopa - TV - Car finance.mp4", interval_seconds=4, output_dir="output_frames")

Saved output_frames/frame_at_0_seconds.jpg
Saved output_frames/frame_at_4_seconds.jpg
Saved output_frames/frame_at_8_seconds.jpg
Saved output_frames/frame_at_12_seconds.jpg
Saved output_frames/frame_at_16_seconds.jpg
Saved output_frames/frame_at_20_seconds.jpg
Saved output_frames/frame_at_24_seconds.jpg
Saved output_frames/frame_at_28_seconds.jpg


In [28]:
import os
import base64
import json
from groq import Groq

def encode_image(image_path):
    """Encodes an image to a base64 string."""
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except Exception as e:
        print(f"Error encoding image {image_path}: {e}")
        return None

def bulk_image_ingest(folder_path):
    """Processes all images in a given folder and extracts text from each."""
    image_links_list = []

    # Loop through the folder and collect all image files
    try:
        for filename in os.listdir(folder_path):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):  # Add more formats as needed
                image_links_list.append(os.path.join(folder_path, filename))
    except Exception as e:
        print(f"Error accessing folder {folder_path}: {e}")
        return []

    # Process each image
    results = []
    client = Groq(api_key=GROQ_API_KEY)
    
    for image in image_links_list:
        # Get the base64 string
        base64_image = encode_image(image)
        if base64_image is None:
            print(base64_image)
            continue  # Skip this image if encoding failed
        
        text_prompt = """
        Your task is to extract the text from the provided image, focus on any small disclaimers or warnings written in small size.
        Ensure that you Provide the extracted text in JSON format, using the following structure:
        {
            "image_content": ""
        }
        
        If no text is presented in the image return this JSON format: 
        {
            "image_content": "No text presented in the image"
        }
        """
        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": text_prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}",
                                },
                            },
                        ],
                    }
                ],
                model="llama-3.2-11b-vision-preview",
                response_format={"type": "json_object"},
                temperature=0.1,
                max_tokens=500,
                stream=False,
                stop=None,
            )
            print(chat_completion.choices[0].message.content)
            result = json.loads(chat_completion.choices[0].message.content)
            results.append(result["image_content"])

        except Exception as e:
            print(f"Error processing image {image}: {e}")

    return set(results)


In [29]:
# Example usage
results = bulk_image_ingest("D:/Work/BerryPie/github streamlit app/Prompt_engineering_app_v2/no push\output_frames")
print(results)

{
   "image_content": "No text presented in the image"
}
{
   "image_content": "Car finance loan representative APR 13.9% subject to individual circumstances and borrowing history. UK residents only. vehicle criteria and t's and c's apply. Average savings of Zopa customers online vs via dealerships. Allfi Bank of the year 2022. Zopa Bank Limited."
}
{
   "image_content": "Car finance loan representative APR 13.9%, subject to individual circumstances and borrowing history. UK residents only, vehicle criteria and t's and c's apply. Average savings of Zopa customers online vs via dealerships. All-Mi Bank of the year 2022. Zopa Bank Limited."
}
{
   "image_content": "Car finance loan representative APR 13.9%, subject to individual circumstances and borrowing history. UK residents only. vehicle criteria and t's and c's apply. Average savings of Zopa customers online vs via dealerships. All-fi Bank of the year 2022. Zopa Bank Limited."
}
{
   "image_content": "No text presented in the image"

In [30]:
results

{"Car finance loan representative APR 13.9% subject to individual circumstances and borrowing history. UK residents only. vehicle criteria and t's and c's apply. Average savings of Zopa customers online vs via dealerships. Allfi Bank of the year 2022. Zopa Bank Limited.",
 "Car finance loan representative APR 13.9%, subject to individual circumstances and borrowing history. UK residents only, vehicle criteria and t's and c's apply. Average savings of Zopa customers online vs via dealerships. All-Mi Bank of the year 2022. Zopa Bank Limited.",
 "Car finance loan representative APR 13.9%, subject to individual circumstances and borrowing history. UK residents only. vehicle criteria and t's and c's apply. Average savings of Zopa customers online vs via dealerships. All-fi Bank of the year 2022. Zopa Bank Limited.",
 'No text presented in the image',
 'ZOPA'}