In [52]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
import json
import re
load_dotenv()
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service_Account_key.json"

### Configuring the ChatBot

In [2]:

client = OpenAI(
    api_key=os.getenv("GEMINI_API_KEY"),
    base_url=os.getenv("GEMINI_BASE_URL")
)
model_name=os.getenv("MODEL_NAME")

In [3]:

def get_chatbot_response(client,model_name,messages,temperature=0):
    input_messages = []
    for message in messages:
        input_messages.append({"role": message["role"], "content": message["content"]})

    response = client.chat.completions.create(
        model=model_name,
        messages=input_messages,
        temperature=temperature,
        top_p=0.8,
        max_tokens=2000,
    ).choices[0].message.content
    
    return response

### Sample Usage of getting LLM response

In [4]:

messages = [{'role':'user','content':"What's the capital of Italy? Give History of the capital in 20 words" }]
response = get_chatbot_response(client,model_name,messages)
print(response)

Rome.  Ancient capital of the Roman Empire, became Italy's capital in 1871 after unification.



# Prompt Engineering

### 1. Structured Output

In [5]:
system_prompt = """
You are a helpful assistant that answers questions about capitals of countries.

Your output must be a **valid JSON array** in this format:[
    {
        "country": "Country Name",
        "capital": "Capital Name"
    }
]
You are not allowed to include anything outside this JSON format. If asked a question, respond only in this format. don't include any other information in your response like file type or any other information.
"""
messages = [{'role':'system','content':system_prompt}]
messages.append({'role':'user','content':"What's the capital of USA"})
response = get_chatbot_response(client,model_name,messages)
print(response)

[
    {
        "country": "United States of America",
        "capital": "Washington, D.C."
    }
]



In [6]:
type(response)

str

In [7]:
def response_to_json(response):
    try:
        # Validate and preprocess the response
        if not response:
            raise ValueError("Response is empty.")
        
        # Extract JSON if necessary
        match = re.search(r"\[.*\]", response, re.DOTALL)
        if match:
            json_string = match.group()
        else:
            raise ValueError("No valid JSON array found in the response.")
        
        # Load the JSON
        json_response = json.loads(json_string)
        print(json_response)

    except json.JSONDecodeError as e:
        print("Invalid JSON response:", response)
        raise e
    except ValueError as ve:
        print("Error:", ve)

In [8]:
json_response=response_to_json(response)

[{'country': 'United States of America', 'capital': 'Washington, D.C.'}]


In [9]:
type(json_response)

NoneType

### 2. input structuring


In [10]:
user_prompt = """
Get me the capitals of the following countries:
```
1. Italy
2. France
3. Germany
``
"""
messages = [{'role':'system','content':system_prompt}]
messages.append({'role':'user','content':user_prompt})
response = get_chatbot_response(client,model_name,messages)
print(response)

[
    {
        "country": "Italy",
        "capital": "Rome"
    },
    {
        "country": "France",
        "capital": "Paris"
    },
    {
        "country": "Germany",
        "capital": "Berlin"
    }
]



In [11]:
json_response=response_to_json(response)
print(json_response)

[{'country': 'Italy', 'capital': 'Rome'}, {'country': 'France', 'capital': 'Paris'}, {'country': 'Germany', 'capital': 'Berlin'}]
None


### 3. Give the model time to think (Chain of thought) (used GEMINI PRO FOR THIS,NOT FLASH)
##### 

In [12]:
user_prompt = """
Calculate the result of this equation: 1+3

Your output should be in a structured json format exactly like the one bellow. You are not allowed to write anything other than the json object:
{
    result: The final number resulted from calculating the equation above
}
"""

messages = [{'role':'user','content':user_prompt}]
response = get_chatbot_response(client,model_name,messages)
print(response)

```json
{
  "result": 4
}
```


In [13]:
42349/2*3454+45345*22-45354


74088959.0

In [14]:
user_prompt = """
Calculate the result of this equation: 42349/2*3454+45345*22-45354


Your output should be in a structured json format exactly like the one bellow. You are not allowed to write anything other than the json object:
{
    result: The final number resulted from calculating the equation above
}
"""

messages = [{'role':'user','content':user_prompt}]
response = get_chatbot_response(client,model_name,messages)
print(response)

```json
{
  "result": 78570747
}
```


In [16]:
#ERROR
74088959-78570747

-4481788

#### Modified Prompt for chain of thoughts

In [15]:
user_prompt = """
Calculate the result of this equation: 42349/2*3454+45345*22-45354

Your output should be in a structured json format exactly like the one bellow. You are not allowed to write anything other than the json object:
{
    steps: This is where you solve the equation bit by bit following the BEDMAS order of operations. You need to show your work and calculate each step leading to final result. Feel free to write here in free text. 
    result: The final number resulted from calculating the equation above
}
"""

messages = [{'role':'user','content':user_prompt}]
response = get_chatbot_response(client,model_name,messages)
print(response)

```json
{
  "steps": "Following BEDMAS (Brackets, Exponents, Division/Multiplication, Addition/Subtraction):\n\n1. Division: 42349 / 2 = 21174.5\n2. Multiplication 1: 21174.5 * 3454 = 73081983\n3. Multiplication 2: 45345 * 22 = 997590\n4. Addition: 73081983 + 997590 = 74079573\n5. Subtraction: 74079573 - 45354 = 74034219",
  "result": 74034219
}
```


In [17]:
#ERROR NOW-> REDUCED
74088959-74034219

54740

# RAG - Retrieval Augmented Generation

#### Asking about a subject that the LLM does not know anything about

In [21]:
user_prompt = """
What's new in the product my company launched?
"""

messages = [{'role':'user','content':user_prompt}]
response = get_chatbot_response(client,model_name,messages)
print(response)

Please tell me what product your company launched! I need that information to tell you what's new.  For example, you could say:

"My company launched a new version of our photo editing software, PhotoSpark."

Or:

"My company just released the FizzBuzz 3000, our latest fizzy drink dispenser."



#### Giving Context to the unknown subject

In [22]:

gpx_3000 =""" The TitanX GPX introduces groundbreaking advancements, setting a new standard for high-performance graphic processors. It features a cutting-edge 5nm architecture, delivering unparalleled efficiency and performance, with 20 billion transistors optimized for AI-driven workloads and next-gen gaming. The TitanX GPX is available in two variants: the standard GPX 7500 and the flagship GPX 9500, boasting core clock speeds of 2.2 GHz and 2.5 GHz respectively.

Equipped with the latest RAYGEN 3.0 technology, it offers real-time ray tracing with 40% faster rendering speeds compared to its predecessor. The enhanced TensorCore 5.0 delivers up to 2x the AI performance, enabling features like super-resolution upscaling and AI-assisted image rendering with minimal latency. For content creators, the TitanX GPX introduces ProRender Studio, a hardware-accelerated toolset for seamless 8K video editing and 3D rendering.

Cooling technology has also been reimagined, featuring a patented triple-fan VaporFlow system that maintains peak performance under heavy loads while operating at ultra-low noise levels. The TitanX GPX supports PCIe Gen 5.0 for up to 60% faster data transfer and is equipped with 24GB of GDDR7 VRAM, ensuring smooth performance even in the most demanding scenarios.

Gamers and professionals will appreciate the new AdaptiveSync AI, which dynamically adjusts refresh rates and power consumption for maximum efficiency and an uninterrupted experience. Additionally, the TitanX GPX includes advanced security features like SecureCompute, ensuring sensitive data remains protected during processing.

With a starting price of $899 for the GPX 7500 and $1,299 for the GPX 9500, the TitanX GPX delivers unmatched power and versatility for gamers, creators, and professionals alike. It is expected to launch globally in Q2 of 2024, ushering in a new era of computational excellence."""


In [23]:
user_prompt = f"""
{gpx_3000}

What's new in my product?
"""

messages = [{'role':'user','content':user_prompt}]
response = get_chatbot_response(client,model_name,messages)
print(response)

The TitanX GPX introduces several groundbreaking features:

* **New Architecture:**  A cutting-edge 5nm architecture with 20 billion transistors optimized for AI and next-gen gaming.
* **Two Variants:** GPX 7500 (2.2 GHz) and GPX 9500 (2.5 GHz).
* **RAYGEN 3.0:** Real-time ray tracing with 40% faster rendering speeds.
* **TensorCore 5.0:** Up to 2x AI performance for features like super-resolution and AI-assisted image rendering.
* **ProRender Studio:**  A hardware-accelerated toolset for 8K video editing and 3D rendering.
* **VaporFlow Cooling:** Patented triple-fan system for peak performance and low noise.
* **PCIe Gen 5.0 Support:** Up to 60% faster data transfer.
* **24GB GDDR7 VRAM:**  For smooth performance in demanding scenarios.
* **AdaptiveSync AI:** Dynamically adjusts refresh rates and power consumption.
* **SecureCompute:** Advanced security features for data protection.



In [24]:
#### Automatically extract context data from database

In [30]:
rtx_9000 =""" The VegaCore G7 redefines affordability in high-performance graphics processing, delivering cutting-edge technology at a price point that’s accessible to everyone. Built on a refined 4nm architecture, the VegaCore G7 is optimized for efficiency and performance, packing 16 billion transistors into a compact design. With a core clock speed of 2.4 GHz and 18GB of ultra-fast GDDR7 memory, it effortlessly handles modern gaming, AI workloads, and creative applications.

The VegaCore G7 features HyperRay 2.0 technology, offering real-time ray tracing with precision lighting and shadows, rivaling GPUs at twice its cost. Its AI-boosted NeuroCore engine enhances performance in machine learning tasks, upscaling, and real-time image generation. For gamers, the VegaCore G7 introduces SmartSync Max, a dynamic refresh rate system that eliminates screen tearing while conserving power.

Designed for creators on a budget, it supports seamless 4K video editing and rendering with ProRender Lite, delivering professional-grade results. The innovative IceFlow Dual-Cooling system keeps temperatures low during intense workloads, ensuring sustained performance with minimal fan noise. 

The VegaCore G7 also embraces future-proofing with PCIe Gen 5.0 and full support for next-gen APIs like DirectX 12 Ultimate and Vulkan. Despite its advanced features, it remains highly energy-efficient, consuming 30% less power than competing models. 

Priced at just $499, the VegaCore G7 brings high-end graphics capabilities to gamers, creators, and professionals without breaking the bank. Launching in Q1 of 2024, the VegaCore G7 is poised to set a new benchmark for affordable innovation in the GPU market.

"""

In [68]:
data = [gpx_3000,rtx_9000]

In [69]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings


In [70]:
embedding_client = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

In [71]:
def get_embedding(embedding_client,text_input):
    # output = embedding_client.embeddings.create(input = text_input,model=model_name)
    
    # embedings = []
    # for embedding_object in output.data:
    #     embedings.append(embedding_object.embedding)
    text_embeddings = embedding_client.embed_query(text_input)

    return text_embeddings

In [72]:
user_prompt = """What's new in the product my company launched Which is cheaper and has a lot of new technology?"""
user_prompt_embeddings = get_embedding(embedding_client,user_prompt)
print(user_prompt_embeddings)

[0.025023242458701134, -0.029425138607621193, -0.02370302565395832, 0.02087012492120266, 0.03005886636674404, -0.017354816198349, 0.040655795484781265, 0.015004133805632591, -0.03316664695739746, -0.0006572362035512924, -0.010689714923501015, 0.02045612595975399, 0.030596641823649406, 0.028133098036050797, 0.00045240126200951636, -0.05062270537018776, 0.0379929393529892, 0.010903503745794296, -0.08561782538890839, -0.04043088108301163, 0.008252979256212711, 0.017564963549375534, -0.06262002140283585, -0.01615682989358902, -0.01953445002436638, -0.00429777754470706, 0.02592288702726364, 0.009991437196731567, -0.010552148334681988, -0.0050605591386556625, 0.0011955726658925414, 0.024166421964764595, -0.015059259720146656, 1.9046781289944192e-06, 0.016891827806830406, 0.01892942190170288, -0.0514502115547657, 0.06808768212795258, -0.0005848486325703561, 0.002134028123691678, -0.01915089413523674, -0.031328149139881134, -0.07271043956279755, 0.01125387568026781, 0.04785545915365219, -0.022

In [86]:
len(user_prompt_embeddings)

768

In [79]:
data_embeddings = [get_embedding(embedding_client,x) for x in data]
print(data_embeddings)

[[0.006872884929180145, 0.0048769558779895306, -0.02769157849252224, 0.004376297816634178, 0.0299284178763628, -0.028672056272625923, 0.047561176121234894, -0.00886872224509716, 0.008629356510937214, -0.06792403757572174, -0.020681461319327354, 0.04515189677476883, 0.023318469524383545, 0.054365020245313644, -0.047586020082235336, 0.01730046607553959, 0.011260353028774261, -0.04899723455309868, -0.09349747747182846, 0.031237086281180382, -0.000667278713081032, -0.009611167013645172, -0.08710430562496185, 0.006718757096678019, 0.005019885953515768, 0.0018020081333816051, -0.005059306975454092, 0.00694841705262661, -0.00746766896918416, -0.023207640275359154, 0.012950435280799866, -0.02921873703598976, 0.008197754621505737, -0.015785759314894676, -0.029037920758128166, -0.016614127904176712, 0.015741651877760887, 0.017013147473335266, 0.10134094953536987, -0.04961330071091652, -0.04164060577750206, 0.012987587600946426, -0.019380154088139534, 0.030820991843938828, 0.015942061319947243, 0

In [91]:
print(len(data_embeddings))
len(data_embeddings[0])

2


768

In [81]:

data_similaraty_scores = cosine_similarity([user_prompt_embeddings], data_embeddings)
print(data_similaraty_scores)

[[0.46729566 0.48160282]]


In [82]:
closest_entry_index=data_similaraty_scores.argmax()
closest_entry_index


np.int64(1)

In [83]:

data[closest_entry_index]

' The VegaCore G7 redefines affordability in high-performance graphics processing, delivering cutting-edge technology at a price point that’s accessible to everyone. Built on a refined 4nm architecture, the VegaCore G7 is optimized for efficiency and performance, packing 16 billion transistors into a compact design. With a core clock speed of 2.4 GHz and 18GB of ultra-fast GDDR7 memory, it effortlessly handles modern gaming, AI workloads, and creative applications.\n\nThe VegaCore G7 features HyperRay 2.0 technology, offering real-time ray tracing with precision lighting and shadows, rivaling GPUs at twice its cost. Its AI-boosted NeuroCore engine enhances performance in machine learning tasks, upscaling, and real-time image generation. For gamers, the VegaCore G7 introduces SmartSync Max, a dynamic refresh rate system that eliminates screen tearing while conserving power.\n\nDesigned for creators on a budget, it supports seamless 4K video editing and rendering with ProRender Lite, del

In [84]:
user_prompt_with_data = f"""
{data[closest_entry_index]}

{user_prompt}
""" 

In [85]:
messages = [{'role':'user','content':user_prompt_with_data}]
response = get_chatbot_response(client,model_name,messages)
print(response)

Your company launched the VegaCore G7 GPU, and its key selling points as a cheaper, technologically advanced product are:

* **Refined 4nm Architecture:**  Smaller, more efficient, and more powerful due to packing 16 billion transistors.
* **High-Speed Components:** 2.4 GHz core clock, 18GB GDDR7 memory for smooth handling of demanding tasks.
* **HyperRay 2.0:** Real-time ray tracing comparable to much more expensive GPUs.
* **NeuroCore AI Engine:** Boosts performance in AI tasks, upscaling, and image generation.
* **SmartSync Max:** Dynamic refresh rate eliminates screen tearing and saves power.
* **ProRender Lite:** Enables professional 4K video editing and rendering for creators.
* **IceFlow Dual-Cooling:** Efficient cooling system for quiet and sustained performance.
* **Future-Proofing:** PCIe Gen 5.0 support and compatibility with next-gen APIs (DirectX 12 Ultimate, Vulkan).
* **High Energy Efficiency:** Consumes 30% less power than competitors.
* **Affordable Price:**  $499 make