# Importing Libraries

In [None]:
import csv
import json
import os
import re

import numpy as np
import pandas as pd
import psycopg2
from groq import Groq
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


# Creating Embedding of Questions
    SentenceTransformer('multi-qa-mpnet-base-dot-v1')

In [None]:
# Module-level embedding model: create_embedding() encodes CSV questions with
# it and the driver code passes it to top_answer() to encode user queries.
model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')

In [None]:
def create_embedding(csv_file):
    """Load a CSV of questions and attach an embedding for every question.

    Args:
        csv_file: Path (or any other input accepted by ``pandas.read_csv``)
            of a CSV file that contains a ``question`` column.

    Returns:
        The loaded DataFrame with an added ``embedding`` column. Each cell
        holds the embedding of that row's question as a plain Python list,
        produced by the module-level ``model``.
    """
    data = pd.read_csv(csv_file)

    # Encode every question in one call so the model can batch the work,
    # instead of running a separate encode() per row.
    questions = data['question'].tolist()
    vectors = model.encode(questions) if questions else []

    # Build an object Series aligned on the frame's index so each cell keeps
    # one list per row (rather than pandas trying to expand a 2-D array).
    data['embedding'] = pd.Series(
        [vector.tolist() for vector in vectors],
        index=data.index,
        dtype=object,
    )

    print("Embedding Created")

    return data


# Storing Embeddings in the Database
    PgVector

In [None]:
def get_db_connection():
    """Open and return a new psycopg2 connection to the chatbot database.

    Connection settings are taken from the standard PostgreSQL environment
    variables (``PGDATABASE``, ``PGUSER``, ``PGPASSWORD``, ``PGHOST``,
    ``PGPORT``) when they are set, so real credentials do not have to live in
    the source. The previous hard-coded values remain as local-development
    defaults.

    Returns:
        An open ``psycopg2`` connection. The caller is responsible for
        closing it.
    """
    return psycopg2.connect(
        dbname=os.environ.get("PGDATABASE", "chatbot"),
        user=os.environ.get("PGUSER", "postgres"),
        # Development-only fallback; set PGPASSWORD for any shared database.
        password=os.environ.get("PGPASSWORD", "1234"),
        host=os.environ.get("PGHOST", "localhost"),
        port=os.environ.get("PGPORT", "5432"),
    )


In [None]:
def insert_data(data):
    """Insert every row of *data* into the ``questions`` table.

    Args:
        data: DataFrame with ``question``, ``answer`` and ``embedding``
            columns, where each embedding is a list of floats.

    All rows are written in a single transaction that is committed once at
    the end. The cursor and connection are always closed, so a failed insert
    neither leaks the connection nor leaves a half-written batch committed.
    """
    insert_query = """
    INSERT INTO questions (question, answer, embedding)
    VALUES (%s, %s, %s::vector)
    """

    # Materialise the parameter tuples up front; this also surfaces a missing
    # column before a database connection is opened.
    rows = list(zip(data['question'], data['answer'], data['embedding']))

    conn = get_db_connection()
    try:
        cur = conn.cursor()
        try:
            cur.executemany(insert_query, rows)
        finally:
            cur.close()
        conn.commit()
    finally:
        # Closing without a commit discards any uncommitted work.
        conn.close()

    print("Data inserted successfully!")


In [None]:
# Build one embedded DataFrame per source CSV, in the listed order.
data1, data2, data3 = (
    create_embedding(csv_path)
    for csv_path in ("CuratedDataSet.csv", "ShravanDataSet.csv", "Extra.csv")
)

Embedding Created
Embedding Created
Embedding Created


In [None]:
# Store each embedded dataset in the database, one after another.
for frame in (data1, data2, data3):
    insert_data(frame)

Data inserted successfully!
Data inserted successfully!
Data inserted successfully!


# Function for answer retrieval

In [None]:
def top_answer(query, model, conn, top_k=10):
    """Return the stored answers whose question embeddings are nearest to *query*.

    Args:
        query: The user's question text.
        model: Embedding model exposing
            ``encode(text, convert_to_tensor=False)`` and returning an
            iterable of numbers.
        conn: Open database connection to the database that holds the
            ``questions`` table.
        top_k: Maximum number of answers to return.

    Returns:
        A list with the ``answer`` value of each matching row, ordered from
        nearest to farthest by pgvector's ``<=>`` (cosine distance) operator.
    """
    query_embedding = model.encode(query, convert_to_tensor=False)

    # pgvector's text input format is "[v1,v2,...]".
    embedding_str = '[' + ','.join(map(str, query_embedding)) + ']'

    cursor = conn.cursor()
    try:
        # Only the answer column is used, so the id, question and stored
        # embedding of each row are not fetched. The explicit ::vector cast
        # mirrors the one used when the rows are inserted.
        cursor.execute(
            """
    SELECT answer
    FROM questions
    ORDER BY embedding <=> %s::vector
    LIMIT %s
    """,
            (embedding_str, top_k),
        )
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor even if the query fails.
        cursor.close()


# Llama Answer Generation
    Using Groq API

In [None]:
# Read the Groq API key from the environment instead of committing it to the
# notebook. The key that used to be hard-coded here has been exposed in source
# and should be revoked and replaced.
grok_api_key = os.environ.get("GROQ_API_KEY")

In [None]:
# Get this from Groq Documentation
def GroqChat(question, model_name="llama-3.1-70b-versatile"):
    """Send *question* to Groq as a single user message and return the reply text.

    Args:
        question: Prompt text sent as the content of one ``user`` message.
        model_name: Groq model id to query. Defaults to the id this notebook
            has always used.
            NOTE(review): hosted model ids get retired over time - confirm
            this default is still served before relying on it.

    Returns:
        The content of the first completion choice, with a comma inserted
        between any back-to-back ``}``/``{`` pair. An empty string is returned
        if the reply carries no content.
    """
    client = Groq(api_key=grok_api_key)

    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": question}],
        model=model_name,
    )

    # Guard against a reply with no text so re.sub() is never handed None.
    reply_text = chat_completion.choices[0].message.content or ""

    # Turn "}{" / "} {" sequences into "}, {".
    return re.sub(r'}\s*{', '}, {', reply_text)

In [None]:
def generate_answer(query, retrieved_responses):
    """Build a context-grounded prompt for *query* and get the model's answer.

    Args:
        query: The user's question text.
        retrieved_responses: Answer strings retrieved for the query; they are
            joined with newlines to form the prompt context.

    Returns:
        A two-item list ``[context, answer]``. When *retrieved_responses* is
        empty, the context is ``""`` and the answer is a fixed fallback
        message; ``GroqChat`` is not called in that case.
    """
    if not retrieved_responses:
        # Keep the same [context, answer] shape as the normal path: callers
        # index [1], which on a bare string would yield a single character.
        return ["", "We are unable to response for this query."]

    context = "\n".join(retrieved_responses)

    prompt = f"Answer the following query based solely on the provided context. Do not include any information from outside the context, and do not mention that a context is provided. If the context does not address the query, respond with ' We're currently in the process of collecting data to provide a comprehensive answer. Thank you for your patience as we work on this. ' If the query includes greetings like 'Good morning' or 'Good evening', respond accordingly. Query: {query} Context: {context}"

    return [context, GroqChat(prompt)]


# Driver Code

In [None]:
if __name__ == "__main__":
    # Interactive loop: read a query, retrieve the nearest stored answers,
    # ask the LLM to answer from them, and print the result. Type "stop" to
    # quit.

    # Establish DB connection
    conn = get_db_connection()

    try:
        while True:
            query = input("Enter your query here: ").strip()

            # Ignore empty / whitespace-only input.
            if not query:
                continue

            if query.lower() == "stop":
                break

            # Drop duplicate answers while keeping the nearest-first order
            # returned by top_answer (a set would scramble the ranking).
            retrieved_responses = list(dict.fromkeys(top_answer(query, model, conn)))
            generated_answer = generate_answer(query, retrieved_responses)

            # generate_answer may hand back a plain message string instead of
            # a [context, answer] list; print the whole message in that case.
            if isinstance(generated_answer, str):
                answer_text = generated_answer
            else:
                answer_text = generated_answer[1]

            print('You: ', query)
            print("Answer: ", answer_text)

            print()
    finally:
        # Close the DB connection on normal exit, Ctrl-C/EOF, or any error.
        conn.close()

You:  highest package till now ?
Llama Answer:  We're currently in the process of collecting data to provide a comprehensive answer. Thank you for your patience as we work on this.

You:  highets placement ?
Llama Answer:  We're currently in the process of collecting data to provide a comprehensive answer on highest placement figures.Placement statistics will be available once the first batch of students graduates. However, students are currently gaining experience through internships at notable startups with competitive stipends.

You:  highest package till now from siatre university ?
Llama Answer:  We're currently in the process of collecting data to provide a comprehensive answer. Thank you for your patience as we work on this.

