In [None]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
Successfully installed openai-0.28.0


In [None]:
pip install transformers sentence-transformers scikit-learn

Collecting sentence-transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.1.1


In [None]:
from sentence_transformers import SentenceTransformer
import json
import re
import numpy as np
import os

# Shared sentence-embedding model; used for both the text chunks and the queries
# so that their vectors are comparable.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

def preprocess_text(text):
    """Split raw text into one cleaned chunk per record.

    A new chunk starts in front of every run of exactly five digits
    (presumably the college id in the source data -- confirm against the
    input file). Longer digit runs, e.g. six-digit numbers, are left intact
    inside their chunk instead of being cut in the middle.

    Args:
        text: The full raw text to split.

    Returns:
        A list of non-empty chunks with surrounding whitespace removed and
        each run of newlines collapsed to a single space.
    """
    # Zero-width split point: not preceded by a digit, followed by five digits
    # that are not themselves followed by another digit.
    record_start = r"(?<!\d)(?=\d{5}(?!\d))"
    pieces = re.split(record_start, text.strip())
    return [re.sub(r'\n+', ' ', piece.strip()) for piece in pieces if piece.strip()]

def load_and_preprocess(filepath, encoding='utf-8'):
    """Read a text file and split it into cleaned chunks.

    Args:
        filepath: Path of the text file to read.
        encoding: Text encoding used to decode the file. Passed explicitly so
            the result does not depend on the platform's default encoding.

    Returns:
        The list of chunks produced by ``preprocess_text`` for the file's text.
    """
    with open(filepath, 'r', encoding=encoding) as file:
        text = file.read()
    return preprocess_text(text)

def convert_to_vectors(chunks):
    """Encode ``chunks`` with the module-level ``model`` and return the result."""
    return model.encode(chunks)

def save_vectors_to_json(chunks, vectors, output_json):
    """Write paired chunks and vectors to ``output_json`` as a JSON list.

    Each entry is ``{'chunk': <chunk>, 'vector': <vector as a plain list>}``.
    Chunks and vectors are paired positionally with ``zip``, so extra items in
    the longer sequence are ignored.
    """
    records = []
    for text, embedding in zip(chunks, vectors):
        # .tolist() turns the array into plain Python numbers that json can serialise.
        records.append({'chunk': text, 'vector': embedding.tolist()})

    with open(output_json, 'w') as handle:
        json.dump(records, handle)

# Build the vector store: read and chunk the source text, embed every chunk,
# then persist chunk/vector pairs for the retrieval step.
SOURCE_TEXT_PATH = '/content/Engineering.txt'
VECTOR_STORE_PATH = 'college_data_vectors.json'

chunks = load_and_preprocess(SOURCE_TEXT_PATH)
vectors = convert_to_vectors(chunks)
save_vectors_to_json(chunks, vectors, VECTOR_STORE_PATH)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def load_vectors_from_json(input_json):
    """Load chunk/vector pairs from a JSON file.

    The file must contain a list of objects with ``'chunk'`` and ``'vector'`` keys.

    Returns:
        A ``(chunks, vectors)`` tuple: the list of chunk values in file order
        and a NumPy array built from the corresponding vectors.
    """
    with open(input_json, 'r') as handle:
        records = json.load(handle)

    texts = []
    embeddings = []
    for record in records:
        texts.append(record['chunk'])
        embeddings.append(np.array(record['vector']))

    return texts, np.array(embeddings)

def find_similar_chunk(query, chunks, vectors, top_n=5):
    """Rank stored chunks by cosine similarity to ``query``.

    Args:
        query: Text to search for; encoded with the module-level ``model``.
        chunks: Chunk texts, index-aligned with ``vectors``.
        vectors: Embeddings of ``chunks``.
        top_n: Number of best matches to return.

    Returns:
        A list of ``(chunk, similarity)`` tuples, most similar first.
    """
    # encode() takes a batch; wrap the query in a list and unwrap the single result.
    query_embedding = model.encode([query])[0]

    # One row of scores: the query against every stored vector.
    scores = cosine_similarity([query_embedding], vectors)[0]

    # argsort is ascending, so keep the tail and flip it for best-first order.
    best_first = scores.argsort()[-top_n:][::-1]

    ranked = []
    for idx in best_first:
        ranked.append((chunks[idx], scores[idx]))
    return ranked

# Reload the persisted chunk/vector pairs; this rebinds the module-level
# `chunks` and `vectors` that the retrieval code below reads.
chunks, vectors = load_vectors_from_json('college_data_vectors.json')

In [None]:
import getpass

# Never store the key in the notebook: keep a key that is already present in
# the environment, otherwise prompt for it without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [None]:
import openai

def generate_text(prompt):
    """Send ``prompt`` as a single user message to gpt-3.5-turbo.

    Returns:
        The message content of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[user_message],
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Interactive question-answering loop: retrieve the chunks most similar to each
# query and ask the chat model to answer from them.
while True:
    query = input().strip()

    # Typing "exit" (any capitalisation) ends the session.
    if query.lower() == 'exit':
        break
    # Skip blank input rather than spending an API call on an empty question.
    if not query:
        continue

    similar_chunks = find_similar_chunk(query, chunks, vectors)
    print(f"Query: {query}\n")

    # Keep the retrieved data, the instruction and the question visibly separated
    # so the words do not run together in the prompt.
    prompt = f"{similar_chunks}\n\nusing this data answer this query: {query}"
    generated_text = generate_text(prompt)
    print(generated_text)

Query: what is the college id of KONERU LAKSHMAIAH EDUCATION FOUNDATION  UNIVERSITY (K L COLLEGE OF ENGINEERING)

The college id of KONERU LAKSHMAIAH EDUCATION FOUNDATION UNIVERSITY (K L COLLEGE OF ENGINEERING) is 31645.
Query: state of the RAMACHANDRA COLLEGE OF ENGINEERING

RAMACHANDRA COLLEGE OF ENGINEERING is located in the state of Andhra Pradesh.
Query: Cource provided by BONAM VENKATACHALAMAYYA INSTITUTE OF  TECHNOLOGY & SCIENCE

The courses provided by BONAM VENKATACHALAMAYYA INSTITUTE OF TECHNOLOGY & SCIENCE are:
1. COMPUTER SCIENCE AND TECHNOLOGY
2. ELECTRONICS & TECHNOLOGY
3. MECHANICAL ENGINEERING TECHNOLOGY
4. ELECTRICAL AND TECHNOLOGY
Query: total number of Cources offered by BONAM VENKATACHALAMAYYA INSTITUTE OF  TECHNOLOGY & SCIENCE

There are a total of 5 courses offered by BONAM VENKATACHALAMAYYA INSTITUTE OF TECHNOLOGY & SCIENCE. These courses are:
1. COMPUTER SCIENCE AND TECHNOLOGY
2. MECHANICAL ENGINEERING TECHNOLOGY
3. ELECTRONICS & TECHNOLOGY
4. ELECTRICAL AND TEC