In [1]:
import torch
from transformers import AutoModelForQuestionAnswering ,AutoTokenizer , pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face checkpoint loaded below as the question-answering model and tokenizer.
model_name = "deepset/roberta-base-squad2"
# Sentence-Transformers checkpoint loaded below as the text encoder.
model1 = 'all-mpnet-base-v2'


In [3]:
# The QA model and its tokenizer are loaded from the same checkpoint name.
model = AutoModelForQuestionAnswering.from_pretrained(
    pretrained_model_name_or_path=model_name
)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name
)
# Separate sentence encoder; later cells use it to embed the incoming query.
encoder = SentenceTransformer(model_name_or_path=model1)

In [4]:
from langchain.document_loaders import TextLoader

# Load chat.txt from disk; the result is passed to the text splitter in the next cell.
loader = TextLoader(
    file_path=r"C:\Users\smdar\OneDrive\Desktop\chat.txt",
)
text = loader.load()

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with newline, space and comma separators, a chunk size
# of 100 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0,
    separators=["\n", " ", ","],
)
chunks = text_splitter.split_documents(documents=text)

In [6]:
import pandas as pd

# Load the question/answer table.
# NOTE: on_bad_lines='skip' silently drops malformed rows, so the number of
# rows in `df` can be smaller than the number of lines in the CSV.
try:
    df = pd.read_csv(r"C:\Users\smdar\Desktop\QA.csv", on_bad_lines='skip')
except pd.errors.ParserError as e:
    print(f"Error parsing file: {e}")
    # Re-raise instead of swallowing: otherwise `df` stays undefined and every
    # later cell fails with a confusing NameError far from the real cause.
    raise

In [7]:
# Preview the first rows to confirm the Question/Answer columns loaded as expected.
df.head()

Unnamed: 0,Question,Answer
0,What is machine learning?,Machine learning is a subset of artificial int...
1,What are the main types of machine learning?,The main types of machine learning are supervi...
2,Can you explain supervised learning?,Supervised learning is a type of machine learn...
3,What is unsupervised learning?,Unsupervised learning is a type of machine lea...
4,Explain reinforcement learning.,Reinforcement learning is a type of machine le...


In [8]:
# embeddings = [encoder.encode(str(ans)) for ans in df.Question]

In [9]:
# import pickle
# with open("embeddings_bert.pkl","wb") as f:
#     pickle.dump(embeddings,f)

In [10]:
import pickle

# SECURITY: pickle.load can execute arbitrary code while deserialising. Only
# load a pickle that this notebook (or another trusted source) produced.
with open("embeddings_bert.pkl", "rb") as f:
    vector_index = pickle.load(f)

# Retrieval maps position i in `vector_index` to row i of `df`, so a stale
# pickle of a different length would silently return the wrong answers.
if len(vector_index) != len(df):
    raise ValueError(
        f"embeddings_bert.pkl holds {len(vector_index)} vectors but df has "
        f"{len(df)} rows; regenerate the embeddings from the current CSV."
    )

In [11]:
def get_similar_chunks(query):
    """Return the stored answer that best matches ``query``.

    The query is embedded with the module-level ``encoder`` and compared, by
    cosine similarity, against every vector in ``vector_index``. The
    ``Answer`` of the ``df`` row at the best-scoring position is returned.
    This presumes ``vector_index`` holds one embedding per ``df`` row, in row
    order.

    Args:
        query: Text to embed and match against the indexed vectors.

    Returns:
        The ``df.Answer`` value at the position of the highest similarity
        score; the earliest position wins on ties.

    Raises:
        ValueError: If ``vector_index`` is empty.
    """
    if len(vector_index) == 0:
        raise ValueError("vector_index is empty; there is nothing to search.")

    encoded_query = encoder.encode(query)

    # One batched call scores every stored vector against the query (result is
    # n x 1), replacing a Python loop of n separate similarity computations.
    scores = cosine_similarity(vector_index, [encoded_query])[:, 0]

    # argmax returns the first position of the maximum, which matches the
    # previous "sort a copy, then list.index(max)" behaviour without the sort.
    best_position = int(scores.argmax())

    # Positional lookup: the vectors are matched to rows by position, not by
    # index label.
    return df.Answer.iloc[best_position]


In [12]:
# Try retrieval on its own: this returns the stored answer text, without running the QA model.
get_similar_chunks("what is machine learning")

'Machine learning is a subset of artificial intelligence that involves the development of algorithms and models that enable computers to learn from data and make predictions or decisions without being explicitly programmed.'

In [13]:
question = [{'question':'who is the prime minister of india ?','context':'''beyond theoretical learning by providing students with ample opportunities to hone their skills. State-of-the-art laboratories equipped with modern technology allow students to experiment, design, and build prototypes, bridging the gap between theory and practice.
The college fosters a culture of innovation and entrepreneurship through initiatives like the Incubation Centre, which provides support and guidance to student startups. This hands-on experience equips graduates with the necessary skills to thrive in the dynamic corporate world.SECE recognizes that holistic development is crucial for success. The college provides a vibrant campus life filled with extracurricular activities, events, and clubs. Students can pursue their passions in sports, music, arts, and literature through various clubs and societies.
The college also prioritizes physical well-being with well-maintained sports facilities and a focus on healthy living. This holistic approach fosters a positive and supportive environment where students can develop not only their academic skills but also their social and personal skills.SECE's commitment to academic excellence, industry preparedness, and holistic development has earned it a stellar reputation. The college has consistently been ranked among the top engineering colleges in India by various magazines and ranking agencies, a testament to its unwavering dedication to quality education.Prime minister of india is narendra modi'''}]
# Manual run of the QA model on the hand-written question/context pair defined above.
inputs = tokenizer(question[0]['question'],question[0]['context'],return_tensors="pt")
# Inference only: no_grad avoids building an autograd graph for the forward pass.
with torch.no_grad():
    output = model(**inputs)

In [14]:
def get_answer(question):
    """Answer ``question`` by extracting a span from the best-matching stored text.

    The context is fetched with ``get_similar_chunks`` and printed, then the
    question/context pair is run through the module-level ``tokenizer`` and
    ``model``. The answer is the decoded token span between the highest
    scoring start and end positions.

    Args:
        question: The user's question as a string.

    Returns:
        The extracted answer text with surrounding whitespace removed, or the
        fixed apology message when no usable span is found: the span starts at
        position 0 (and so contains the leading ``<s>`` token), the end
        precedes the start, or the decoded text is empty.
    """
    fallback = "Sorry i'm Unable to answer the question..."

    context = get_similar_chunks(question)
    print(context)

    # Truncate only the context so an over-long passage cannot exceed the
    # model's maximum input length and crash the forward pass.
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",
    )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**inputs)

    answer_start_id = int(torch.argmax(output.start_logits))
    answer_end_id = int(torch.argmax(output.end_logits))

    # A span beginning at position 0 contains the "<s>" token, which the
    # previous substring check treated as "no answer". An inverted span holds
    # no tokens at all and used to come back as an empty string.
    if answer_start_id == 0 or answer_end_id < answer_start_id:
        return fallback

    answer_tokens = inputs.input_ids[0, answer_start_id:answer_end_id + 1]
    # Drop separator tokens in case the span crosses from question to context.
    answer = tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()
    return answer if answer else fallback

In [15]:
# End-to-end check: the retrieved context is printed first, then the extracted answer is shown.
get_answer("what is what are convolutional neural networks")

Convolutional Neural Networks (CNNs) are a type of neural network designed for processing grid-like data, such as images and video. They use convolutional layers to automatically learn hierarchical representations, making them particularly effective in computer vision tasks like image classification and object detection.


'CNNs) are a type of neural network designed for processing grid-like data'

In [16]:
from flask import Flask , request , jsonify

app = Flask(__name__)

@app.route("/bert",methods=["GET"])
def return_response():
    """Answer the ``query`` URL parameter and return the result as JSON.

    Returns:
        ``{"output": <answer>}`` on success, or ``{"error": <message>}`` with
        HTTP status 400 when ``query`` is missing or blank.
    """
    inputtext = request.args.get("query", "").strip()
    if not inputtext:
        # Reject early with a JSON body rather than sending an empty question
        # to the model or returning the framework's default error page.
        return jsonify({"error": "Missing required 'query' parameter."}), 400

    answer = get_answer(inputtext)
    print(answer)
    return jsonify({"output": answer})

# Starts the server on port 4000; this call blocks the cell until it is stopped.
app.run(port = 4000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:4000
Press CTRL+C to quit


Convolutional Neural Networks (CNNs) are a type of neural network designed for processing grid-like data, such as images and video. They use convolutional layers to automatically learn hierarchical representations, making them particularly effective in computer vision tasks like image classification and object detection.


127.0.0.1 - - [27/Dec/2023 15:53:33] "GET /bert?query=what%20are%20cnn HTTP/1.1" 200 -


Convolutional Neural Networks
