In [None]:
import spacy

# Load the spaCy 'en_core_web_sm' pipeline once at module level;
# preprocess_text() below reuses this shared `nlp` object on every call.
nlp = spacy.load('en_core_web_sm')

def preprocess_text(text):
    """Normalize raw text into a single space-separated string of lemmas.

    The input is lower-cased and passed through the module-level spaCy
    pipeline ``nlp``. Stop words and tokens that are not purely alphabetic
    are discarded; the lemma of every remaining token is kept, in order.

    Args:
        text: The string to normalize.

    Returns:
        The kept lemmas joined by single spaces (an empty string when no
        token survives the filter).
    """
    lemmas = []
    for tok in nlp(text.lower()):
        # Skip stop words and anything containing non-alphabetic characters.
        if tok.is_stop or not tok.is_alpha:
            continue
        lemmas.append(tok.lemma_)
    return ' '.join(lemmas)

# Example usage: run one sample sentence through preprocess_text().
text = "The quick brown fox jumps over the lazy dog."
# `preprocessed_text` is also the input to the embedding example in a later cell.
preprocessed_text = preprocess_text(text)
print(preprocessed_text)


In [None]:
from sentence_transformers import SentenceTransformer

# Load the pre-trained 'all-MiniLM-L6-v2' sentence-transformers model once;
# generate_embedding() below calls encode() on this shared `model` object.
model = SentenceTransformer('all-MiniLM-L6-v2')

def generate_embedding(text):
    """Encode ``text`` with the module-level ``model`` and return the result.

    Args:
        text: The input handed straight to ``model.encode``.

    Returns:
        Whatever ``model.encode(text)`` returns, unmodified.
    """
    return model.encode(text)

# Example usage: embed the string produced by the preprocessing example.
# NOTE: `preprocessed_text` is defined in the earlier preprocessing cell,
# so that cell must have been run first.
embedding = generate_embedding(preprocessed_text)
print(embedding)


In [None]:
import faiss
import numpy as np

# Placeholder corpus (in practice this will be a much longer list of texts).
chapter_texts = ["Text of chapter 1", "Text of chapter 2"]

# Preprocess and embed each text, stacking the vectors into one 2-D array
# with one row per text.
embeddings = np.array([generate_embedding(preprocess_text(chapter))
                       for chapter in chapter_texts])

# Build a flat L2 FAISS index sized to the embedding dimension and fill it.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

# Persist the index to disk so a later cell can reload it.
faiss.write_index(index, "embeddings.index")


In [None]:
def search(query, index, top_k=5):
    """Return the positions of the indexed embeddings closest to ``query``.

    The query is preprocessed with ``preprocess_text`` and embedded with
    ``generate_embedding`` before being looked up in ``index``.

    Args:
        query: The raw query string.
        index: A FAISS-style index exposing ``search(matrix, k)`` that
            returns a ``(distances, labels)`` pair.
        top_k: Maximum number of neighbours to return.

    Returns:
        A 1-D integer array of at most ``top_k`` index positions, in the
        order the index returned them. It is shorter than ``top_k`` when the
        index holds fewer vectors than requested.
    """
    query_embedding = generate_embedding(preprocess_text(query))
    # FAISS only accepts float32 input, so cast explicitly instead of relying
    # on the embedding model's output dtype.
    query_matrix = np.asarray([query_embedding], dtype=np.float32)
    _, neighbor_ids = index.search(query_matrix, top_k)
    hits = neighbor_ids[0]
    # FAISS pads the label row with -1 when fewer than top_k neighbours exist;
    # those placeholders are not valid positions, so drop them.
    return hits[hits >= 0]

# Reload the index that an earlier cell wrote to "embeddings.index". This
# rebinds the module-level `index`, which the example below passes to search()
# and which the Flask handler search_endpoint() also reads.
index = faiss.read_index("embeddings.index")

# Example query: relies on search()'s default top_k of 5.
query = "Find resources for brown fox."
results = search(query, index)
print(results)


In [None]:
from flask import Flask, request, jsonify

# Flask application object; the @app.route handler and app.run() below use it.
app = Flask(__name__)

@app.route('/search', methods=['GET'])
def search_endpoint():
    """Handle ``GET /search?query=...`` by searching the module-level ``index``.

    Returns:
        A JSON response ``{"results": [...]}`` with the positions returned by
        ``search``, or a 400 response ``{"error": ...}`` when the ``query``
        parameter is missing or blank.
    """
    query = request.args.get('query')
    # Without this guard a missing parameter reaches preprocess_text() as None
    # and surfaces as an unhandled exception (HTTP 500) instead of a client error.
    if query is None or not query.strip():
        return jsonify({"error": "Missing required 'query' parameter."}), 400
    results = search(query, index)
    return jsonify({"results": results.tolist()})

if __name__ == '__main__':
    # Debug mode turns on Werkzeug's auto-reloader by default, which restarts
    # the process and does not work inside a Jupyter kernel; disable it here.
    # NOTE: debug=True also exposes the interactive debugger, which can execute
    # arbitrary code -- use it for local development only.
    app.run(debug=True, use_reloader=False)
