<a href="https://colab.research.google.com/github/Jayameena832/Query-Engine-for-English-language./blob/main/Query_detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from qdrant_client import QdrantClient
import tensorflow as tf
from flask import Flask, jsonify, request

# Read the BigBasket product catalogue and, in the same expression, derive two
# helper columns from `description`:
#   - description_lower:     the description text lower-cased
#   - description_tokenized: the lower-cased text split on whitespace
# Rows with a missing description stay missing (NaN) in both derived columns.
products_df = pd.read_csv('/content/bigBasketProducts.csv').assign(
    description_lower=lambda frame: frame['description'].str.lower(),
    description_tokenized=lambda frame: frame['description_lower'].str.split(),
)

In [22]:
# Normalise the tokenised descriptions so that every row holds a list of string tokens.
#
# Rows whose description was missing are NaN after `.str.split()`; they are replaced
# by an empty token list. The column is deliberately NOT cast with `.astype(str)`:
# that would turn each list into its printed form (e.g. "['a', 'b']"), and any code
# iterating over a row would then see single characters instead of words.
# Assigning the result back to the column (rather than `fillna(..., inplace=True)` on
# a column selection) also avoids pandas' chained-assignment pitfalls.

# Number of products without a usable description (kept for inspection).
missing_values = int(products_df['description_tokenized'].isna().sum())

products_df['description_tokenized'] = products_df['description_tokenized'].apply(
    lambda tokens: [str(token) for token in tokens] if isinstance(tokens, list) else []
)

# Invariant after this cell: exactly one list of tokens per product, no NaN left.
assert products_df['description_tokenized'].map(lambda tokens: isinstance(tokens, list)).all()


In [24]:
# Vectorize the product descriptions using Word2Vec.
#
# Produces:
#   word2vec_model  - Word2Vec model trained on the product descriptions
#   product_vectors - list with one 128-dimensional numpy vector per row of
#                     `products_df`, in row order
#
# Each product vector is the sum of the Word2Vec vectors of the description's
# in-vocabulary tokens, divided by the description's total token count.
vector_size = 128

# Tokenise from the lower-cased text so every "sentence" handed to Word2Vec is a
# list of words; missing descriptions become an empty list.
token_lists = (
    products_df['description_lower'].fillna('').astype(str).str.split().tolist()
)

word2vec_model = Word2Vec(sentences=token_lists, vector_size=vector_size, min_count=5, window=5)
word_vectors = word2vec_model.wv

product_vectors = []
for tokens in token_lists:
    product_vector = np.zeros(vector_size)
    for token in tokens:
        # Tokens seen fewer than `min_count` times are not in the vocabulary; skip them.
        if token in word_vectors:
            product_vector += word_vectors[token]
    # Guard the division: a product with no tokens keeps the all-zero vector
    # instead of becoming NaN through a division by zero.
    if tokens:
        product_vector /= len(tokens)
    product_vectors.append(product_vector)


In [28]:
from qdrant_client import AsyncQdrantClient, models

In [None]:
# Store the vectorized product descriptions in Qdrant.
#
# ":memory:" starts qdrant-client's local in-process mode, so no Qdrant server is
# required. NOTE(review): to use a real deployment, construct the client with its
# URL instead (e.g. QdrantClient(url=...)) - confirm the intended target.
qdrant_client = QdrantClient(":memory:")

collection_name = 'products'
# Vector length is taken from the data; 128 is the fallback for an empty list.
embedding_dim = len(product_vectors[0]) if len(product_vectors) else 128

qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(size=embedding_dim, distance=models.Distance.COSINE),
)

# Build one point per product. The point id is the product's position in
# `product_vectors`, so search hits can be mapped back to the originating row.
points = []
for row_position, vector in enumerate(product_vectors):
    clean_vector = np.nan_to_num(np.asarray(vector, dtype=float))
    # An all-zero vector has no direction under cosine distance; do not index it.
    if not clean_vector.any():
        continue
    points.append(models.PointStruct(id=row_position, vector=clean_vector.tolist()))

# Upload in modest batches so a single request never carries the whole catalogue.
batch_size = 256
for start in range(0, len(points), batch_size):
    qdrant_client.upsert(
        collection_name=collection_name,
        points=points[start:start + batch_size],
    )

In [None]:
# Train a text-to-price regression model using TensorFlow/Keras.
#
# Despite the "language model" label, the network ends in Dense(1) and is trained
# with mean-squared-error against `price`, i.e. it regresses a price from the
# description text.
#
# The Embedding layer needs integer token ids, so a TextVectorization layer is
# placed in front of it: the model itself accepts raw strings (input shape
# (batch, 1)) and converts them to padded/truncated id sequences internally.
vocabulary_size = 10000      # must match the Embedding layer's input dimension
max_sequence_length = 128    # tunable cap on tokens per description; longer texts are truncated

# Keep only rows with a usable numeric target.
target_prices = pd.to_numeric(products_df['price'], errors='coerce')
has_price = target_prices.notna()
training_texts = (
    products_df.loc[has_price, 'description'].fillna('').astype(str).str.lower().tolist()
)
training_prices = target_prices[has_price].to_numpy(dtype='float32')

# Learn the vocabulary from the training descriptions.
text_tensor = tf.constant(training_texts, dtype=tf.string)
text_vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=vocabulary_size,
    output_sequence_length=max_sequence_length,
)
text_vectorizer.adapt(text_tensor)

model = tf.keras.Sequential([
    tf.keras.Input(shape=(1,), dtype=tf.string),
    text_vectorizer,
    tf.keras.layers.Embedding(vocabulary_size, 128),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128)),
    tf.keras.layers.Dense(128, activation='tanh'),
    tf.keras.layers.Dense(1)
])
model.compile(loss='mean_squared_error', optimizer='adam')

# Feed the strings through tf.data; shuffling is explicit here because Keras does
# not shuffle Dataset inputs on its own.
training_data = (
    tf.data.Dataset.from_tensor_slices((text_tensor[:, tf.newaxis], training_prices))
    .shuffle(max(len(training_texts), 1))
    .batch(32)
)
model.fit(training_data, epochs=10)

In [None]:
# Expose the trained Keras model over HTTP using Flask.
app = Flask(__name__)

@app.route('/query', methods=['POST'])
def query_products():
    """Run the model on the text sent in a POST request to /query.

    Request body: a JSON object with a string field ``query``.

    Returns:
        200 with ``{"response": <prediction as nested list>}`` on success, or
        400 with ``{"error": <message>}`` when the body is not a JSON object
        containing a non-empty string ``query``.
    """
    payload = request.get_json(silent=True)
    query = payload.get('query') if isinstance(payload, dict) else None
    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': "JSON body must contain a non-empty string field 'query'"}), 400

    # The query is wrapped into a batch holding a single one-string row.
    # NOTE(review): assumes `model` accepts raw text of shape (batch, 1) - confirm
    # against the cell that builds the model.
    prediction = model.predict(tf.constant([[query]]), verbose=0)

    # numpy arrays are not JSON serialisable; convert to plain Python lists first.
    return jsonify({'response': np.asarray(prediction).tolist()})

if __name__ == '__main__':
    # Debug mode is switched off on purpose: it enables Werkzeug's interactive
    # debugger (arbitrary code execution for anyone who can reach the port) and the
    # auto-reloader, which restarts the process and does not work from inside a
    # notebook kernel.
    app.run(debug=False, use_reloader=False)