In [9]:
import gensim
import gensim.downloader as api
from gensim.models import Word2Vec
import pymilvus
from pymilvus import Collection, connections, CollectionSchema, FieldSchema, DataType, utility
import google.generativeai as genai
import numpy as np
import re
import os

# Step 1: Connect to Milvus and make sure the collection exists, is indexed and is loaded
connections.connect("default", host="localhost", port="19530")
collection_name = "rag_collection"

# Index settings used by both the "new collection" and "existing collection" paths.
index_params = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 1024},
}

if utility.has_collection(collection_name):
    # Reuse the existing collection; only build an index when it has none.
    collection = Collection(collection_name)
    if not collection.has_index():
        collection.create_index(field_name="embedding", index_params=index_params)
else:
    # No such collection yet: create it with an INT64 primary key and a
    # 300-dimensional float vector field, then index the vector field.
    id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True)
    embedding_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=300)
    collection_schema = CollectionSchema(fields=[id_field, embedding_field])
    collection = Collection(name=collection_name, schema=collection_schema)
    collection.create_index(field_name="embedding", index_params=index_params)

# Both paths end by loading the collection.
collection.load()

# Step 2: Word2Vec to Convert Text to Vectors
# Load the "word2vec-google-news-300" vectors through gensim's downloader API.
# A ValueError is reported on stdout and then re-raised unchanged.
try:
    word2vec_model = api.load("word2vec-google-news-300")
except ValueError as load_error:
    print("Error loading Word2Vec model:", load_error)
    raise

def get_sentence_vector(sentence, model):
    """Embed a sentence as the mean of its in-vocabulary word vectors.

    Args:
        sentence: Text to embed; it is tokenized with
            ``gensim.utils.simple_preprocess``.
        model: Word-vector lookup that supports ``word in model`` and
            ``model[word]`` and exposes a ``vector_size`` attribute
            (e.g. gensim ``KeyedVectors``).

    Returns:
        A 1-D array of length ``model.vector_size``: the element-wise mean of
        the vectors of all tokens found in ``model``. When no token is found
        (empty sentence, or every word is out of vocabulary) an all-zero
        vector is returned.
    """
    # Clean and tokenize the input sentence
    tokens = gensim.utils.simple_preprocess(sentence)
    word_vectors = [model[word] for word in tokens if word in model]
    if not word_vectors:
        # np.mean over an empty list yields a scalar NaN (with a RuntimeWarning),
        # which is not a usable embedding; return a correctly sized zero vector.
        return np.zeros(model.vector_size)
    return np.mean(word_vectors, axis=0)

# Step 3: Adding Data to Milvus
sample_corpus = [
    "Artificial Intelligence is transforming technology.",
    "Deep Learning is a subset of Machine Learning.",
    "Natural Language Processing is used for text data.",
]

# Build [id, embedding] pairs; each sentence's id is its position in sample_corpus.
entities = []
for doc_id, doc_text in enumerate(sample_corpus):
    if 'word2vec_model' in locals():
        doc_vector = get_sentence_vector(doc_text, word2vec_model)
    else:
        # No model in scope: fall back to an all-zero 300-dimensional vector.
        doc_vector = np.zeros(300)
    entities.append([doc_id, doc_vector])

if entities:
    # Split the pairs into the two column lists handed to insert(): ids, then vectors.
    ids, embeddings = (list(column) for column in zip(*entities))
    collection.insert([ids, embeddings])
    collection.load()  # load() is called again once the rows have been inserted

# Step 4: Retrieval Function

def retrieve_similar_context(query, top_k=3):
    """Return the corpus sentences whose embeddings are closest to ``query``.

    Args:
        query: Free-text question to embed and search for.
        top_k: Maximum number of hits requested from Milvus (``limit``).

    Returns:
        A list of strings taken from ``sample_corpus``, one per hit, in the
        order the hits are returned by the search.
    """
    # The model is a module-level name, so it has to be looked up in globals().
    # locals() inside a function only holds the function's own variables, so a
    # locals() check never finds the model and every query would be searched
    # as the zero vector.
    embedding_model = globals().get("word2vec_model")
    if embedding_model is not None:
        query_vector = get_sentence_vector(query, embedding_model)
    else:
        # No model available: fall back to an all-zero 300-dimensional vector.
        query_vector = np.zeros(300)
    # Search for top_k similar vectors in Milvus
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    results = collection.search([query_vector], "embedding", param=search_params, limit=top_k)
    # results[0] holds the hits for the single query vector; each hit id is
    # used as an index into sample_corpus.
    return [sample_corpus[result.id] for result in results[0]]

# Step 5: Gemini API to Generate Text

# Read the API key from the environment rather than embedding a secret in the
# notebook. NOTE(review): any key previously committed in this cell should be
# treated as exposed and revoked.
gemini_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "Set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable before running this cell."
    )
# google.generativeai is imported as `genai` at the top of the file.
genai.configure(api_key=gemini_api_key)
from google.generativeai import GenerativeModel

# Module-level model handle used by generate_answer().
model: GenerativeModel = genai.GenerativeModel("gemini-1.5-flash")

def generate_answer(query, context):
    """Ask the module-level ``model`` to answer ``query`` given ``context``.

    Args:
        query: The user's question, as a string.
        context: Iterable of context strings; they are joined with newlines.

    Returns:
        The ``text`` attribute of the object returned by
        ``model.generate_content``.
    """
    context_block = "\n".join(context)
    # Prompt layout: "Context: <context lines>", a newline, then "Query: <question>".
    prompt_parts = ["Context: ", context_block, "\n", "Query: ", query]
    return model.generate_content("".join(prompt_parts)).text

# Step 6: Putting It All Together
try:
    if __name__ == "__main__":
        user_query = "How is AI transforming industries?"
        # Retrieve supporting sentences, then ask the model to answer with them.
        context = retrieve_similar_context(user_query)
        answer = generate_answer(user_query, context)
        print("Generated Answer:", answer)
finally:
    # Disconnect Milvus even when retrieval or generation raises, so a failed
    # run does not leave the "default" connection open.
    connections.disconnect("default")

Generated Answer: ## How is AI Transforming Industries?

AI, with its subsets like Deep Learning and Natural Language Processing (NLP), is revolutionizing industries across the board. Here's a breakdown of how:

**1. Automation & Efficiency:**

* **Manufacturing:** Robots powered by AI are automating tasks, increasing production speed, and minimizing human error.
* **Customer Service:** Chatbots and virtual assistants provide 24/7 support, freeing up human agents for more complex tasks.
* **Finance:** Algorithmic trading and fraud detection systems are automating processes, improving accuracy and reducing risk.

**2. Personalized Experiences:**

* **Retail:** AI-powered recommendation engines personalize product suggestions, improving customer satisfaction and sales.
* **Healthcare:** AI analyzes patient data to personalize treatment plans, diagnose diseases earlier, and predict health risks.
* **Entertainment:** Streaming services use AI to tailor content recommendations based on user

In [10]:
# Write README.md: a "Chatlink" heading followed by the shared chat URL.
# Markdown needs a space after "##" for the line to render as a heading; the
# URL sits on its own unindented line after a blank line. The encoding is set
# explicitly so the file does not depend on the platform default.
with open("README.md", "w", encoding="utf-8") as f:
    f.write("## Chatlink\n\nhttps://chatgpt.com/share/67050769-62b0-8007-a9b8-20c33be6d364\n")