# DocAgent

## Text generator

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [2]:
from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index import ServiceContext, StorageContext, load_index_from_storage, set_global_service_context
from llama_index.llms.openai import OpenAI


PERSIST_DIR = "./storage2"
if not os.path.exists(PERSIST_DIR):
    llm = OpenAI(model_name="gpt-3.5-turbo", token=os.environ["OPENAI_API_KEY"])
    service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm, embed_model="local")
    set_global_service_context(service_context)
    storage_context = StorageContext.from_defaults()
    docs = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(docs, service_context=service_context, storage_context=storage_context)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

else:
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

In [3]:
query_engine = index.as_query_engine()
docagent = "DocAgent:"
exit_conditions = (":q", "quit", "exit", "bye")

In [4]:
def text_generator(query):
    answer = query_engine.query(query)
    if answer:
        print("Doctor:",query)
        print(docagent,end=" ")
        print(answer.response.strip())
        print()
    else:
        print(docagent, "Sorry I can't help you with that. Try rephrasing your question.")
        print()

## Image Searcher

In [9]:
from transformers import CLIPProcessor, CLIPModel
import torch

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


In [10]:
from PIL import Image

def image_searcher(query):
    folder_path = 'Images'
    image_embeddings = {}
    image_paths = []


    for filename in os.listdir(folder_path):

        if (filename.endswith('.jpg') or filename.endswith('.jpeg')):
            print(filename)
            img_path = os.path.join(folder_path, filename)
            size_in_bytes = os.path.getsize(img_path)

            # Convert bytes to kilobytes (1 KB = 1024 bytes)
            size_in_kb = size_in_bytes / 1024
            if size_in_kb<1000:
                img_path = os.path.join(folder_path, filename)
                image_paths.append(img_path)

    # Load images from your folder
    images = [Image.open(img_path) for img_path in image_paths]

    # Preprocess images and text using the model's processor
    inputs = processor(text=query, images=images, return_tensors="pt")

    outputs = model(**inputs)
    image_features = outputs.image_embeds
    text_features = outputs.text_embeds

    # Calculate cosine similarities between text and image embeddings
    similarities = torch.cosine_similarity(text_features, image_features)

    # Get indices of top-k similar images
    top_k = similarities.topk(k=2)  # Retrieve top 5 similar images
    similar_image_indices = top_k.indices.tolist()

    # Load and display retrieved images
    similar_images = [images[index] for index in similar_image_indices]
    for image in similar_images:
        display(image)

In [7]:
import re

def identify_prompt_type(prompt):
  """
  Identifies whether the given prompt is a text prompt or an image prompt.

  Args:
    prompt: The user-provided prompt.

  Returns:
    A string indicating the prompt type: "text" or "image".
  """
  # Check for presence of keywords indicative of text retrieval
  text_keywords = ["what", "who", "why", "where", "when", "how"]
  text_match = any(keyword in prompt.lower() for keyword in text_keywords)

  # Check for file extensions or image-related words
  image_extensions = [".jpg", ".jpeg", ".png", ".gif"]
  image_words = ["image", "picture", "photo", "visual","report"]
  image_match = any(ext in prompt for ext in image_extensions) or \
               any(word in prompt.lower() for word in image_words)

  # Determine prompt type based on matches
  if image_match and not text_match:
    return "image"
  elif text_match and not image_match:
    return "text"
  else:
    return "text"

In [11]:
print("Please start entering your questions below. To quit, enter one of these keywords: ", exit_conditions)
while(True):
    query = input("> ")
    if(query in exit_conditions):
        print(docagent, "Thank you Doc, Happy to assist you!")
        break

    else:
        prompt_type = identify_prompt_type(query)

        if prompt_type=="text":
            text_generator(query)
        elif prompt_type=="image":
            image_searcher(query)

Please start entering your questions below. To quit, enter one of these keywords:  (':q', 'quit', 'exit', 'bye')


ValueError: shapes (1536,) and (384,) not aligned: 1536 (dim 0) != 384 (dim 0)