In [None]:
import os
import pandas as pd

# Folder path containing .json files (one file per product)
folder_path = '/content/drive/MyDrive/microlabs_usa'

# Product names are the .json file names with the extension stripped.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the generated CSV identical from one run to the next.
json_files = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir(folder_path)
    if file.endswith('.json')
)

# Create a single-column DataFrame with the file names
df = pd.DataFrame(json_files, columns=['product_name'])

# Save the CSV to a writable location; index=False keeps the row index out of the file
output_path = '/content/product.csv'
df.to_csv(output_path, index=False)

print(f"CSV file saved at: {output_path}")


CSV file saved at: /content/product.csv


In [None]:
# Read the product CSV at /content/product.csv into df (rebinds any existing df).
df=pd.read_csv('/content/product.csv')

In [None]:
# Display the DataFrame as this cell's output.
df

Unnamed: 0,product_name
0,Amlodipine Besylate and Olmesartan Medoxomil T...
1,"Amoxicillin and Clavulanate Potassium Tablets,..."
2,Amoxicillin and Clavulanate Potassium for Oral...
3,Acetazolamide Extended-Release Capsules
4,Aspirin and Extended-Release Dipyridamole Caps...
5,Diclofenac Sodium and Misoprostol Delayed-Rele...
6,Dalfampridine Extended-Release Tablets
7,Dorzolamide HCl and Timolol Maleate Ophthalmic...
8,Chlordiazepoxide Hydrochloride and Clidinium B...
9,"Clindamycin Hydrochloride Capsules, USP"


In [None]:
from googleapiclient.discovery import build

def search_internet_with_api(prompt: str) -> str:
    """
    Searches the internet using Google Custom Search API.

    Credentials are read from the ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID``
    environment variables instead of being embedded in the notebook.
    NOTE: the key that used to be hard-coded here was exposed in the notebook
    source and should be revoked/rotated.

    Args:
        prompt: Free-text search query.

    Returns:
        The snippets of the top three results joined by single spaces, or
        "No internet results found." when the prompt is blank / not a string
        or the response carries no usable snippets.

    Raises:
        RuntimeError: If either environment variable is missing or empty.
    """
    no_results = "No internet results found."

    # A blank or non-string prompt (e.g. NaN from a DataFrame) cannot be searched;
    # skip the API call instead of spending quota on it.
    if not isinstance(prompt, str) or not prompt.strip():
        return no_results

    api_key = os.environ.get("GOOGLE_API_KEY")
    cse_id = os.environ.get("GOOGLE_CSE_ID")
    if not api_key or not cse_id:
        raise RuntimeError(
            "Set the GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables "
            "before calling search_internet_with_api()."
        )

    service = build("customsearch", "v1", developerKey=api_key)
    result = service.cse().list(q=prompt, cx=cse_id).execute()

    # Keep only the top 3 results and ignore items that carry no snippet text.
    snippets = [item.get("snippet", "") for item in result.get("items", [])[:3]]
    snippets = [snippet for snippet in snippets if snippet]
    if snippets:
        return " ".join(snippets)
    return no_results

# Update DataFrame with Internet Results.
# Each distinct product name is searched exactly once (duplicate rows would
# otherwise repeat the same API call), then the results are written to every
# row in a single pass via map() instead of rebuilding a boolean mask per row.
unique_products = df['product_name'].dropna().unique()
results_by_product = {
    product: search_internet_with_api(product) for product in unique_products
}
df['internet_results'] = df['product_name'].map(results_by_product)


In [None]:
# Display the internet_results column of df as this cell's output.
df['internet_results']

Unnamed: 0,internet_results
0,"Jan 31, 2024 ... Amlodipine and olmesartan is ..."
1,"Augmentin, Augmentin ES-600, amoxicillin and c..."
2,Amoxicillin and Clavulanate Potassium for Oral...
3,Acetazolamide extended-release capsules provid...
4,"Aug 15, 2023 ... Aspirin and Extended-Release ..."
5,Diclofenac sodium and misoprostol delayed-rele...
6,DALFAMPRIDINE (dal FAM pri deen) is used to he...
7,"Jan 31, 2024 ... Dorzolamide and timolol ophth..."
8,Chlordiazepoxide Hydrochloride and Clidinium B...
9,Clindamycin hydrochloride capsules contain cli...


In [None]:
!pip install google-api-python-client



In [None]:
import json
import os

import requests
from bs4 import BeautifulSoup
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

# Function to Split and Chunk Data
def split_and_chunk(path, text_splitter):
    """Returns a list of documents split key-wise and then length-wise.

    Walks ``path`` recursively, parses every ``.json`` file, and turns each
    top-level key/value pair into one or more chunks via
    ``text_splitter.create_documents``.

    Args:
        path: Root directory to scan.
        text_splitter: Object exposing ``create_documents(list_of_str)`` whose
            returned items have a mutable ``metadata`` mapping.

    Returns:
        A flat list of chunks. Each chunk's metadata gets ``product_name``
        (the source file name, extension included) and ``key`` (the JSON key
        the text came from).

    Raises:
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    all_documents = []
    for root, folders, files in os.walk(path):
        # Sort in place so the traversal (and therefore chunk order) is deterministic.
        folders.sort()
        for file in sorted(files):
            # Only JSON files are product records; other files in the folder
            # would make the parser raise.
            if not file.endswith('.json'):
                continue
            file_path = os.path.join(root, file)
            # JSON text is UTF-8; do not depend on the platform default encoding.
            with open(file_path, encoding="utf-8") as user_file:
                json_data = json.load(user_file)
            # Key-wise splitting only makes sense for a top-level JSON object.
            if not isinstance(json_data, dict):
                continue
            for key, val in json_data.items():
                # Nothing to chunk for missing or empty sections.
                if val is None or val == "":
                    continue
                # The splitter needs a string; nested lists/objects are
                # serialised back to JSON text rather than crashing the run.
                text = val if isinstance(val, str) else json.dumps(val, ensure_ascii=False)
                for chunk in text_splitter.create_documents([text]):
                    chunk.metadata["product_name"] = file
                    chunk.metadata["key"] = key
                    all_documents.append(chunk)
    return all_documents

# Function to Create Vector Store
def create_vector_store(documents, embedding_model="sentence-transformers/all-MiniLM-L6-v2", index_path="faiss_index"):
    """Creates a vector store with HuggingFace embeddings.

    Args:
        documents: Chunks to embed and index.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.
        index_path: Directory passed to ``save_local``. Defaults to
            "faiss_index", the location the function always used before.

    Returns:
        The FAISS vector store built from ``documents``.

    Raises:
        ValueError: If ``documents`` is empty, so the problem is reported
            before an embedding model is loaded for nothing.
    """
    if not documents:
        raise ValueError("No documents to index; check that the dataset path contains data.")
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    vector_store = FAISS.from_documents(documents, embeddings)
    # Persist the index to disk as a side effect.
    vector_store.save_local(index_path)
    return vector_store

# Internet Search Function
def search_internet(query: str) -> str:
    """Searches the internet for additional context.

    Sends ``query`` to Google's search page and scrapes result snippets.

    Args:
        query: Free-text search query.

    Returns:
        Up to three snippets joined by single spaces, or
        "No internet results found." when the query is blank, the request
        fails, or no snippet element is present in the page.
    """
    no_results = "No internet results found."
    if not query or not query.strip():
        return no_results

    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Let requests URL-encode the query: replacing only spaces leaves
        # characters such as '&', '#' or '+' free to corrupt the query string.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": query},
            headers=headers,
            timeout=10,  # without a timeout a stalled connection blocks forever
        )
        response.raise_for_status()
    except requests.RequestException:
        # Internet context is supplementary; a network failure degrades to
        # the same "no results" answer instead of aborting the caller.
        return no_results

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract snippets from the search results.
    # NOTE(review): 'aCOpRe' is a CSS class taken from Google's result markup;
    # if that markup differs, nothing matches and the fallback is returned — verify.
    snippets = soup.find_all('span', {'class': 'aCOpRe'})
    results = [snippet.get_text() for snippet in snippets[:3]]
    return " ".join(results) if results else no_results

# Tools Implementation
def create_tools(vector_store, model_path="/path/to/llama-3.bin"):
    """Creates Q&A, Summarizer, and Recommender tools.

    Args:
        vector_store: Store exposing ``as_retriever()``; used by the Q&A tool
            to fetch local context.
        model_path: Path of the model file handed to ``LlamaCpp``. Defaults to
            the placeholder path that was previously hard-coded.

    Returns:
        ``[q_and_a_tool, summarizer, recommender]`` as ``Tool`` objects.
    """
    # The model is loaded lazily on first use and then shared by the Q&A and
    # Summarizer tools; previously every single tool call constructed a new
    # LlamaCpp instance, i.e. reloaded the model from disk per question.
    llm_holder = []

    def _get_llm():
        if not llm_holder:
            llm_holder.append(LlamaCpp(model_path=model_path, temperature=0))
        return llm_holder[0]

    # Q&A Tool
    def question_answer_tool(query: str) -> str:
        retriever = vector_store.as_retriever()
        local_results = retriever.get_relevant_documents(query)
        internet_context = search_internet(query)
        # Local chunks first, then the scraped web snippets.
        combined_context = " ".join([doc.page_content for doc in local_results]) + " " + internet_context
        return _get_llm()(f"Context: {combined_context}\nAnswer: {query}")

    q_and_a_tool = Tool(
        name="Q&A with Internet Search",
        func=question_answer_tool,
        description="Answers questions using the database and internet context."
    )

    # Summarizer Tool
    def summarizer_tool(text: str) -> str:
        return _get_llm()(f"Summarize the following text in three sentences: {text}")

    summarizer = Tool(
        name="Summarizer",
        func=summarizer_tool,
        description="Summarizes the given text."
    )

    # Recommender Tool
    def recommender_tool(condition: str) -> str:
        recommendations = {
            "pain": ["Paracetamol", "Ibuprofen"],
            "fever": ["Naproxen", "Aspirin"],
            "glaucoma": ["Latanoprost", "Timolol"]
        }
        # Tool input may arrive with stray whitespace, newlines or quotes
        # around the condition; normalise before the exact-match lookup.
        lookup_key = condition.strip().strip("\"'").strip().lower()
        return ", ".join(recommendations.get(lookup_key, ["No recommendations found."]))

    recommender = Tool(
        name="Recommender",
        func=recommender_tool,
        description="Provides recommendations based on conditions."
    )

    return [q_and_a_tool, summarizer, recommender]

# Agent Initialization
def create_agent(tools, model_path="/path/to/llama-3.bin"):
    """Creates an agent that orchestrates the tools.

    Args:
        tools: Tools the agent may call.
        model_path: Path of the model file handed to ``LlamaCpp``. Defaults to
            the placeholder path that was previously hard-coded, so existing
            callers are unaffected.

    Returns:
        The agent produced by ``initialize_agent`` using the
        ``ZERO_SHOT_REACT_DESCRIPTION`` agent type.
    """
    # temperature=0 is passed through to the model unchanged.
    llm = LlamaCpp(model_path=model_path, temperature=0)
    return initialize_agent(
        tools=tools,
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
    )

# Main Function
def main(data_path="/content/drive/MyDrive/microlabs_usa"):
    """Builds the vector store and agent, then runs an interactive Q&A loop.

    Args:
        data_path: Directory scanned for product files. Defaults to the
            dataset location that was previously hard-coded.

    The loop reads questions from standard input until the user types
    'exit' (case-insensitive) or input ends (EOF / Ctrl-C). Blank lines are
    ignored. An exception raised while answering is printed and the loop
    continues.
    """
    # Define text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=5000,
        chunk_overlap=200,
        length_function=len,
        is_separator_regex=False,
    )

    # Vector store creation
    print("Creating vector store...")
    all_documents = split_and_chunk(data_path, text_splitter)
    vector_store = create_vector_store(all_documents)

    # Tools and agent
    print("Initializing tools and agent...")
    tools = create_tools(vector_store)
    agent = create_agent(tools)

    print("Welcome to Pharma Knowledge Assistant!")
    while True:
        try:
            query = input("\nEnter your question or type 'exit' to quit: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt is a normal way to leave,
            # not an error worth a traceback.
            print("\nExiting... Stay healthy!")
            break
        if not query:
            # Do not spend a model call on an empty question.
            continue
        if query.lower() == "exit":
            print("Exiting... Stay healthy!")
            break
        try:
            response = agent.run(query)
            print(f"\nAnswer: {response}")
        except Exception as e:
            # Keep the session alive when a single question fails.
            print(f"An error occurred: {e}")

# Start the assistant only when this code runs as the main module.
if __name__ == "__main__":
    main()


ImportError: cannot import name 'RecursiveCharacterTextSplitter' from 'langchain_experimental.text_splitter' (/usr/local/lib/python3.10/dist-packages/langchain_experimental/text_splitter.py)

In [None]:
!pip install langchain llama-cpp-python openai requests bs4 faiss-cpu


Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.2.tar.gz (65.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.0/65.0 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m 

In [None]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.8 (from langchain_community)
  Downloading langchain-0.3.9-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.21 (from langchain_community)
  Downloading langchain_core-0.3.21-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.23.1-py3-none-any.whl.metadata (7.5 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-jso

In [None]:
!pip install langchain_experimental


Collecting langchain_experimental
  Downloading langchain_experimental-0.3.3-py3-none-any.whl.metadata (1.7 kB)
Downloading langchain_experimental-0.3.3-py3-none-any.whl (208 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.0/209.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain_experimental
Successfully installed langchain_experimental-0.3.3
