In [35]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Define the URLs
urls = {
    "Glioma Symptoms": "https://www.mayoclinic.org/diseases-conditions/glioma/symptoms-causes/syc-20350251",
    "Glioma Diagnosis": "https://www.mayoclinic.org/diseases-conditions/glioma/diagnosis-treatment/drc-20350255",
    "Meningioma Symptoms": "https://www.mayoclinic.org/diseases-conditions/meningioma/symptoms-causes/syc-20355643",
    "Meningioma Diagnosis": "https://www.mayoclinic.org/diseases-conditions/meningioma/diagnosis-treatment/drc-20355648",
    "Pituitary Tumor Symptoms": "https://www.mayoclinic.org/diseases-conditions/pituitary-tumors/symptoms-causes/syc-20350548",
    "Pituitary Tumor Diagnosis": "https://www.mayoclinic.org/diseases-conditions/pituitary-tumors/diagnosis-treatment/drc-20350553"
}

# Initialize a list to hold the data
data = []

# Loop through the URLs and scrape data
for title, url in urls.items():
    print(f"Scraping {title} from {url}")

    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Initialize content text
    content_text = []

    # Based on the HTML structure, we want to target content div
    content_div = soup.find('div', class_='content')

    if content_div:
        # Get all h2 headers, paragraphs, and unordered lists
        elements = content_div.find_all(['h2', 'h3', 'p', 'ul'])

        for element in elements:
            # If it's a heading, add it with some formatting
            if element.name in ['h2', 'h3']:
                content_text.append(f"\n## {element.get_text().strip()} ##\n")

            # If it's a paragraph, add its text
            elif element.name == 'p':
                para_text = element.get_text().strip()
                if para_text:  # Only add non-empty paragraphs
                    content_text.append(para_text)

            # If it's an unordered list, extract all list items
            elif element.name == 'ul':
                list_items = element.find_all('li')
                for item in list_items:
                    item_text = item.get_text().strip()
                    if item_text:  # Only add non-empty list items
                        content_text.append(f"• {item_text}")

    # If we found content, join it all together
    if content_text:
        content = ' '.join(content_text)
    else:
        # If the content div wasn't found or had no relevant content, try another approach
        all_text = []

        # Try to get sections
        sections = soup.find_all('section')
        for section in sections:
            section_text = section.get_text().strip()
            if section_text:
                all_text.append(section_text)

        # If we found sections, join them
        if all_text:
            content = ' '.join(all_text)
        else:
            content = "Content not found"

    # Append the data
    data.append({
        "Title": title,
        "URL": url,
        "Content": content
    })
    print(f"Completed scraping {title}")

# Create a DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('tumor_information.csv', index=False)
print("Data has been scraped and saved to 'tumor_information.csv'")

Scraping Glioma Symptoms from https://www.mayoclinic.org/diseases-conditions/glioma/symptoms-causes/syc-20350251
Completed scraping Glioma Symptoms
Scraping Glioma Diagnosis from https://www.mayoclinic.org/diseases-conditions/glioma/diagnosis-treatment/drc-20350255
Completed scraping Glioma Diagnosis
Scraping Meningioma Symptoms from https://www.mayoclinic.org/diseases-conditions/meningioma/symptoms-causes/syc-20355643
Completed scraping Meningioma Symptoms
Scraping Meningioma Diagnosis from https://www.mayoclinic.org/diseases-conditions/meningioma/diagnosis-treatment/drc-20355648
Completed scraping Meningioma Diagnosis
Scraping Pituitary Tumor Symptoms from https://www.mayoclinic.org/diseases-conditions/pituitary-tumors/symptoms-causes/syc-20350548
Completed scraping Pituitary Tumor Symptoms
Scraping Pituitary Tumor Diagnosis from https://www.mayoclinic.org/diseases-conditions/pituitary-tumors/diagnosis-treatment/drc-20350553
Completed scraping Pituitary Tumor Diagnosis
Data has been 

In [36]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('../tumor_information.csv')

# Display the first few rows to understand its structure
print(df)


                       Title  \
0            Glioma Symptoms   
1           Glioma Diagnosis   
2        Meningioma Symptoms   
3       Meningioma Diagnosis   
4   Pituitary Tumor Symptoms   
5  Pituitary Tumor Diagnosis   

                                                 URL  \
0  https://www.mayoclinic.org/diseases-conditions...   
1  https://www.mayoclinic.org/diseases-conditions...   
2  https://www.mayoclinic.org/diseases-conditions...   
3  https://www.mayoclinic.org/diseases-conditions...   
4  https://www.mayoclinic.org/diseases-conditions...   
5  https://www.mayoclinic.org/diseases-conditions...   

                                             Content  
0  \n## Overview ##\n \n## Glioma ##\n \n## Gliom...  
1  \n## Diagnosis ##\n \n## Brain tumor MRI ##\n ...  
2  \n## Overview ##\n \n## Meninges ##\n \n## Men...  
3  \n## Diagnosis ##\n \n## Meningioma ##\n \n## ...  
4  \n## Overview ##\n \n## Pituitary tumor ##\n \...  
5  \n## Diagnosis ##\n Pituitary tumors often are...

In [37]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('../tumor_information.csv')

# Combine 'title' and 'content' columns into a new column
df['Combined'] = df['Title'] + "  " + df['Content']

# Display the updated DataFrame
print(df[['Title', 'Content', 'Combined']])

df["Combined"].to_csv("combined_information.csv", index=False, sep="\n", header=False)


                       Title  \
0            Glioma Symptoms   
1           Glioma Diagnosis   
2        Meningioma Symptoms   
3       Meningioma Diagnosis   
4   Pituitary Tumor Symptoms   
5  Pituitary Tumor Diagnosis   

                                             Content  \
0  \n## Overview ##\n \n## Glioma ##\n \n## Gliom...   
1  \n## Diagnosis ##\n \n## Brain tumor MRI ##\n ...   
2  \n## Overview ##\n \n## Meninges ##\n \n## Men...   
3  \n## Diagnosis ##\n \n## Meningioma ##\n \n## ...   
4  \n## Overview ##\n \n## Pituitary tumor ##\n \...   
5  \n## Diagnosis ##\n Pituitary tumors often are...   

                                            Combined  
0  Glioma Symptoms  \n## Overview ##\n \n## Gliom...  
1  Glioma Diagnosis  \n## Diagnosis ##\n \n## Bra...  
2  Meningioma Symptoms  \n## Overview ##\n \n## M...  
3  Meningioma Diagnosis  \n## Diagnosis ##\n \n##...  
4  Pituitary Tumor Symptoms  \n## Overview ##\n \...  
5  Pituitary Tumor Diagnosis  \n## Diagnosis ##\n...

In [58]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader

raw_documents = TextLoader("../combined_information.csv").load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
documents = text_splitter.split_documents(raw_documents)

In [59]:
documents[0]

Document(metadata={'source': 'combined_information.csv'}, page_content='"Glioma Symptoms  \n## Overview ##\n \n## Glioma ##\n \n## Glioma ##')

In [60]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model_name = "sentence-transformers/all-MiniLM-L6-v2"

db_info = Chroma.from_documents(documents, embedding=HuggingFaceEmbeddings(model_name=embeddings_model_name))

In [63]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

template = """
You are a medical information specialist providing detailed, comprehensive answers about brain tumors.

Use the following pieces of context to answer the question. The context contains important medical information
about symptoms, diagnoses, and treatments for different types of brain tumors.

Context: {context}

Question: {question}

Detailed Answer:
"""
custom_prompt = PromptTemplate.from_template(template)

load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")
# Ensure API Key is loaded
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables. Please set it in your .env file.")

# Initialize Gemini-1.5-Flash Model with increased max_output_tokens
gemini_chat_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.2,
    max_output_tokens=1024,  # Adjust this value as needed for longer responses
    api_key=api_key
)

# Create Retrieval-based Q&A Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=gemini_chat_model,
    retriever=db_info.as_retriever(search_kwargs={"k": 5}),  # Retrieve more documents
    return_source_documents=False,  # No need to return source documents
    chain_type="stuff",  # Use stuff chain to include all context
    chain_type_kwargs={
        "prompt": custom_prompt,
    }
)

# Function to Query Gemini AI with Retrieved Info
def query_gemini(question):
    # Add a system instruction to further enhance the response quality
    enhanced_question = question

    # Get response
    response = qa_chain.invoke(enhanced_question)

    # Just return the result directly
    return response["result"]

# Example Query
user_question = "What are the symptoms of Glioma?"
answer = query_gemini(user_question)

print("Answer:", answer)


Answer: Gliomas present a wide range of symptoms, the specifics of which depend on several factors: the type of glioma, its size, its location within the brain, and its growth rate.  There is no single, universally experienced symptom profile.  However, some common signs and symptoms include:

* **Headache:** This is a frequently reported symptom, often characterized by its intensity, particularly being most severe in the mornings.

* **Nausea and Vomiting:** These gastrointestinal symptoms can be associated with increased intracranial pressure.

* **Cognitive Decline:** This encompasses a broad spectrum of neurological impairments, including problems with thinking, understanding information, memory loss, and changes in personality or increased irritability.

* **Vision Problems:**  These can manifest as blurred vision, double vision, or other visual disturbances.


It is crucial to remember that the presence of these symptoms does not automatically indicate a glioma.  Many other condi

In [64]:
# Example Query
user_question = "What is the chemotherapy?"
answer = query_gemini(user_question)

print("Answer:", answer)

Answer: Chemotherapy, in the context of brain tumor treatment, is the use of drugs to kill cancer cells.  It's frequently used in conjunction with radiation therapy, meaning the two treatments are often administered simultaneously or in close succession.  The specific chemotherapy regimen (the types and doses of drugs used) will vary greatly depending on the type and grade of brain tumor, the patient's overall health, and other factors.  It's crucial to understand that chemotherapy is not a standalone treatment for brain tumors in most cases; it's typically part of a broader treatment plan that may also include surgery, radiation, and/or targeted therapy.  The goal of combining chemotherapy with radiation is often to enhance the effectiveness of both treatments and improve patient outcomes.
