# Smart Research Assistant

This notebook provides an interactive interface to upload PDF files, get summaries, and ask questions about the content using Gemini AI.

In [None]:
# Install required packages
!pip install -q google-generativeai python-dotenv PyPDF2 ipywidgets

In [10]:
import html
import json
import logging
import os
import signal
from difflib import SequenceMatcher
from typing import List, Dict, Any

import google.generativeai as genai
import ipywidgets as widgets
import PyPDF2
from dotenv import load_dotenv
from IPython.display import display, clear_output, HTML

# Logging: a logger named after this module, with root logging configured at INFO
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [11]:
# Read variables from a local .env file (if one exists) into the environment
load_dotenv()

# The Gemini API key is expected in the GOOGLE_API_KEY environment variable
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("Please set your GOOGLE_API_KEY in the .env file")

# Register the key with the Gemini client, using the REST transport
try:
    genai.configure(transport='rest', api_key=GOOGLE_API_KEY)
except Exception as e:
    logger.error(f"Failed to configure Gemini API: {str(e)}")
    raise
else:
    logger.info("Successfully configured Gemini API")

INFO:__main__:Successfully configured Gemini API


In [12]:
class ResearchAssistant:
    """Gemini-backed helper that extracts PDF text, summarizes it and answers questions.

    Attributes:
        model: The ``genai.GenerativeModel`` used for every request.
        current_pdf: Path of the most recently processed PDF, or ``None``.
        current_text: Text extracted from ``current_pdf`` ("" until a PDF is processed).
    """

    # Only the first MAX_CONTEXT_CHARS characters of a document are sent to the
    # model, to stay within token limits.
    MAX_CONTEXT_CHARS = 4000

    def __init__(self, model_name: str = 'gemini-1.5-pro'):
        """Create the Gemini model handle and reset the document state.

        Args:
            model_name: Name of the Gemini model to use.

        Raises:
            Exception: Whatever the Gemini client raises while listing models or
                creating the model; it is logged and re-raised.
        """
        try:
            models = genai.list_models()
            logger.info(f"Available models: {[m.name for m in models]}")

            self.model = genai.GenerativeModel(model_name)
            self.current_pdf = None
            self.current_text = ""
            logger.info("ResearchAssistant initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize ResearchAssistant: {str(e)}")
            raise

    def process_pdf(self, pdf_path: str) -> str:
        """Extract text from PDF file.

        On success the path and text are stored on the instance.

        Args:
            pdf_path: Path of the PDF to read.

        Returns:
            The extracted text, or a message starting with
            ``"Error processing PDF: "`` if extraction failed (the instance
            state is left unchanged in that case).
        """
        try:
            logger.info(f"Processing PDF: {pdf_path}")
            reader = PyPDF2.PdfReader(pdf_path)
            # A page may yield no text (None or ""); treat that as empty instead
            # of failing the whole document. join() also avoids repeated
            # string concatenation.
            text = "".join(page.extract_text() or "" for page in reader.pages)

            if not text.strip():
                raise ValueError("No text could be extracted from the PDF")

            self.current_pdf = pdf_path
            self.current_text = text
            logger.info("Successfully processed PDF")
            return text
        except Exception as e:
            logger.error(f"Error processing PDF: {str(e)}")
            return f"Error processing PDF: {str(e)}"

    @staticmethod
    def _as_list(value: Any) -> List[Any]:
        """Coerce a JSON value into a list (None -> [], scalar -> [scalar])."""
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    @staticmethod
    def _fallback_summary(response_text: str) -> Dict[str, Any]:
        """Build a summary dict from free-form text when no usable JSON was returned."""
        return {
            "title": "Document Summary",
            "key_points": [point.strip() for point in response_text.split('\n') if point.strip()],
            "summary": response_text,
            "topics": []
        }

    @classmethod
    def _parse_summary(cls, response_text: str) -> Dict[str, Any]:
        """Extract the JSON object from a model reply and fill in any missing keys."""
        # The reply may wrap the JSON in prose or a code fence, so take the span
        # from the first '{' to the last '}'.
        start = response_text.find('{')
        end = response_text.rfind('}')
        if start == -1 or end < start:
            return cls._fallback_summary(response_text)

        try:
            parsed = json.loads(response_text[start:end + 1])
        except json.JSONDecodeError:
            return cls._fallback_summary(response_text)

        # Keep any extra keys the model returned, but guarantee the four keys
        # the UI reads are present and list-valued where expected.
        summary = dict(parsed)
        summary["title"] = parsed.get("title") or "Document Summary"
        summary["key_points"] = cls._as_list(parsed.get("key_points"))
        summary["summary"] = parsed.get("summary") or ""
        summary["topics"] = cls._as_list(parsed.get("topics"))
        return summary

    def get_summary(self, text: str) -> Dict[str, Any]:
        """Generate a structured summary of the document.

        Args:
            text: Document text; only the first ``MAX_CONTEXT_CHARS`` characters are used.

        Returns:
            A dict with at least ``title``, ``key_points``, ``summary`` and
            ``topics``; or ``{"error": ..., "raw_response": ""}`` on failure.
        """
        if not text or not text.strip():
            return {
                "error": "No document text to summarize",
                "raw_response": ""
            }

        try:
            logger.info("Generating summary...")

            text_for_summary = text[:self.MAX_CONTEXT_CHARS]

            prompt = f"""
            Please provide a structured summary of the following research document.
            Format your response as a JSON object with the following structure:
            {{
                "title": "Main topic or title",
                "key_points": ["point1", "point2", "point3"],
                "summary": "Brief overall summary",
                "topics": ["topic1", "topic2", "topic3"]
            }}
        
            Document text:
            {text_for_summary}
            """

            response = self.model.generate_content(prompt)
            result = self._parse_summary(response.text)

            logger.info("Summary generated successfully")
            return result
        except Exception as e:
            logger.error(f"Failed to generate summary: {str(e)}")
            return {
                "error": f"Failed to generate summary: {str(e)}",
                "raw_response": ""
            }

    def answer_question(self, question: str) -> Dict[str, Any]:
        """Answer questions about the document.

        Args:
            question: The user's question about the processed document.

        Returns:
            ``{"answer": str, "sources": [context]}`` on success, where
            ``context`` is the document excerpt sent to the model; otherwise
            ``{"error": str}``.
        """
        if not self.current_text:
            return {"error": "No documents have been processed yet"}
        if not question or not question.strip():
            return {"error": "Please enter a question"}

        try:
            logger.info(f"Processing question: {question}")

            context = self.current_text[:self.MAX_CONTEXT_CHARS]

            prompt = f"""
            Use the following context to answer the question. If you don't know the answer, say so.
            
            Context: {context}
            
            Question: {question}
            
            Answer:
            """

            response = self.model.generate_content(prompt)
            logger.info("Question answered successfully")
            return {
                "answer": response.text,
                "sources": [context]
            }
        except Exception as e:
            logger.error(f"Failed to answer question: {str(e)}")
            return {"error": f"Failed to answer question: {str(e)}"}

In [13]:
# Build the single assistant instance that the widget callbacks below share
try:
    assistant = ResearchAssistant()
except Exception as e:
    # Log for the notebook reader, then let the failure stop the cell
    logger.error(f"Failed to create ResearchAssistant instance: {str(e)}")
    raise
else:
    logger.info("ResearchAssistant instance created successfully")

INFO:__main__:Available models: ['models/chat-bison-001', 'models/text-bison-001', 'models/embedding-gecko-001', 'models/gemini-1.0-pro-vision-latest', 'models/gemini-pro-vision', 'models/gemini-1.5-pro-latest', 'models/gemini-1.5-pro-001', 'models/gemini-1.5-pro-002', 'models/gemini-1.5-pro', 'models/gemini-1.5-flash-latest', 'models/gemini-1.5-flash-001', 'models/gemini-1.5-flash-001-tuning', 'models/gemini-1.5-flash', 'models/gemini-1.5-flash-002', 'models/gemini-1.5-flash-8b', 'models/gemini-1.5-flash-8b-001', 'models/gemini-1.5-flash-8b-latest', 'models/gemini-1.5-flash-8b-exp-0827', 'models/gemini-1.5-flash-8b-exp-0924', 'models/gemini-2.5-pro-exp-03-25', 'models/gemini-2.5-pro-preview-03-25', 'models/gemini-2.0-flash-exp', 'models/gemini-2.0-flash', 'models/gemini-2.0-flash-001', 'models/gemini-2.0-flash-exp-image-generation', 'models/gemini-2.0-flash-lite-001', 'models/gemini-2.0-flash-lite', 'models/gemini-2.0-flash-lite-preview-02-05', 'models/gemini-2.0-flash-lite-preview', 

In [14]:
# --- Input controls ---------------------------------------------------------
# Single-file PDF picker
upload_button = widgets.FileUpload(
    description='Upload PDF',
    accept='.pdf',
    multiple=False,
    layout=widgets.Layout(width='auto'),
    style=dict(description_width='initial'),
)

# The summary and question controls start out disabled
summary_button = widgets.Button(
    description='Get Summary',
    button_style='success',
    disabled=True,
    layout=widgets.Layout(width='auto'),
)

question_input = widgets.Text(
    description='Question:',
    placeholder='Type your question here',
    value='',
    disabled=True,
    layout=widgets.Layout(width='80%'),
    style=dict(description_width='initial'),
)

ask_button = widgets.Button(
    description='Ask Question',
    button_style='primary',
    disabled=True,
    layout=widgets.Layout(width='auto'),
)

# --- Feedback widgets -------------------------------------------------------
# Area where summaries and answers are rendered
output = widgets.Output()

# Progress bar running from 0 to 1
progress = widgets.FloatProgress(
    description='Progress:',
    min=0,
    max=1,
    value=0,
    bar_style='info',
    layout=widgets.Layout(width='100%'),
    style=dict(bar_color='#4285f4'),
)

# One-line status message shown to the user
status = widgets.HTML(
    layout=widgets.Layout(width='100%'),
    value='<div style="color: #666; font-style: italic;">Ready to process documents...</div>',
)

In [15]:
def handle_upload(change):
    """Save the uploaded PDF, extract its text and enable the summary/question controls.

    Intended as an observer of ``upload_button``'s ``value`` trait. The change
    notification itself is not inspected; the widget's current value is read.

    Args:
        change: Change notification passed by the widget (unused).
    """
    uploads = upload_button.value
    if not uploads:
        # Nothing uploaded (e.g. the value was cleared): nothing to process.
        return

    with output:
        clear_output()
        status.value = '<div style="color: #4285f4;">Processing PDF...</div>'
        progress.value = 0.2

        try:
            # ipywidgets 7 exposes a dict {name: {'content', 'metadata': {'name'}}};
            # ipywidgets 8 exposes a tuple of records with 'name' and 'content'.
            if isinstance(uploads, dict):
                uploaded_file = next(iter(uploads.values()))
                raw_name = uploaded_file['metadata']['name']
            else:
                uploaded_file = uploads[0]
                raw_name = uploaded_file['name']
            content = uploaded_file['content']

            # Drop any directory components so the file is always written into
            # the working directory, never to a path chosen by the uploader.
            filename = os.path.basename(raw_name) or 'uploaded.pdf'

            with open(filename, 'wb') as f:
                f.write(content)

            progress.value = 0.4

            # Process the PDF
            text = assistant.process_pdf(filename)

            # process_pdf reports failure through this exact prefix; matching
            # only 'Error' would reject documents whose text starts with it.
            if text.startswith('Error processing PDF:'):
                status.value = f'<div style="color: #ea4335;">{html.escape(text)}</div>'
                progress.value = 0
                return

            status.value = f'<div style="color: #34a853;">Successfully processed {html.escape(filename)}</div>'
            summary_button.disabled = False
            question_input.disabled = False
            ask_button.disabled = False
            progress.value = 1
        except Exception as e:
            status.value = f'<div style="color: #ea4335;">Error: {html.escape(str(e))}</div>'
            progress.value = 0
            logger.error(f"Error in handle_upload: {str(e)}")

In [16]:
def _summary_to_html(summary: Dict[str, Any]) -> str:
    """Render a summary dict as an HTML fragment, escaping all model-provided text."""

    def list_items(values) -> str:
        # Tolerate a missing key or a single string where a list was expected.
        if values is None:
            values = []
        elif not isinstance(values, (list, tuple)):
            values = [values]
        return "".join(f"<li>{html.escape(str(item))}</li>" for item in values)

    title = html.escape(str(summary.get('title') or 'Document Summary'))
    body = html.escape(str(summary.get('summary') or ''))

    return f"""
    <div style="margin: 20px;">
        <h3 style="color: #4285f4;">Document Summary</h3>
        <h4 style="color: #34a853;">{title}</h4>

        <h5 style="color: #4285f4;">Key Points:</h5>
        <ul>{list_items(summary.get('key_points'))}</ul>

        <h5 style="color: #4285f4;">Summary:</h5>
        <p>{body}</p>

        <h5 style="color: #4285f4;">Topics:</h5>
        <ul>{list_items(summary.get('topics'))}</ul>
    </div>
    """


def handle_summary(button):
    """Summarize the currently loaded document and render the result in ``output``.

    Click handler for ``summary_button``. Progress and status widgets are
    updated along the way; failures are shown in the status line and logged.

    Args:
        button: The clicked button (unused).
    """
    with output:
        clear_output()
        status.value = '<div style="color: #4285f4;">Generating summary...</div>'
        progress.value = 0.2

        try:
            # Get text from the current PDF
            text = assistant.current_text
            progress.value = 0.5

            summary = assistant.get_summary(text)
            progress.value = 0.8

            if "error" in summary:
                status.value = f'<div style="color: #ea4335;">Error: {html.escape(str(summary["error"]))}</div>'
                progress.value = 0
            else:
                # Model output is untrusted text: it is escaped before display.
                display(HTML(_summary_to_html(summary)))
                status.value = '<div style="color: #34a853;">Summary generated successfully</div>'
                progress.value = 1
        except Exception as e:
            status.value = f'<div style="color: #ea4335;">Error: {html.escape(str(e))}</div>'
            progress.value = 0
            logger.error(f"Error in handle_summary: {str(e)}")

In [17]:
def handle_question(button):
    """Send the typed question to the assistant and render the answer in ``output``.

    Click handler for ``ask_button``. Shows the answer together with the
    document excerpt that was used as context; failures are shown in the
    status line and logged.

    Args:
        button: The clicked button (unused).
    """
    with output:
        clear_output()
        status.value = '<div style="color: #4285f4;">Processing question...</div>'
        progress.value = 0.2

        try:
            # Whitespace-only input counts as "no question".
            question = (question_input.value or '').strip()
            if not question:
                status.value = '<div style="color: #ea4335;">Please enter a question</div>'
                progress.value = 0
                return

            progress.value = 0.5
            answer = assistant.answer_question(question)
            progress.value = 0.8

            if "error" in answer:
                status.value = f'<div style="color: #ea4335;">Error: {html.escape(str(answer["error"]))}</div>'
                progress.value = 0
            else:
                # Both the model's answer and the PDF excerpt are untrusted
                # text, so they are escaped before being placed in HTML.
                answer_html = html.escape(str(answer.get('answer', '')))
                sources = answer.get('sources') or ['']
                context_html = html.escape(str(sources[0]))

                html_output = f"""
                <div style="margin: 20px;">
                    <h3 style="color: #4285f4;">Answer</h3>
                    <p>{answer_html}</p>
                    
                    <h4 style="color: #4285f4;">Context Used</h4>
                    <div style="margin: 10px 0; padding: 10px; background-color: #f8f9fa; border-radius: 5px;">
                        <p>{context_html}</p>
                    </div>
                </div>
                """

                display(HTML(html_output))
                status.value = '<div style="color: #34a853;">Question answered successfully</div>'
                progress.value = 1
        except Exception as e:
            status.value = f'<div style="color: #ea4335;">Error: {html.escape(str(e))}</div>'
            progress.value = 0
            logger.error(f"Error in handle_question: {str(e)}")

In [18]:
# Connect every control to its callback
upload_button.observe(handle_upload, names='value')
summary_button.on_click(handle_summary)
ask_button.on_click(handle_question)

# Outer frame of the interface
frame = widgets.Layout(
    width='100%',
    padding='20px',
    border='2px solid #4285f4',
    border_radius='10px'
)

# Sections in display order: upload, summary, question, feedback, results
sections = [
    widgets.HTML('<h2 style="color: #4285f4;">Smart Research Assistant</h2>'),
    widgets.HTML('<h3 style="color: #34a853;">Upload a PDF Document</h3>'),
    upload_button,
    widgets.HTML('<h3 style="color: #34a853;">Get Document Summary</h3>'),
    summary_button,
    widgets.HTML('<h3 style="color: #34a853;">Ask Questions</h3>'),
    widgets.HBox([question_input, ask_button]),
    progress,
    status,
    widgets.HTML('<h3 style="color: #34a853;">Results</h3>'),
    output,
]

container = widgets.VBox(sections, layout=frame)

# Render the assembled interface
display(container)

VBox(children=(HTML(value='<h2 style="color: #4285f4;">Smart Research Assistant</h2>'), HTML(value='<h3 style=…