<a href="https://colab.research.google.com/github/MrPhantom2325/30_DecafDictators/blob/main/Final_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import flask
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import requests
import re
import json
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from PyPDF2 import PdfReader
import torch
import io
import base64
from pyngrok import ngrok

class BloodTestAnalyzer:
    def __init__(self, pdf_path, api_key, cx):
        # Google Custom Search API credentials
        self.api_key = api_key
        self.cx = cx

        # Load QA model
        self.qa_model = pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2"
        )

        # Load generative model
        try:
            self.medical_model = AutoModelForCausalLM.from_pretrained("microsoft/biogpt")
            self.medical_tokenizer = AutoTokenizer.from_pretrained("microsoft/biogpt")
        except:
            self.medical_model = AutoModelForCausalLM.from_pretrained("gpt2")
            self.medical_tokenizer = AutoTokenizer.from_pretrained("gpt2")

        # Process PDF
        self.pdf_path = pdf_path
        self.pdf_text = self.extract_text_from_pdf()
        self.parameters = self.extract_parameters_and_values()
        self.ranges = self.extract_parameter_ranges()

    def extract_text_from_pdf(self):
        """Extract text from PDF file"""
        try:
            reader = PdfReader(self.pdf_path)
            text = ""
            for page in reader.pages:
                text += page.extract_text()
            return text
        except Exception as e:
            print(f"Error extracting PDF text: {e}")
            return ""

    def extract_parameters_and_values(self):
        """Extract parameters and their values from PDF text"""
        # More flexible parameter extraction
        pattern = r'(\w+(?:\s+\w+)?)\s+(\d+\.?\d*)\s*([a-zA-Z/]+)?'
        matches = re.findall(pattern, self.pdf_text)

        parameters = []
        for match in matches:
            param, value, unit = match
            try:
                numeric_value = float(value)
                parameters.append({
                    'name': param.strip(),
                    'value': numeric_value,
                    'unit': unit or ''
                })
            except ValueError:
                continue

        return parameters

    def extract_parameter_ranges(self):
        """Dynamically extract reference ranges from PDF text"""
        ranges = {}

        # Look for range patterns in the text
        range_patterns = [
            r'(\w+(?:\s+\w+)?)\s*:\s*(\d+\.?\d*)\s*-\s*(\d+\.?\d*)',  # "Parameter: low - high"
            r'Normal\s+Range\s*:\s*(\w+(?:\s+\w+)?)\s*(\d+\.?\d*)\s*-\s*(\d+\.?\d*)'  # "Normal Range: Parameter low - high"
        ]

        for pattern in range_patterns:
            matches = re.findall(pattern, self.pdf_text, re.IGNORECASE)
            for match in matches:
                try:
                    # Handle different match structures
                    if len(match) == 3:
                        param, low, high = match
                    else:
                        param, low, high = match[1], match[2], match[3]

                    param = param.strip().upper()
                    ranges[param] = {
                        'low': float(low),
                        'high': float(high)
                    }
                except (ValueError, IndexError):
                    continue

        # Fallback to predefined ranges if no ranges found
        if not ranges:
            ranges = {
                'HAEMOGLOBIN': {'low': 12, 'high': 15},
                'RBC COUNT': {'low': 3.8, 'high': 4.8},
            }

        return ranges

    def google_search(self, query):
        """
        Search Google using the Custom Search JSON API.
        """
        url = f"https://www.googleapis.com/customsearch/v1?q={query}&key={self.api_key}&cx={self.cx}"
        try:
            response = requests.get(url).json()
            results = [item.get('snippet', '') for item in response.get('items', [])]
            return results[:3]  # Return top 3 snippets
        except Exception as e:
            return [f"Error fetching data from Google: {e}"]

    def generate_medical_insight(self, parameter, status, value):
        """
        Generate detailed medical insights with Google Search for additional context
        """
        try:
            # Construct a specific query about the parameter and its health implications
            query = f"Health implications of {parameter} being {status} normal range (value {value})"

            # Perform Google Search
            search_results = self.google_search(query)

            # Combine search results into a comprehensive insight
            google_insights = " ".join(search_results)

            # Generate a base medical insight
            base_insight = (
                f"Your {parameter} is {status} the normal range. "
                f"Current value: {value}. "
            )

            # Analyze potential implications
            if status == "below":
                implication = (
                    f"Low {parameter} levels may indicate potential health concerns such as "
                    "nutrient deficiency, anemia, or underlying medical conditions. "
                )
            elif status == "above":
                implication = (
                    f"High {parameter} levels could suggest potential health issues like "
                    "inflammation, infection, or metabolic disorders. "
                )
            else:
                implication = "Your parameter is within the normal range, which is a good sign. "

            # Combine insights
            full_insight = base_insight + implication

            # Append Google search insights
            if google_insights:
                full_insight += f"\n\nAdditional Insights: {google_insights}"

            return full_insight

        except Exception as e:
            return f"Could not generate medical insight: {str(e)}"

    def analyze_parameters(self):
        """Comprehensive analysis of blood test parameters"""
        analysis_results = {}

        for param in self.parameters:
            name = param['name'].upper()
            value = param['value']

            # Check if parameter exists in ranges
            if name in self.ranges:
                range_info = self.ranges[name]

                # Determine status
                if value < range_info['low']:
                    status = "below"
                elif value > range_info['high']:
                    status = "above"
                else:
                    status = "within"

                # Generate medical insight with detailed analysis
                insight = self.generate_medical_insight(name, status, value)

                # Create analysis entry
                analysis_results[name] = {
                    'value': value,
                    'status': status,
                    'range': f"{range_info['low']}-{range_info['high']}",
                    'insight': insight
                }

        return analysis_results

    def interactive_chatbot(self):
        """Enhanced interactive medical chatbot with comprehensive PDF querying"""
        print("\n🩺 Medical Report Chatbot 🩺")
        print("Ask questions about your blood test or health. Type 'quit' to exit.")

        while True:
            # Get user input
            user_input = input("\n> Your Question: ").lower()

            # Exit condition
            if user_input in ['quit', 'exit', 'bye']:
                print("Thank you for using the Medical Report Chatbot. Stay healthy!")
                break

            try:
                # Improved PDF text search
                # Add more flexible matching
                pdf_matches = re.findall(r'\b' + re.escape(user_input) + r'\b', self.pdf_text, re.IGNORECASE)

                if pdf_matches:
                    # If direct match found in PDF
                    context_window = 100  # characters before and after the match
                    detailed_context = []

                    for match in pdf_matches:
                        start = max(0, self.pdf_text.find(match) - context_window)
                        end = min(len(self.pdf_text), self.pdf_text.find(match) + context_window)
                        context = self.pdf_text[start:end]
                        detailed_context.append(context.strip())

                    print("\n🩺 PDF Report Insights:")
                    for context in detailed_context:
                        print(f"- {context}")

                # QA model fallback
                qa_response = self.qa_model(
                    question=user_input,
                    context=self.pdf_text
                )

                if qa_response['answer'] and qa_response['score'] > 0.4:
                    print("\n🤖 Direct Answer:")
                    print(f"{qa_response['answer']} (Confidence: {qa_response['score']:.2f})")

                # If no good PDF answer, try Google Search
                if not pdf_matches and (not qa_response['answer'] or qa_response['score'] < 0.4):
                    google_results = self.google_search(user_input)

                    print("\n🌐 Additional Insights:")
                    for i, result in enumerate(google_results, 1):
                        print(f"{i}. {result}")

            except Exception as e:
                print(f"Error processing your question: {e}")

    def display_full_report(self):
        """Display comprehensive blood test report"""
        print("\n🩸 Comprehensive Blood Test Report 🩸")

        # Analyze parameters
        analysis = self.analyze_parameters()

        # Display detailed analysis
        for param, details in analysis.items():
            print(f"\n{param}:")
            print(f"  Value: {details['value']}")
            print(f"  Status: {details['status']} normal range")
            print(f"  Reference Range: {details['range']}")
            print(f"  Medical Insight: {details['insight']}")







app = Flask(__name__)
CORS(app)

# Global variable to store the analyzer
global_analyzer = None

@app.route('/upload', methods=['POST'])
def upload_pdf():
    global global_analyzer

    try:
        # Get base64 encoded PDF
        pdf_base64 = request.json.get('pdf', '')

        # Decode base64 PDF
        pdf_bytes = base64.b64decode(pdf_base64)

        # Create a file-like object
        pdf_file = io.BytesIO(pdf_bytes)

        # Extract text from PDF
        reader = PdfReader(pdf_file)
        pdf_text = ""
        for page in reader.pages:
            pdf_text += page.extract_text()

        # Initialize analyzer
        api_key = "AIzaSyC12bLtgjDllE8h4ew8KxKkqHLCMWeKNkU"
        cx = "c3d4dc32c9faa4b9c"
        global_analyzer = BloodTestAnalyzer(pdf_text, api_key, cx)

        # Generate initial report
        analysis = global_analyzer.analyze_parameters()

        return jsonify({
            'message': 'PDF processed successfully',
            'report': analysis
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/chat', methods=['POST'])
def chat():
    global global_analyzer

    if global_analyzer is None:
        return jsonify({'error': 'No PDF uploaded yet'}), 400

    user_input = request.json.get('message', '')

    try:
        # Perform PDF text search
        pdf_matches = re.findall(r'\b' + re.escape(user_input) + r'\b', global_analyzer.pdf_text, re.IGNORECASE)

        response = {
            'pdf_matches': [],
            'qa_response': None,
            'google_results': []
        }

        if pdf_matches:
            # Context extraction
            context_window = 100
            detailed_context = []

            for match in pdf_matches:
                start = max(0, global_analyzer.pdf_text.find(match) - context_window)
                end = min(len(global_analyzer.pdf_text), global_analyzer.pdf_text.find(match) + context_window)
                context = global_analyzer.pdf_text[start:end]
                detailed_context.append(context.strip())

            response['pdf_matches'] = detailed_context

        # QA model fallback
        qa_response = global_analyzer.qa_model(
            question=user_input,
            context=global_analyzer.pdf_text
        )

        if qa_response['answer'] and qa_response['score'] > 0.4:
            response['qa_response'] = {
                'answer': qa_response['answer'],
                'confidence': qa_response['score']
            }

        # Google Search fallback
        if not pdf_matches and (not qa_response['answer'] or qa_response['score'] < 0.4):
            google_results = global_analyzer.google_search(user_input)
            response['google_results'] = google_results

        return jsonify(response)

    except Exception as e:
        return jsonify({'error': str(e)}), 500


ngrok.set_auth_token('2pSIzr1MNcqoBh1MV6DLE6eKNip_6ZoPPkhQqzAutrP3G3iT4')
# Function to start ngrok tunnel
def start_ngrok():
    # Open an ngrok tunnel to the Flask app
    public_url = ngrok.connect(5000)
    print(f"Public URL: {public_url}")

# Main execution
def main():
    # Start ngrok tunnel in a separate thread
    threading.Thread(target=start_ngrok, daemon=True).start()

    # Run Flask app
    app.run(debug=True,port=6000)

if __name__ == '__main__':
    main()

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:6000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat


Public URL: NgrokTunnel: "https://2f8c-34-16-123-191.ngrok-free.app" -> "http://localhost:5000"


SystemExit: 1