In [30]:
import pytesseract
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from datetime import datetime

# Tell pytesseract where the Tesseract executable lives. The path is a
# hard-coded Windows location; adjust it when running on another machine.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Function to extract text from image
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Location of the image, in any form ``Image.open`` accepts.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    # Open through a context manager so the image's file handle is released
    # as soon as OCR finishes, even when pytesseract raises.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image)

def _parse_labelled_value(text, label, convert, terminator=' '):
    """Parse the single value that follows ``label`` in ``text``.

    The text after ``label`` is cut at the first ``terminator``, stripped and
    handed to ``convert`` (for example ``int`` or ``float``).

    Returns:
        The converted value, or ``None`` when the label is absent or the
        token cannot be converted.
    """
    if label not in text:
        return None
    try:
        return convert(text.split(label)[1].split(terminator)[0].strip())
    except ValueError:
        return None


def _parse_blood_pressure(text):
    """Parse a ``systolic/diastolic`` pair following ``'blood pressure: '``.

    Returns:
        ``(systolic, diastolic)`` as ints, or ``(None, None)`` when the label
        is absent or either number cannot be read.
    """
    label = 'blood pressure: '
    if label not in text:
        return None, None
    try:
        parts = text.split(label)[1].split('/')[0:2]
        systolic = int(parts[0].strip())
        # The diastolic figure is followed by a unit or the next field, so
        # only the first space-delimited token is kept.
        diastolic = int(parts[1].split(' ')[0].strip())
    except (ValueError, IndexError):
        # IndexError: the reading has no '/' separator (e.g. truncated OCR
        # output), so there is no diastolic part to read.
        return None, None
    return systolic, diastolic


# Function to preprocess lab data
def preprocess_lab_data(text, age, cardiac_history, physical_activity, kidney_disease, eye_disease):
    """Combine patient-supplied answers with values parsed from report text.

    The text is lower-cased and its newlines replaced by spaces before the
    labels ``'hba1c: '``, ``'blood pressure: '``, ``'cholesterol: '`` and
    ``'bmi: '`` are searched for.

    Args:
        text: Raw text of the lab report.
        age: Stored unchanged under ``'Age'``.
        cardiac_history: Stored unchanged under ``'CardiacHistory'``.
        physical_activity: Stored unchanged under ``'PhysicalActivity'``.
        kidney_disease: String; ``'yes'`` (any letter case) maps to 1,
            anything else to 0.
        eye_disease: String; ``'yes'`` (any letter case) maps to 1, anything
            else to 0.

    Returns:
        A dict with the keys ``'Age'``, ``'CardiacHistory'``,
        ``'PhysicalActivity'``, ``'KidneyDisease'``, ``'EyeDisease'``,
        ``'HbA1c'``, ``'BloodPressureSystolic'``, ``'BloodPressureDiastolic'``,
        ``'Cholesterol'`` and ``'BMI'``. A lab value is ``None`` when its
        label is missing or the value cannot be parsed.
    """
    text = text.lower().replace('\n', ' ').strip()

    lab_data = {
        'Age': age,
        'CardiacHistory': cardiac_history,
        'PhysicalActivity': physical_activity,
        'KidneyDisease': 1 if kidney_disease.lower() == 'yes' else 0,
        'EyeDisease': 1 if eye_disease.lower() == 'yes' else 0,
    }

    # HbA1c is written as a percentage, so its value ends at the '%' sign.
    lab_data['HbA1c'] = _parse_labelled_value(text, 'hba1c: ', float, terminator='%')

    systolic, diastolic = _parse_blood_pressure(text)
    lab_data['BloodPressureSystolic'] = systolic
    lab_data['BloodPressureDiastolic'] = diastolic

    lab_data['Cholesterol'] = _parse_labelled_value(text, 'cholesterol: ', int)
    lab_data['BMI'] = _parse_labelled_value(text, 'bmi: ', float)

    return lab_data

# Read the synthetic training data from disk
df = pd.read_csv('health_dataset_synthetic.csv')

# Encode the yes/no history columns as 1/0 (case-insensitive match on 'yes')
df['KidneyDisease'] = df['KidneyDisease'].map(lambda answer: int(answer.lower() == 'yes'))
df['EyeDisease'] = df['EyeDisease'].map(lambda answer: int(answer.lower() == 'yes'))

# Feature matrix (column order matters for later predictions) and target labels
X = df[[
    'Age',
    'CardiacHistory',
    'PhysicalActivity',
    'HbA1c',
    'BloodPressureSystolic',
    'BloodPressureDiastolic',
    'Cholesterol',
    'BMI',
    'KidneyDisease',
    'EyeDisease',
]]
y = df['Disease']

# Hold out 20% of the rows for evaluation; the seed is fixed so the split repeats
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit a 100-tree random forest on the training portion
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

# Predict on the held-out rows; printing the report is currently disabled
y_pred = model.predict(X_test)
#print(classification_report(y_test, y_pred))

# Function to analyze patient data
def analyze_patient_data(lab_data, model):
    """Predict the patient's condition and derive advice from the lab values.

    Args:
        lab_data: Mapping holding the keys 'Age', 'CardiacHistory',
            'PhysicalActivity', 'HbA1c', 'BloodPressureSystolic',
            'BloodPressureDiastolic', 'Cholesterol', 'BMI', 'KidneyDisease'
            and 'EyeDisease'. Lab readings may be ``None``; rule-based checks
            on a missing reading are skipped.
        model: Fitted classifier exposing ``predict``. The labels 'Diabetes',
            'Hypertension' and 'Both' get condition-specific advice; any
            other label is reported as healthy.

    Returns:
        A tuple ``(prediction, recommendations, observations)`` where the
        last two are lists of strings.
    """
    feature_names = [
        'Age', 'CardiacHistory', 'PhysicalActivity', 'HbA1c',
        'BloodPressureSystolic', 'BloodPressureDiastolic', 'Cholesterol',
        'BMI', 'KidneyDisease', 'EyeDisease',
    ]
    # Hand the model a one-row DataFrame with named columns: the classifier
    # in this file is fitted on a DataFrame, and scikit-learn checks that the
    # feature names seen at predict time match those seen at fit time.
    features = pd.DataFrame(
        [[lab_data[name] for name in feature_names]],
        columns=feature_names,
    )
    prediction = model.predict(features)[0]

    recommendations = []
    observations = []

    # A reading that could not be extracted is None; treat it as "not
    # elevated" instead of letting the comparison raise TypeError.
    age = lab_data['Age']
    cholesterol = lab_data['Cholesterol']
    over_45 = age is not None and age > 45
    high_cholesterol = cholesterol is not None and cholesterol > 200

    if prediction == 'Diabetes':
        recommendations.append("Maintain a healthy diet, exercise regularly, and monitor blood sugar levels.")
        observations.append("High HbA1c indicates diabetes risk.")
        if lab_data['EyeDisease'] == 1:
            recommendations.append("Consult an ophthalmologist for diabetic eye disease management.")
        if over_45:
            recommendations.append("Regular screenings for diabetic complications are highly recommended.")
        if high_cholesterol:
            recommendations.append("Reduce saturated fats and cholesterol intake to manage diabetes effectively.")
            observations.append("High cholesterol can worsen diabetes.")
    elif prediction == 'Hypertension' or prediction == 'Both':
        if prediction == 'Hypertension':
            recommendations.append("Reduce salt intake, exercise regularly, and monitor blood pressure levels.")
            observations.append("High blood pressure indicates hypertension risk.")
        else:
            recommendations.append("Follow a balanced diet, exercise regularly, and monitor both blood pressure and blood sugar levels.")
            observations.append("Signs of both diabetes and hypertension detected.")
        # Both labels share the same cardiovascular follow-up advice.
        if over_45:
            recommendations.append("Monitor cardiovascular health closely and consult a cardiologist if necessary.")
        if high_cholesterol:
            recommendations.append("Maintain a heart-healthy diet with low cholesterol to prevent complications.")
            observations.append("High cholesterol increases cardiovascular risks in hypertension patients.")
    else:
        recommendations.append("Maintain a healthy lifestyle to prevent chronic diseases.")
        observations.append("No chronic diseases detected. You are healthy.")

    systolic = lab_data['BloodPressureSystolic']
    diastolic = lab_data['BloodPressureDiastolic']
    if systolic is not None and diastolic is not None:
        # The three ranges cannot overlap (the stages need systolic < 180),
        # so one chain suffices and the urgent note is added exactly once.
        # NOTE(review): readings such as 160/95 match none of these ranges;
        # confirm the thresholds with a clinician before relying on them.
        if systolic >= 180 and diastolic >= 110:
            observations.append("Urgent case: Blood pressure is in a critical range! Seek immediate medical attention.")
        elif 140 <= systolic < 150 and diastolic < 100:
            observations.append("Stage 1 Hypertension: Blood pressure is in the range of 140-150/90.")
        elif 150 <= systolic < 180 and 100 <= diastolic <= 110:
            observations.append("Stage 2 Hypertension: Blood pressure is in the range of 150-180/100-110.")

    return prediction, recommendations, observations
def analyze_collective_data(lab_reports, age, CardiacHistory, PhysicalActivity, KidneyDisease, EyeDisease,  model):
    """Analyse a series of lab report images and assess the latest one.

    Each report is OCR'd with ``extract_text_from_image`` and parsed with
    ``preprocess_lab_data``. The records are then ordered by their 'Date'
    entry; records lacking that key all share ``datetime.min`` as sort key,
    so they keep their input order because the sort is stable.

    Args:
        lab_reports: Iterable of report image paths.
        age, CardiacHistory, PhysicalActivity, KidneyDisease, EyeDisease:
            Patient answers forwarded unchanged to ``preprocess_lab_data``.
        model: Classifier forwarded to ``analyze_patient_data``.

    Returns:
        A tuple ``(trends, trends_analysis, prediction, recommendations,
        observations)``. ``trends`` maps each tracked metric to its list of
        readings. ``trends_analysis`` maps each metric to one label per
        consecutive pair of readings: "increasing", "decreasing", "constant",
        or "unknown" when either reading is ``None``. The last three items
        come from ``analyze_patient_data`` on the final record, or are
        ``"No Data", [], []`` when there is no record to analyse.
    """
    def direction(previous, current):
        # Classify the change between two consecutive readings.
        if previous is None or current is None:
            return "unknown"
        if current > previous:
            return "increasing"
        if current < previous:
            return "decreasing"
        return "constant"

    records = [
        preprocess_lab_data(
            extract_text_from_image(path),
            age,
            CardiacHistory,
            PhysicalActivity,
            KidneyDisease,
            EyeDisease,
        )
        for path in lab_reports
    ]
    records = sorted(records, key=lambda record: record.get('Date', datetime.min))

    tracked_metrics = (
        'HbA1c',
        'BloodPressureSystolic',
        'BloodPressureDiastolic',
        'Cholesterol',
        'BMI',
    )
    trends = {
        metric: [record[metric] for record in records]
        for metric in tracked_metrics
    }

    # Compare every reading with the one immediately before it.
    trends_analysis = {
        metric: [direction(earlier, later) for earlier, later in zip(series, series[1:])]
        for metric, series in trends.items()
    }

    # Nothing to classify when there are no records (or the last one is empty).
    if not records or not records[-1]:
        return trends, trends_analysis, "No Data", [], []

    prediction, recommendations, observations = analyze_patient_data(records[-1], model)
    return trends, trends_analysis, prediction, recommendations, observations

# Function to handle user request
def handle_user_request(request_type, single_lab_report, collective_lab_reports, age, CardiacHistory, PhysicalActivity, KidneyDisease, EyeDisease, model):
    """Run the requested analysis and print its results.

    Args:
        request_type: 'single' analyses ``single_lab_report``; 'collective'
            analyses every entry of ``collective_lab_reports``. Any other
            value only prints an error message.
        single_lab_report: Image path used for the 'single' mode.
        collective_lab_reports: Image paths used for the 'collective' mode.
        age, CardiacHistory, PhysicalActivity, KidneyDisease, EyeDisease:
            Patient answers forwarded to the preprocessing step.
        model: Classifier forwarded to the analysis functions.

    Returns:
        None; all results are written to standard output.
    """
    if request_type == 'single':
        report_text = extract_text_from_image(single_lab_report)
        patient_record = preprocess_lab_data(
            report_text, age, CardiacHistory, PhysicalActivity, KidneyDisease, EyeDisease
        )
        disease, advice, notes = analyze_patient_data(patient_record, model)
        print("Predicted Disease:", disease)
        print("Recommendations:", advice)
        print("Observations:", notes)
        return

    if request_type == 'collective':
        trends, trends_analysis, disease, advice, notes = analyze_collective_data(
            collective_lab_reports, age, CardiacHistory, PhysicalActivity, KidneyDisease, EyeDisease, model
        )

        # Raw readings per metric
        print("Trends over time:")
        for metric, readings in trends.items():
            print(f"{metric}: {readings}")

        # Change between consecutive readings per metric
        print("\nTrends analysis:")
        for metric, changes in trends_analysis.items():
            print(f"{metric}: {changes}")

        # Assessment of the most recent report
        print("\nCurrent Disease:", disease)
        print("Current Recommendations:", advice)
        print("Current Observations:")
        for note in notes:
            print("-", note)
        return

    print("Invalid request type. Please select 'single' or 'collective'")
# Example input data: ask for the analysis mode first
request_type = input("Enter 'single' for single analysis or 'collective' for collective analysis: ")

# Report images to analyse (bare file names, resolved against the working directory)
single_lab_report = 'Rai-Surya_lab-report.jpg'
collective_lab_reports = [
    'Lab_Report[1].jpg',
    'Lab_Report[2].jpg',
    'Lab_Report[3]_page-0001.jpg',
    'Lab_Report[4]_page-0001.jpg',
    'Lab_Report[5]_page-0001.jpg',
    'Lab_Report[6]_page-0001.jpg',
    'Lab_Report[7]_page-0001.jpg',
    'Lab_Report[8]_page-0001.jpg',
    'Lab_Report[9]_page-0001.jpg',
    'Lab_Report[10]_page-0001.jpg',
    'Lab_Report[11]_page-0001.jpg',
    'Lab_Report[12]_page-0001.jpg',
]

# Patient answers collected interactively; the numeric ones are converted with int()
age = int(input("Enter Age: "))
CardiacHistory = int(input("Enter 1 if cardiac history exists, otherwise 0: "))
PhysicalActivity = int(input("Enter physical activity level (0-10): "))
KidneyDisease = input("Do you have a history of kidney disease? (yes/no): ")
EyeDisease = input("Do you have a history of eye disease? (yes/no): ")

# Run the selected analysis with the model trained above
handle_user_request(
    request_type=request_type,
    single_lab_report=single_lab_report,
    collective_lab_reports=collective_lab_reports,
    age=age,
    CardiacHistory=CardiacHistory,
    PhysicalActivity=PhysicalActivity,
    KidneyDisease=KidneyDisease,
    EyeDisease=EyeDisease,
    model=model,
)


Enter 'single' for single analysis or 'collective' for collective analysis:  collective
Enter Age:  47
Enter 1 if cardiac history exists, otherwise 0:  1
Enter physical activity level (0-10):  5
Do you have a history of kidney disease? (yes/no):  yes
Do you have a history of eye disease? (yes/no):  no


Trends over time:
HbA1c: [4.5, 4.6, 4.5, 4.4, 4.4, 4.7, 4.6, 4.5, 4.5, 4.6, 4.4, 4.5]
BloodPressureSystolic: [130, 135, 128, 133, 129, 138, 132, 125, 126, 130, 128, 130]
BloodPressureDiastolic: [80, 85, 78, 82, 80, 88, 84, 78, 79, 82, 80, 80]
Cholesterol: [20, 208, 210, 215, 200, 210, 215, 207, 202, 210, 208, 205]
BMI: [21.5, 21.6, 21.5, 21.7, 21.6, 21.8, 22.0, 21.9, 21.8, 22.1, 21.7, 21.8]

Trends analysis:
HbA1c: ['increasing', 'decreasing', 'decreasing', 'constant', 'increasing', 'decreasing', 'decreasing', 'constant', 'increasing', 'decreasing', 'increasing']
BloodPressureSystolic: ['increasing', 'decreasing', 'increasing', 'decreasing', 'increasing', 'decreasing', 'decreasing', 'increasing', 'increasing', 'decreasing', 'increasing']
BloodPressureDiastolic: ['increasing', 'decreasing', 'increasing', 'decreasing', 'increasing', 'decreasing', 'decreasing', 'increasing', 'increasing', 'decreasing', 'constant']
Cholesterol: ['increasing', 'increasing', 'increasing', 'decreasing', 'incr

