In [None]:
#importing the libraries into the model
#pytesseract for text extraction
#random forest classifier for training model and testing it

In [2]:
import pytesseract
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import requests

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

In [4]:
# Function to extract text from image
def extract_text_from_image(image_path):
    image = Image.open(image_path)
    text = pytesseract.image_to_string(image)
    return text

In [5]:
#function to pre process lab data
def preprocess_lab_data(text):
    text = text.lower()
    
    lab_data = {}
    prescription_data= {}
    if 'hba1c: ' in text:
        lab_data['HbA1c'] = float(text.split('hba1c: ')[1].split('%')[0])
    else:
        lab_data['HbA1c'] = None
    
    if 'blood pressure: ' in text:
        lab_data['BloodPressure'] = text.split('blood pressure: ')[1].split('/')[0] + '/' + text.split('blood pressure: ')[1].split('/')[1].split(' ')[0]
    else:
        lab_data['BloodPressure'] = None
    
    if 'cholesterol: ' in text:
        lab_data['Cholesterol'] = int(text.split('cholesterol: ')[1].split(' ')[0])
    else:
        lab_data['Cholesterol'] = None
    
    if 'bmi: ' in text:
        lab_data['BMI'] = float(text.split('bmi: ')[1].split(' ')[0])
    else:
        lab_data['BMI'] = None
    
    if 'medications: ' in text:
        prescription_data = text.split('medications: ')[1].split(':')
    else:
        prescription_data['medications']=None
    if 'dosages: ' in text:
        prescription_data['dosages']=text.split('dosages: ')[1].split(':')
    else:
        prescription_data['dosages']=None
    if 'frequency: ' in text:
        prescription_data['frequency']=text.split('frequency: ')[1].split(':')
    else:
        prescription_data['frequency']=None
        
        
    
    return lab_data, prescription_data

In [22]:
#analyze patients data according to disease 
def analyze_patient_data(lab_data, prescription_data, model):
    features = [lab_data['HbA1c'], int(lab_data['BloodPressure'].split('/')[0]), lab_data['Cholesterol'], lab_data['BMI']]
    prediction = model.predict([features])[0]
    recommendations = []
    observations = []

    if prediction == 'Diabetes':
        recommendations.append("Maintain a healthy diet, exercise regularly, and monitor blood sugar levels.")
        observations.append("High HbA1c indicates diabetes.")
        observations.append("Prescribed diabetes medication.")
    if prediction == 'Hypertension':
        recommendations.append("Reduce salt intake, exercise regularly, and monitor blood pressure levels.")
        observations.append("High blood pressure indicates hypertension.")
        observations.append("Prescribed hypertension medication.")
    if prediction == 'Both':
        recommendations.append("Follow a balanced diet, exercise regularly, and monitor both blood pressure and blood sugar levels.")
        observations.append("Signs of both diabetes and hypertension detected.")
    if prediction == 'None':
        recommendations.append("Maintain a healthy lifestyle to prevent chronic diseases.")
        observations.append("No chronic diseases detected.")
    
   
    
    return prediction, recommendations, observations

In [17]:
df = pd.read_csv('health_dataset_1.csv')

# Inspect column names
print(df.columns)

# Adjust column names based on actual names in the dataset
X = df[['HbA1c', 'Blood Pressure (mm Hg)', 'Cholesterol (mg/dL)', 'BMI']]
X['Blood Pressure (mm Hg)'] = X['Blood Pressure (mm Hg)'].apply(lambda x: int(x.split('/')[0]))  # Convert to numerical
y = df['Disease']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Model evaluation
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

Index(['Name', 'Age', 'Gender', 'HbA1c', 'Blood Pressure (mm Hg)',
       'Cholesterol (mg/dL)', 'BMI', 'Disease'],
      dtype='object')
              precision    recall  f1-score   support

    Diabetes       1.00      1.00      1.00         9
Hypertension       1.00      1.00      1.00        11

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Blood Pressure (mm Hg)'] = X['Blood Pressure (mm Hg)'].apply(lambda x: int(x.split('/')[0]))  # Convert to numerical


In [23]:
# Extract and preprocess data
lab_report_text = extract_text_from_image(lab_report_path)
prescription_text = extract_text_from_image(prescription_path)
lab_data, prescription_data = preprocess_lab_data(lab_report_text + "\n" + prescription_text)

# Analyze patient data
disease, recommendations, observations = analyze_patient_data(lab_data, prescription_data, model)

# Print results
print("Disease:", disease)
print("Recommendations:", recommendations)
print("Observations:")
for observation in observations:
    print("-", observation)

Disease: Hypertension
Recommendations: ['Reduce salt intake, exercise regularly, and monitor blood pressure levels.']
Observations:
- High blood pressure indicates hypertension.
- Prescribed hypertension medication.


