In [4]:
import cv2
import pytesseract
import re
import numpy as np
import pandas as pd
import joblib
import easyocr

# Tell pytesseract which Tesseract executable to launch.
# NOTE(review): this is a hard-coded absolute Windows install path; it has to
# be adjusted on machines where Tesseract lives elsewhere.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

In [8]:
def extract_img_2(path, keywords):
    """Extract numeric values for the given keywords from an image via Tesseract.

    The image is read with OpenCV, converted to grayscale and passed to
    ``pytesseract.image_to_string``.  The OCR text is processed line by line:
    the first keyword (in the order given) that matches a line claims the
    first number found on that line, and the remaining keywords are not
    tried for that line.  Lines containing ``'MCHC'`` are skipped entirely.

    Lines that contain ``=`` or ``:`` always (over)write the keyword's entry.
    Lines without such a delimiter (e.g. ``"HB 13.6 g/dL"``) are used as a
    fallback only: they fill a keyword that has no value yet and never
    replace an existing one.  The original code intended this fallback but
    placed it in an ``except`` branch that could never produce a value.

    Parameters
    ----------
    path : str
        Path of the image file, handed to ``cv2.imread``.
    keywords : iterable of str
        Patterns searched case-insensitively with ``re``; they are treated as
        regular expressions.  Entries that cannot be compiled are ignored.

    Returns
    -------
    dict
        Maps each matched keyword (exactly as passed in) to a ``float``.
        Keywords that were not found are absent.

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a readable message rather than inside cvtColor.
        raise ValueError(f"Could not load an image from {path!r}")

    # Convert the image to gray scale
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ocr_output = pytesseract.image_to_string(img_gray)

    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    number_pattern = re.compile(r'\d+\.?\d*')

    # Compile every keyword once; unusable entries are skipped (best effort,
    # matching the original behaviour of silently ignoring them).
    keyword_patterns = []
    for keyword in keywords:
        try:
            keyword_patterns.append((keyword, re.compile(keyword, re.IGNORECASE)))
        except (re.error, TypeError, ValueError):
            continue

    dictionary = {}
    for line in ocr_output.split('\n'):
        if 'MCHC' in line:
            continue

        number = number_pattern.search(line)
        if number is None:
            # No number on this line, so no keyword can take a value from it.
            continue
        value = float(number.group())
        has_delimiter = '=' in line or ':' in line

        for keyword, pattern in keyword_patterns:
            if not pattern.search(line):
                continue
            if has_delimiter:
                dictionary[keyword] = value
            else:
                # Fallback for "label value" lines: never override a value
                # that is already present.
                dictionary.setdefault(keyword, value)
            break

    return dictionary

In [11]:
# Labels searched for in the OCR text; passed to the extractor in the next cell.
keywords = [
    'HB',
    'WBCs',
    'Platelet Count',
    'Basophils',
    'Eosinophils',
]

In [12]:
# Run the Tesseract-based extractor on the sample image and display the result.
# Raw string: '\S' is not a valid escape sequence in a normal string literal;
# the path value itself is unchanged.
extract_img_2(r'Sample\S1.jpg', keywords)

{'HB': 13.6,
 'WBCs': 6000.0,
 'Eosinphils': 2.0,
 'Basophils': 0.0,
 'Platelet Count': 199000.0}

In [9]:
def extract_img_1(path, keywords):
    """Extract numeric values for the given keywords from an image via EasyOCR.

    Every detection returned by ``reader.readtext(path, detail=1)`` is
    treated as one text fragment: ``detection[1]`` is the text and
    ``detection[-1]`` its confidence.  Fragments with a confidence below 0.6
    and fragments containing ``'MCHC'`` are skipped.  For the remaining
    fragments, the first keyword (in the order given) that matches claims
    the first number found in the fragment.

    Fragments that contain ``=`` or ``:`` always (over)write the keyword's
    entry.  Fragments without such a delimiter are used as a fallback only:
    they fill a keyword that has no value yet and never replace an existing
    one.

    Parameters
    ----------
    path
        Image reference handed unchanged to ``easyocr.Reader.readtext``.
    keywords : iterable of str
        Patterns searched case-insensitively with ``re``; they are treated as
        regular expressions.  Entries that cannot be compiled are ignored.

    Returns
    -------
    dict
        Maps each matched keyword (exactly as passed in) to a ``float``.
        Keywords that were not found are absent.
    """
    # NOTE(review): a new Reader is built on every call, which reloads the
    # model each time; consider creating it once and reusing it.
    reader = easyocr.Reader(['en'], gpu=False)
    output = reader.readtext(path, detail=1)

    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    number_pattern = re.compile(r'\d+\.?\d*')

    # Compile every keyword once; unusable entries are skipped (best effort,
    # matching the original behaviour of silently ignoring them).
    keyword_patterns = []
    for keyword in keywords:
        try:
            keyword_patterns.append((keyword, re.compile(keyword, re.IGNORECASE)))
        except (re.error, TypeError, ValueError):
            continue

    dictionary = {}
    for detection in output:
        probability = detection[-1]
        if not probability >= 0.6:
            # Skip low-confidence detections.  Previously `text` was simply
            # left unassigned here, which raised on a low-confidence first
            # detection and otherwise re-processed the previous fragment.
            continue
        text = detection[1]

        if 'MCHC' in text:
            continue

        number = number_pattern.search(text)
        if number is None:
            # No number in this fragment, so no keyword can take a value.
            continue
        value = float(number.group())
        has_delimiter = '=' in text or ':' in text

        for keyword, pattern in keyword_patterns:
            if not pattern.search(text):
                continue
            if has_delimiter:
                dictionary[keyword] = value
            else:
                # Fallback for "label value" fragments: never override a
                # value that is already present.
                dictionary.setdefault(keyword, value)
            break

    return dictionary

In [12]:
# ---------------------------------------------------------------------------
# End-to-end run: OCR a CBC report image, build a single feature row from the
# extracted values and ask both random-forest models for a prediction.
# If the required values cannot be extracted, the user is asked to enter the
# features manually instead.
# ---------------------------------------------------------------------------

# Raw strings keep the backslashes literal without relying on invalid escape
# sequences such as '\S' or '\R'; the path values are unchanged.
# NOTE(review): these are Windows-style relative paths.
IMAGE_PATH = r'Sample\S1.jpg'
ACUTE_L_MODEL_PATH = r'Model\RandomForestModel(AcuteL-m,f).h5'
CML_MODEL_PATH = r'Model\RandomForestModel(CML-m,f).h5'

# The eosinophils label is looked up under both spellings, in this order
# (the second one is the spelling previously retried after a failure).
EOSINOPHILS_SPELLINGS = ('Eosinophils', 'Eosinphils')

# User-facing messages.  The leading/trailing whitespace is part of the text
# that was printed before and is spelled out explicitly here.
_MSG_ACUTE_L_AND_CML = (
    '\n'
    + ' ' * 43 + 'There is Probability of Acute Leukmia and Chronic Myelogenous Leukemia (CML) :\n'
    + ' ' * 49 + ">> As Your ['Hemoglobin' , 'WBCs', 'Platelets', 'Basophilis','Eosinophilia'] may be not in normal ranges \n"
    + ' ' * 43
)
_MSG_CML = (
    '\n'
    + ' ' * 42 + 'There is Probability of Chronic Myelogenous Leukemia (CML) :\n'
    + ' ' * 48 + ">> As Your ['Hemoglobin', 'Platelets','Basophilis','Eosinophilia'] may be not in normal ranges \n"
    + ' ' * 43
)
_MSG_ACUTE_L = (
    '\n'
    + ' ' * 41 + 'There is Probability of Acute Leukmia : \n'
    + ' ' * 44 + ">> As Your ['Hemoglobin' , 'WBCs', 'Platelets'] may be not in normal ranges \n"
    + ' ' * 43
)


def _report_prediction(pred_acute_l, pred_cml):
    """Print the verdict for the two model predictions and return its label.

    Returns 'Good', 'Acute  L or CML', 'CML' or 'Acute L'.  When the pair of
    predictions matches none of the handled combinations, nothing is printed
    and None is returned.
    """
    if pred_cml == pred_acute_l:
        # Equal predictions are reported as "no probability".
        # NOTE(review): this presumes the two models can only agree on the
        # label 'Good' -- confirm against the models' label sets.
        print('Based on Your Complete Blood Count (CBC) :')
        print('------' * 10)
        print('\t There is No Probability of Acute Leukmia and Chronic Myelogenous Leukemia (CML) ')
        return 'Good'

    if pred_cml == 'CML' and pred_acute_l == 'Acute L':
        # Both models flag their own condition.
        message, label = _MSG_ACUTE_L_AND_CML, 'Acute  L or CML'
    elif pred_cml == 'CML' and pred_acute_l == 'Good':
        # Only the CML model flags a condition.
        message, label = _MSG_CML, 'CML'
    elif pred_cml == 'Good' and pred_acute_l == 'Acute L':
        # Only the acute-leukemia model flags a condition.
        message, label = _MSG_ACUTE_L, 'Acute L'
    else:
        return None

    print('Based on Your Complete Blood Count (CBC) :')
    print('-----' * 10)
    print(message)
    return label


# SECURITY NOTE(review): joblib.load unpickles the file and can execute
# arbitrary code; only load model files from a trusted source.
# Loading happens once, outside any error handling, so that a missing or
# broken model file is reported as such instead of as an OCR failure.
model_acute_l = joblib.load(ACUTE_L_MODEL_PATH)
model_cml = joblib.load(CML_MODEL_PATH)

# Combined feature list of both models with the three duplicated names
# removed by position.
# NOTE(review): relies on the exact order of both models' feature_names_in_.
features = list(model_acute_l.feature_names_in_) + list(model_cml.feature_names_in_)
features.pop(3)  # Delete Duplicated Gender_Male from Feature List
features.pop(3)  # Delete Duplicated Hemoglobin from Feature List
features.pop(3)  # Delete Duplicated Platelets from Feature List

# Try each spelling of the eosinophils label until every keyword is found.
extraction_ok = False
for eosinophils_keyword in EOSINOPHILS_SPELLINGS:
    keywords = ['HB', 'WBCs', 'Platelet Count', 'Basophils', eosinophils_keyword]
    try:
        extracted_dic = extract_img_2(IMAGE_PATH, keywords)
    except Exception:
        # Best effort, as before: any OCR/image failure leads to the
        # manual-entry prompt below rather than to a crash.
        continue
    if all(keyword in extracted_dic for keyword in keywords):
        extraction_ok = True
        break

if extraction_ok:
    data_dic = {
        features[0]: extracted_dic['HB'],
        features[1]: extracted_dic['WBCs'],
        features[2]: extracted_dic['Platelet Count'],
        features[3]: extracted_dic['Basophils'],
        features[4]: extracted_dic[eosinophils_keyword],
        # FIXME(review): the last feature (Gender_Male according to the pop
        # comments above) is drawn at random, so the prediction is not
        # reproducible; it should come from the user or the report.
        features[5]: np.random.choice([0, 1]),
    }
    custom_data = pd.DataFrame(data=[data_dic])
    print('-----' * 10)
    print('Your Input :\n', custom_data)
    print('-----' * 10)

    # Each model only sees the columns it was trained on.
    pred_acute_l = model_acute_l.predict(custom_data[list(model_acute_l.feature_names_in_)])[0]
    pred_cml = model_cml.predict(custom_data[list(model_cml.feature_names_in_)])[0]
    output = _report_prediction(pred_acute_l, pred_cml)
else:
    print('Your uploaded image can\'t be detected')
    print('-----' * 10)
    print('\t Enter Manually the Following Please : ', features[:-1])


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


--------------------------------------------------
Your Input :
    Hemoglobin    WBCs  Platelets  Basophilis  Eosinophilia  Gender_Male
0        13.6  6000.0   199000.0         0.0           2.0            1
--------------------------------------------------
Based on Your Complete Blood Count (CBC) :
------------------------------------------------------------
	 There is No Probability of Acute Leukmia and Chronic Myelogenous Leukemia (CML) 
