In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
def load_images_from_folder(folder):
    """Load every readable image found under ``folder/<class name>``.

    The class names come from the module-level ``CLASSES`` list; the position
    of a name in that list is used as the integer label of its images. Each
    image is read with OpenCV, resized to ``IMG_SIZE`` x ``IMG_SIZE`` and its
    pixel values are divided by 255.0.

    Args:
        folder: Directory expected to contain one sub-directory per class.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the images and their labels.
        Missing class directories and unreadable files are reported with a
        printed message and skipped.
    """
    images = []
    labels = []

    for class_index, class_name in enumerate(CLASSES):
        class_dir = os.path.join(folder, class_name)

        if os.path.exists(class_dir):
            for file_name in os.listdir(class_dir):
                file_path = os.path.join(class_dir, file_name)
                pixels = cv2.imread(file_path)

                # cv2.imread signals failure by returning None
                if pixels is not None:
                    resized = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
                    images.append(resized / 255.0)  # Normalisation
                    labels.append(class_index)
                else:
                    print(f"Impossible de lire l'image : {file_path}")
        else:
            print(f"Dossier non trouvé : {class_dir}")

    return np.array(images), np.array(labels)




In [2]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def build_and_train_model(X_train, y_train, X_test, y_test, img_size=224, epochs=10,
                          num_classes=3, batch_size=32):
    """Build a MobileNetV2-based classifier and train its new head.

    The ImageNet-pretrained MobileNetV2 backbone is frozen; only the layers
    added on top (global average pooling, Dense(128), Dropout(0.3) and a
    softmax output) are trained. Training batches come from an
    ``ImageDataGenerator`` applying random rotation, zoom and horizontal flip.

    Args:
        X_train: Training images, fed through the augmentation generator.
        y_train: Integer class labels for ``X_train`` (the loss is
            ``sparse_categorical_crossentropy``).
        X_test: Images passed to ``fit`` as validation data (not augmented).
        y_test: Labels for ``X_test``.
        img_size: Side length of the square input expected by the backbone.
        epochs: Number of training epochs.
        num_classes: Number of units in the softmax output layer.
        batch_size: Batch size used by the augmentation generator.

    Returns:
        Tuple ``(model, history)``: the trained Keras model and the object
        returned by ``model.fit``.
    """
    # Pre-trained backbone without its classification head
    base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=(img_size, img_size, 3))
    base_model.trainable = False  # Freeze the pre-trained weights

    # Custom classification head
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.3)(x)
    output = Dense(num_classes, activation="softmax")(x)  # One unit per class

    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    model.summary()

    # Augmentation is applied to the training batches only
    # NOTE(review): horizontal_flip mirrors the images; confirm this is
    # desirable for the kind of images being classified.
    datagen = ImageDataGenerator(rotation_range=15, zoom_range=0.2, horizontal_flip=True)

    history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
                        validation_data=(X_test, y_test),
                        epochs=epochs)

    return model, history


In [3]:
def predict_image(image_path, model, X_test, y_test):
    """Classify one image and print the prediction with the test accuracy.

    The image is read with OpenCV, resized to 224x224, divided by 255.0 and
    given a leading batch axis before being passed to ``model.predict``.
    The model is also evaluated on ``(X_test, y_test)`` on every call so the
    overall accuracy can be printed next to the prediction.

    Args:
        image_path: Path of the image to classify.
        model: Model exposing ``predict`` and ``evaluate``.
        X_test: Images used to compute the reported accuracy.
        y_test: Labels for ``X_test``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    IMG_SIZE = 224  # Size used during training

    # Load and preprocess the image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None on failure; without this check cv2.resize
        # would fail with an error that does not mention the path.
        raise ValueError(f"Impossible de lire l'image : {image_path}")
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0  # Normalisation
    img = np.expand_dims(img, axis=0)  # Add a batch dimension

    # Prediction
    predictions = model.predict(img)[0]  # Probabilities for each class
    class_index = np.argmax(predictions)  # Index of the most probable class
    confidence = predictions[class_index] * 100

    # Map the index to a human-readable label
    class_labels = ["bulletin de soin", "ordonnance", "autre document médical"]
    class_label = class_labels[class_index]

    # Overall accuracy of the model on the supplied test data
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

    print(f"Prédiction : {class_label} | Confiance : {confidence:.2f}%")
    print(f"Accuracy globale du modèle sur test : {accuracy * 100:.2f}%")

In [4]:
import cv2
import numpy as np
import os

def load_templates_from_folder(folder):
    """Read every entry of ``folder`` as a grayscale image.

    Args:
        folder: Directory whose entries are passed to ``cv2.imread``.

    Returns:
        Dict mapping each file name to its grayscale image. Entries for
        which ``cv2.imread`` returns ``None`` are left out.
    """
    templates_by_name = {}
    for entry in os.listdir(folder):
        gray = cv2.imread(os.path.join(folder, entry), cv2.IMREAD_GRAYSCALE)
        if gray is None:
            continue
        templates_by_name[entry] = gray
    return templates_by_name
def show_image(output_image_path):
    """Display the image stored at ``output_image_path`` with matplotlib.

    The figure size is derived from the image dimensions (width / 100 by
    height / 100) and the axes are hidden.

    Args:
        output_image_path: Path of the image file to display.

    Raises:
        ValueError: If OpenCV cannot read ``output_image_path``.
    """
    # Imported locally: the cell defining this function does not import
    # pyplot itself, so do not rely on another cell having run first.
    import matplotlib.pyplot as plt

    output_image = cv2.imread(output_image_path)
    if output_image is None:
        # cv2.imread returns None on failure instead of raising
        raise ValueError(f"Could not read image: {output_image_path}")

    height, width = output_image.shape[:2]
    plt.figure(figsize=(width / 100, height / 100))  # Scale factor to control figure size
    plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.show()

def find_best_template(test_image, templates):
    """Pick the template sharing the most SIFT matches with ``test_image``.

    SIFT descriptors of each template are matched against those of the test
    image with a FLANN matcher (two nearest neighbours per descriptor). A
    match is kept when its distance is below 0.7 times the distance of the
    second neighbour; the template with the most kept matches wins.

    Args:
        test_image: Image to identify.
        templates: Mapping of file name to template image.

    Returns:
        Tuple ``(best_match, best_template)``: the winning template image and
        its file name, or ``(None, None)`` when no template produced at least
        one kept match (including when no descriptors could be computed).
    """
    sift = cv2.SIFT_create()
    _, test_descriptors = sift.detectAndCompute(test_image, None)
    if test_descriptors is None:
        # Nothing can be matched; checking once avoids recomputing the
        # descriptors of every template for no result.
        return None, None

    # The matcher configuration does not depend on the template
    flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))

    best_match = None
    best_template = None
    best_match_count = 0

    for filename, template in templates.items():
        _, template_descriptors = sift.detectAndCompute(template, None)
        if template_descriptors is None:
            continue

        matches = flann.knnMatch(template_descriptors, test_descriptors, k=2)
        # knnMatch may return fewer than two neighbours for a descriptor;
        # such entries cannot go through the ratio test and are ignored.
        good_matches = [
            pair[0]
            for pair in matches
            if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
        ]

        if len(good_matches) > best_match_count:
            best_match_count = len(good_matches)
            best_match = template
            best_template = filename

    return best_match, best_template

def align_and_brighten_image(test_image, template, dark_threshold=50, brightness_factor=1.5, brightness_offset=30):
    """Warp ``test_image`` onto ``template`` and brighten it if it is very dark.

    SIFT descriptors of both images are matched with FLANN and filtered with
    a 0.7 ratio test. With at least 4 kept matches, a RANSAC homography from
    the test image to the template is estimated and used to warp the test
    image to the template's size. If the mean value of the warped image is
    below ``dark_threshold`` it is rescaled with ``cv2.convertScaleAbs``.

    Args:
        test_image: Image to align.
        template: Reference image defining the output size.
        dark_threshold: Mean value under which brightening is applied.
        brightness_factor: ``alpha`` passed to ``cv2.convertScaleAbs``.
        brightness_offset: ``beta`` passed to ``cv2.convertScaleAbs``.

    Returns:
        The aligned (and possibly brightened) image, or ``None`` when the
        alignment cannot be computed: missing input, no descriptors, fewer
        than 4 kept matches, or no homography found.
    """
    if test_image is None or template is None:
        return None

    sift = cv2.SIFT_create()
    keypoints1, descriptors1 = sift.detectAndCompute(template, None)
    keypoints2, descriptors2 = sift.detectAndCompute(test_image, None)
    if descriptors1 is None or descriptors2 is None:
        return None

    flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))
    matches = flann.knnMatch(descriptors1, descriptors2, k=2)
    # knnMatch may return fewer than two neighbours for a descriptor; those
    # entries cannot go through the ratio test and are ignored.
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
    ]

    if len(good_matches) < 4:
        return None

    src_pts = np.float32([keypoints1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([keypoints2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

    # Homography mapping test-image points onto template points
    H, _ = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5.0)
    if H is None:
        return None

    h, w = template.shape[:2]  # also valid if the template has a channel axis
    aligned_image = cv2.warpPerspective(test_image, H, (w, h))

    mean_brightness = np.mean(aligned_image)
    print(f"Mean brightness: {mean_brightness}")

    # Apply brightness correction only for extremely dark images
    if mean_brightness < dark_threshold:
        aligned_image = cv2.convertScaleAbs(aligned_image, alpha=brightness_factor, beta=brightness_offset)
        print("Image is extremely dark. Brightness enhanced.")
    else:
        print("Brightness is acceptable; no enhancement applied.")

    return aligned_image


In [5]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def detect_tables(image_path, output_image_path="C:/Users/user/Downloads/detected_tables_output.jpg", min_area=5000):
    """Find large contours (treated as tables), draw them and report their size.

    The image is binarised with an inverted adaptive threshold, closed with a
    5x5 rectangular kernel, and its external contours are extracted. Contours
    whose area exceeds ``min_area`` are kept; their bounding boxes are sorted
    top to bottom, drawn in green on a copy of the image, shown with
    matplotlib and written to ``output_image_path``.

    Args:
        image_path: Path of the image to analyse.
        output_image_path: Where the annotated image is written. A falsy
            value skips writing.
        min_area: Minimum contour area for a contour to be kept.

    Returns:
        List with, for each kept bounding box (top to bottom), its area as a
        percentage of the whole image area.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None on failure instead of raising
        raise ValueError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Convert to binary using adaptive thresholding
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 4
    )

    # Apply morphological closing to enhance table structures
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep only large contours and order their boxes by y-coordinate
    bounding_boxes = sorted(
        (cv2.boundingRect(cnt) for cnt in contours if cv2.contourArea(cnt) > min_area),
        key=lambda box: box[1],
    )

    # Percentage of the image covered by each bounding box
    image_area = image.shape[0] * image.shape[1]
    table_areas = [(w * h) / image_area * 100 for _, _, w, h in bounding_boxes]

    # Draw bounding boxes on a copy so the loaded image stays untouched
    output_image = image.copy()
    for x, y, w, h in bounding_boxes:
        cv2.rectangle(output_image, (x, y), (x + w, y + h), (0, 255, 0), 3)

    # Show detected tables
    plt.figure(figsize=(10, 10))
    plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
    plt.title("Detected Tables with Bounding Boxes")
    plt.axis("off")
    plt.show()

    if output_image_path:
        cv2.imwrite(output_image_path, output_image)

    return table_areas

In [6]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def extract_table(image_path, save_path=None):
    """Crop the topmost large contour region (treated as a table) from an image.

    The image is binarised with an inverted adaptive threshold, closed with a
    5x5 rectangular kernel, and its external contours are extracted. Among
    the contours whose area exceeds 10000, the bounding box with the smallest
    y-coordinate is cropped from the original image.

    Args:
        image_path: Path of the image to analyse.
        save_path: Optional path where the crop is written.

    Returns:
        The cropped region of the image, or ``None`` when no contour is
        large enough.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None on failure instead of raising
        raise ValueError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Convert to binary
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 4
    )

    # Apply morphological closing
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Bounding boxes of the contours that are large enough
    bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours if cv2.contourArea(cnt) > 10000]
    if not bounding_boxes:
        return None

    # Only the topmost box is extracted; min() keeps the first box among
    # equal y-coordinates, like taking the head of a stable sort.
    x, y, w, h = min(bounding_boxes, key=lambda box: box[1])
    extracted_table = image[y:y+h, x:x+w]
    if save_path:
        cv2.imwrite(save_path, extracted_table)

    return extracted_table

In [7]:
# Paths of the train and test dataset folders
DATASET_DIR = "C:/Users/user/Downloads/dataset"
TRAIN_DIR = os.path.join(DATASET_DIR, "train")
TEST_DIR = os.path.join(DATASET_DIR, "test")
# Class sub-folder names; load_images_from_folder uses each name's position
# in this list as the integer label of its images.
CLASSES = ["Bulletin_de_soin", "Ordonnance", "Other"]
IMG_SIZE = 224  # Standard size for MobileNetV2
# Reference templates (loaded as grayscale) later given to find_best_template
template_folder = r"C:\Users\user\Downloads\dataset\train\BCNN"
templates = load_templates_from_folder(template_folder)

In [None]:
# Load the images from the train and test folders
X_train, y_train = load_images_from_folder(TRAIN_DIR)
X_test, y_test = load_images_from_folder(TEST_DIR)
# Split the images of the test folder in half: one half stays the test set,
# the other becomes the validation set (fixed random_state for repeatability).
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=42)
print(f"Taille du train: {len(X_train)}")
print(f"Taille du validation: {len(X_val)}")
print(f"Taille du test: {len(X_test)}")

In [None]:
# Train on the training set. The validation split is passed through the
# X_test / y_test parameters, which build_and_train_model forwards to fit()
# as validation_data.
model, history = build_and_train_model(X_train, y_train, X_val, y_val)

In [None]:
# Image used by the prediction cell and by the template-alignment cell
image_path = r"C:\Users\user\Downloads\ordtest.png"

In [None]:
# Classify the image; also prints the model's accuracy on the test split
predict_image(image_path, model, X_test, y_test)

In [None]:
# Align the test image on its best-matching template and save the result.
output_path = r"C:\Users\user\Downloads\aligned_brightened.jpg"
test_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if test_image is None:
    # cv2.imread returns None on failure instead of raising
    raise ValueError(f"Could not read image: {image_path}")

best_template, best_template_name = find_best_template(test_image, templates)
if best_template is None:
    # find_best_template returns (None, None) when no template matched
    raise RuntimeError(f"No template matched the image: {image_path}")
print(f"Best matching template: {best_template_name}")

result = align_and_brighten_image(test_image, best_template)
if result is None:
    # align_and_brighten_image returns None when alignment is not possible;
    # passing None to cv2.imwrite would fail with an unrelated-looking error.
    raise RuntimeError(f"Could not align the image on template: {best_template_name}")
cv2.imwrite(output_path, result)
print(f"Processed image saved at: {output_path}")

show_image(output_path)

In [None]:
# Detect tables on the aligned image written by the previous cell; the result
# is the list of bounding-box areas as percentages of the image area.
table_percentages = detect_tables(output_path)
print("Table Area Percentages:", table_percentages)

In [None]:
import pandas as pd
import difflib
import re

# Dictionary of known medical words, stored lowercase for case-insensitive lookup.
df = pd.read_csv(r"C:\Users\user\Downloads\medical_words.csv")
# read_csv parses some entries (empty cells, "NA", "null", ...) as NaN; drop
# them so the set only contains strings, which difflib requires.
medical_words = set(df["Medical Word"].dropna().str.lower().tolist())

def correct_word(word):
    """Return ``word`` if it is a known word, else its closest dictionary entry.

    The lookup is done on the lowercase form against the module-level
    ``medical_words`` collection. When the word is unknown, the single best
    fuzzy match with a similarity of at least 0.8 is used; if there is none,
    the word is returned unchanged. A replacement is capitalized when the
    original word starts with an uppercase letter.
    """
    key = word.lower()
    if key in medical_words:
        return word  # already a dictionary word

    # n=1 asks difflib for the best candidate only
    candidates = difflib.get_close_matches(key, medical_words, n=1, cutoff=0.8)
    if not candidates:
        return word  # nothing close enough: keep the original

    best = candidates[0]
    return best.capitalize() if word[0].isupper() else best

def correct_text(text):
    """Correct every word of ``text`` with ``correct_word``.

    Each run of word characters (``\\w+``) is replaced in place by the value
    returned by ``correct_word``. Everything between words (spaces, line
    breaks and punctuation) is left exactly as it is in the input, so a
    space following a punctuation mark is not lost.

    Args:
        text: The string to correct.

    Returns:
        The corrected string.
    """
    # Substituting in place avoids re-assembling tokens by hand, which
    # dropped the whitespace that follows punctuation.
    return re.sub(r'\w+', lambda match: correct_word(match.group(0)), text)

# Quick check of correct_text on a sentence containing misspelled words
sample_text = "The patint had an imige of a maas that was locatd near the organ. Yess, it was clear."
corrected = correct_text(sample_text)
print("Original:", sample_text)
print("Corrected:", corrected)


In [None]:
import pytesseract
# Load the image and extract text using Tesseract (French language)
# NOTE: this rebinds image_path, which an earlier cell set to the image used
# for prediction and template alignment.
image_path = "C:\\Users\\user\\Downloads\\dataset\\test\\Ordonnance\\0589--1525800--20230721_page_6.jpg"
# NOTE(review): cv2.imread returns None when the file cannot be read, in
# which case the cvtColor call below fails; confirm the path exists.
image = cv2.imread(image_path)

# Preprocessing: Convert to grayscale (optional)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Perform OCR with French language setting
extracted_text = pytesseract.image_to_string(gray, lang="fra")

# Apply correction
corrected_text = correct_text(extracted_text)

# Print results
print("Original Extracted Text:\n", extracted_text)
print("\nCorrected Text:\n", corrected_text)

In [None]:
# Load correction words from dataset
# correction_dict maps the lowercase form of each word to the form in which
# it appears in the files.
correction_dict = {}
dataset_path = r"C:\Users\user\Downloads\FRASIMED/CANTEMIST-FRASIMED"  # Adjust if needed

for file in os.listdir(dataset_path):
    if file.endswith(".txt"):  # Assuming correction words are in .txt files
        with open(os.path.join(dataset_path, file), "r", encoding="utf-8") as f:
            for line in f:
                # Words are the whitespace-separated pieces of each line
                words = line.strip().split()  
                for word in words:
                    # A later occurrence of the same lowercase key overwrites
                    # the earlier one.
                    correction_dict[word.lower()] = word  # Store original capitalization

# Dictionary-based text correction using correction_dict
def correct_text(text):
    """Replace each whitespace-separated token found in ``correction_dict``.

    Tokens are looked up by their lowercase form; a token without an entry
    is kept unchanged. The result joins the tokens with single spaces.

    NOTE: this definition rebinds the name ``correct_text``, which is already
    defined earlier in the notebook.
    """
    replaced = []
    for token in text.split():
        replaced.append(correction_dict.get(token.lower(), token))
    return " ".join(replaced)
