Preprocessing Phase

In [None]:
import cv2

def get_frame_rate(video_path):
    """
    Retrieves the frame rate of a video.
    Args:
        video_path (str): Path to the video file.
    Returns:
        float: The frame rate (frames per second).
    """
    # Open the video file
    video = cv2.VideoCapture(video_path)
    
    # Get the frame rate
    frame_rate = video.get(cv2.CAP_PROP_FPS)
    
    # Release the video
    video.release()
    
    return frame_rate

# Example usage
video_path = r"C:\Users\ARYA\Desktop\NIT Surathkal\Academics\IT820 - Information Technology for Healthcare\Project 2\ITH_project\case_4695.mp4"  # Update this path
fps = get_frame_rate(video_path)
print(f"Frame rate: {fps} frames per second")


Frame rate: 60.0 frames per second


In [84]:
import cv2
import os
import glob
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import tensorflow as tf

# Step 1: Define phase-to-label mapping
phase_mapping = {
    'No phase': 0,
    'Viscoelastic': 1,
    'Capsule Pulishing': 2,
    'Hydrodissection': 3,
    'Lens Implantation': 4,
    'Lens positioning': 5,
    'Viscoelastic_Suction': 6,
    'Capsulorhexis': 7,
    'Irrigation/Aspiration': 8,
    'Phacoemulsification': 9,
    'Anterior_Chamber Flushing': 10,
    'Tonifying/Antibiotics': 11,
    'Incision': 12
}

In [9]:
# Step 2: Extract frames and label them based on annotations
def extract_and_label_frames(video_dir, annotation_dir, output_dir, frame_rate=2):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    video_files = glob.glob(os.path.join(video_dir, "case_*.mp4"))

    for video_path in video_files:
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        annotation_path = os.path.join(annotation_dir, f"{video_name}_annotations_phases.csv")

        if not os.path.exists(annotation_path):
            print(f"No annotation file found for {video_name}, skipping.")
            continue

        annotations = pd.read_csv(annotation_path)
        video = cv2.VideoCapture(video_path)
        fps = video.get(cv2.CAP_PROP_FPS)
        frame_interval = int(fps / frame_rate)

        frame_count = 0
        saved_count = 0

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                phase_label = "No phase"
                for _, row in annotations.iterrows():
                    start_frame = row['frame']
                    end_frame = row['endFrame']
                    if start_frame <= frame_count <= end_frame:
                        phase_label = row['comment']
                        break

                frame_filename = os.path.join(output_dir, f"{video_name}_frame_{saved_count}_{phase_label}.jpg")
                cv2.imwrite(frame_filename, frame)
                saved_count += 1

            frame_count += 1

        video.release()
        print(f"Extracted and labeled {saved_count} frames from {video_name}.")

# Step 3: Prepare the data for training
def prepare_data_for_training(frames_dir):
    data = []
    for frame_path in glob.glob(os.path.join(frames_dir, "*.jpg")):
        filename = os.path.basename(frame_path)
        phase_label = filename.split("_")[-1].replace(".jpg", "")
        label = phase_mapping.get(phase_label, 0)
        data.append((frame_path, label))

    df = pd.DataFrame(data, columns=['frame_path', 'label'])
    return df

In [5]:
# Step 4: Load data and train the model
def load_data_and_train_model(frames_dir, input_shape=(256, 256, 3), batch_size=32, epochs=10):
    df = prepare_data_for_training(frames_dir)
    
    datagen = ImageDataGenerator(rescale=1./255)
    train_generator = datagen.flow_from_dataframe(
        dataframe=df,
        x_col="frame_path",
        y_col="label",
        target_size=input_shape[:2],
        batch_size=batch_size,
        class_mode="raw"
    )

    # Define a simple model
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(len(phase_mapping), activation='softmax')  # Number of phases
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_generator, epochs=epochs)
    return model

In [90]:
# Define paths
video_dir = r"C:\Users\ARYA\Desktop\NIT Surathkal\Academics\IT820 - Information Technology for Healthcare\Project 2\ITH_project"
annotation_dir = r"C:\Users\ARYA\Desktop\NIT Surathkal\Academics\IT820 - Information Technology for Healthcare\Project 2\cataract annotations"
output_dir = r"C:\Users\ARYA\Desktop\NIT Surathkal\Academics\IT820 - Information Technology for Healthcare\Project 2\Frames"

In [46]:
# Run frame extraction and labeling
extract_and_label_frames(video_dir, annotation_dir, output_dir)

Extracted and labeled 915 frames from case_4687.
Extracted and labeled 826 frames from case_4693.
Extracted and labeled 1008 frames from case_4695.
Extracted and labeled 724 frames from case_4697.
Extracted and labeled 1489 frames from case_4709.
Extracted and labeled 1028 frames from case_4750.
Extracted and labeled 1308 frames from case_4811.
Extracted and labeled 1043 frames from case_4816.
Extracted and labeled 1846 frames from case_4852.
Extracted and labeled 1837 frames from case_4859.
Extracted and labeled 1334 frames from case_4863.
Extracted and labeled 1671 frames from case_4864.
Extracted and labeled 953 frames from case_4874.
Extracted and labeled 905 frames from case_4888.
Extracted and labeled 1390 frames from case_4899.
Extracted and labeled 1183 frames from case_4906.
Extracted and labeled 1862 frames from case_4923.
Extracted and labeled 1708 frames from case_4924.
Extracted and labeled 1146 frames from case_4999.
Extracted and labeled 1060 frames from case_5004.
Extra

In [92]:
df = prepare_data_for_training(output_dir)

In [94]:
df['label'].value_counts()

label
9     16048
0     12426
7      4848
3      3838
4      1995
1      1727
12     1528
5      1453
2      1369
Name: count, dtype: int64

Model Training Without Implemeting Diffusion

In [96]:
# Train model
model = load_data_and_train_model(output_dir, input_shape=(256, 256, 3), batch_size=32, epochs=5)

Found 45232 validated image filenames.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5


  self._warn_if_super_not_called()


[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1407s[0m 991ms/step - accuracy: 0.7913 - loss: 0.6559
Epoch 2/5
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m998s[0m 704ms/step - accuracy: 0.9820 - loss: 0.0597
Epoch 3/5
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1576s[0m 1s/step - accuracy: 0.9894 - loss: 0.0362
Epoch 4/5
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1671s[0m 1s/step - accuracy: 0.9931 - loss: 0.0234
Epoch 5/5
[1m1414/1414[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m967s[0m 682ms/step - accuracy: 0.9948 - loss: 0.0214


In [97]:
model.save('ITH_project_model2.h5')



In [108]:
model.save('ITH_project_model2.keras')

In [100]:
# Prediction 1

In [11]:
import os
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from sklearn.metrics import accuracy_score

# Mapping of phases to integer labels
phase_mapping = {
    'No phase': 0,
    'Viscoelastic': 1,
    'Capsule Pulishing': 2,
    'Hydrodissection': 3,
    'Lens Implantation': 4,
    'Lens positioning': 5,
    'Viscoelastic_Suction': 6,
    'Capsulorhexis': 7,
    'Irrigation/Aspiration': 8,
    'Phacoemulsification': 9,
    'Anterior_Chamber Flushing': 10,
    'Tonifying/Antibiotics': 11,
    'Incision': 12
}

# Load ground truth labels from the CSV file
def load_ground_truth_labels(csv_path):
    df = pd.read_csv(csv_path)
    true_labels = {}
    for _, row in df.iterrows():
        phase_label = row['comment']
        true_label = phase_mapping[phase_label]
        start_frame, end_frame = row['frame'], row['endFrame']
        
        # Assign true label for each frame in the range
        for frame_num in range(start_frame, end_frame + 1):
            true_labels[frame_num] = true_label
    
    return true_labels

# Predict labels for every 50th frame
def predict_labels_for_video(video_path, model, input_shape=(256, 256), frame_interval=10):
    video = cv2.VideoCapture(video_path)
    frame_count = 0
    predicted_labels = []
    
    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break
        
        # Process every 50th frame
        if frame_count % frame_interval == 0:
            # Resize frame to model input shape
            frame_resized = cv2.resize(frame, input_shape)
            img_array = image.img_to_array(frame_resized) / 255.0
            img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
            
            # Predict the phase label
            predictions = model.predict(img_array)
            predicted_label = np.argmax(predictions)
            predicted_labels.append((frame_count, predicted_label))
        
        frame_count += 1
    
    video.release()
    return predicted_labels

# Calculate accuracy by comparing predicted labels to ground truth
def evaluate_video_accuracy(video_path, csv_path, model, input_shape=(256, 256), frame_interval=50):
    # Load ground truth labels from the annotation CSV file
    true_labels = load_ground_truth_labels(csv_path)
    
    # Predict labels for every 50th frame in the video
    predicted_labels = predict_labels_for_video(video_path, model, input_shape, frame_interval)
    
    # Gather true and predicted labels for accuracy calculation
    y_true = []
    y_pred = []
    
    for frame_num, predicted_label in predicted_labels:
        if frame_num in true_labels:
            y_true.append(true_labels[frame_num])
            y_pred.append(predicted_label)
    
    # Calculate accuracy
    print(y_true)
    print(y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Video Accuracy: {accuracy * 100:.2f}%")
    return accuracy

# # Load the trained model
# model_path = '/content/drive/MyDrive/Path_to_your_model/my_model.h5'  # Update path
# model = load_model(model_path)

In [None]:
# Example usage
video_path = "C:\\Users\\ARYA\\Desktop\\NIT Surathkal\\Academics\\IT820 - Information Technology for Healthcare\\Project 2\\ITH_project\\case_5032.mp4" 
csv_path = "C:\\Users\\ARYA\\Desktop\\NIT Surathkal\\Academics\\IT820 - Information Technology for Healthcare\\Project 2\\cataract annotations\\case_5032_annotations_phases.csv"  

evaluate_video_accuracy(video_path, csv_path, model1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 675ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

0.2662337662337662

In [1]:
# Prediction 2

In [15]:

from tensorflow.keras.models import load_model
model1 = load_model('ITH_project_model.h5')



In [18]:
video_path = "C:\\Users\\ARYA\\Desktop\\NIT Surathkal\\Academics\\IT820 - Information Technology for Healthcare\\Project 2\\ITH_project\\case_4687.mp4"  # Path to the video file
csv_path = "C:\\Users\\ARYA\\Desktop\\NIT Surathkal\\Academics\\IT820 - Information Technology for Healthcare\\Project 2\\cataract annotations\\case_4687_annotations_phases.csv"  # Corresponding CSV file for ground truth labels

evaluate_video_accuracy(video_path, csv_path, model1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70

0.44765342960288806