# Purpose: Thermal Camera Application in AFP for ILSS Predictive Model  
# Date: 05/2025
# UNSW Sydney / EPFL
# Code: Assier de Pompignan Leo

In [None]:
# Code used to train the ILSS Predictive Model
import os
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv2D, MaxPooling2D, Flatten, TimeDistributed, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.sequence import pad_sequences
from ultralytics import YOLO  # Import from ultralytics

# Announce, then instantiate the ultralytics YOLO model from the trained weights file
print("Loading YOLO model...")
yolo_model = YOLO(
    r'Path_to_YOLO_model\runs\segment\train\weights\best.pt'
)

# Read the spreadsheet of per-video ILSS values into a DataFrame
# (prepare_dataset reads its 'Video name' and 'ILSS (Mpa)' columns)
excel_path = r'Path_to_YOLO_model\Train_dataset.xlsx'
print(f"Loading Excel data from {excel_path}...")
df = pd.read_excel(
    excel_path
)

# Function to extract frames from a video and crop ROI using YOLO
def extract_frames(video_path, max_frames=None):
    """Read a video and return the YOLO-cropped region of interest of its frames.

    Every frame read from the video is passed through the module-level
    ``yolo_model``. Frames without any detection are dropped; for the others
    the first box of the result is used to crop the frame.

    Args:
        video_path: Path of the video file to open with OpenCV.
        max_frames: Optional cap on the number of crops to collect. ``None``
            (the default) processes the whole video.

    Returns:
        A list of cropped frames (NumPy arrays), in video order. The list is
        empty when the video cannot be opened or no frame yields a usable box.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            if max_frames is not None and len(frames) >= max_frames:
                break

            ret, frame = cap.read()
            if not ret:
                break

            # Perform YOLO detection and take the first result of the list
            results = yolo_model(frame)[0]
            boxes = results.boxes
            if boxes is None or len(boxes) == 0:
                continue

            # Use the first detected box: [x1, y1, x2, y2]
            xyxy = boxes[0].xyxy.cpu().numpy()[0]
            x1, y1, x2, y2 = map(int, xyxy)

            # Clamp to the frame: a negative start index would silently wrap
            # around and crop the wrong region.
            height, width = frame.shape[:2]
            x1, x2 = max(x1, 0), min(x2, width)
            y1, y2 = max(y1, 0), min(y2, height)

            # A zero-area crop cannot be resized later, so drop it here.
            if x2 <= x1 or y2 <= y1:
                continue

            frames.append(frame[y1:y2, x1:x2])
    finally:
        # Release the capture handle even if detection raises mid-video.
        cap.release()

    print(f"  ➤ Extracted {len(frames)} frames from {os.path.basename(video_path)}")
    return frames

# Prepare the dataset
def prepare_dataset(
    video_dir=r'C:\Users\lesfr\OneDrive\Documents\master epfl\Projet de master\Models\Thermal camera model\Forward\R_CNN_Mask\Mask_RCNN_test\video-dataset_175',
    sequence_length=30,
    frame_size=(224, 224),
):
    """Build the training tensors from the videos listed in the module-level ``df``.

    For each row of ``df`` the video ``<video_dir>/<Video name>.mp4`` is read
    with ``extract_frames``. The first ``sequence_length`` crops are resized to
    ``frame_size``, scaled to [0, 1] and zero-padded at the end up to
    ``sequence_length`` frames. Videos that yield no crop at all are skipped
    (together with their target) so that every sample has the same shape.

    Args:
        video_dir: Directory that contains the ``.mp4`` files.
        sequence_length: Number of frames per sample after padding/truncation.
        frame_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a float32 array of shape
        ``(n_videos, sequence_length, height, width, channels)`` and ``y`` holds
        the matching ``'ILSS (Mpa)'`` values.
    """
    sequences = []
    targets = []
    total = len(df)
    print("Preparing dataset...")
    # enumerate() keeps the progress counter correct even if df has a
    # non-default index.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        video_name = row['Video name']
        print(f"[{position}/{total}] Processing video: {video_name}")
        video_path = os.path.join(video_dir, f"{video_name}.mp4")

        # Only the first `sequence_length` crops survive truncation, so there
        # is no point in resizing and normalizing the remaining ones.
        frames = extract_frames(video_path)[:sequence_length]
        if not frames:
            print(f"  ➤ No usable frames for {video_name}; skipping this video.")
            continue

        frames = np.array([cv2.resize(frame, frame_size) for frame in frames]) / 255.0  # Normalize frames

        # pad_sequences works on a batch and returns shape (1, sequence_length, ...);
        # [0] drops that batch axis so stacking the samples gives a 5-D tensor.
        padded = pad_sequences(
            [frames],
            maxlen=sequence_length,
            dtype='float32',
            padding='post',
            truncating='post',
        )[0]

        sequences.append(padded)
        targets.append(row['ILSS (Mpa)'])

    print("Dataset preparation complete.\n")
    return np.array(sequences), np.array(targets)

# Define the CNN + LSTM model
def create_model():
    """Build and compile the CNN + LSTM regression model.

    The network applies three Conv2D/MaxPooling2D stages to every frame of the
    sequence (through ``TimeDistributed``), flattens each frame's feature maps,
    summarizes the sequence with a bidirectional LSTM and regresses a single
    value with two Dense layers.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, mean squared error
        loss, MAE metric) that takes inputs of shape
        ``(batch, timesteps, 224, 224, 3)``.
    """
    print("Creating CNN + LSTM model...")
    model = Sequential()

    # Declare the 5-D input on the model itself. An `input_shape` given to the
    # Conv2D wrapped inside TimeDistributed is not seen by Sequential, which
    # leaves the model unbuilt until the first call to fit().
    model.add(tf.keras.Input(shape=(None, 224, 224, 3)))

    # CNN to extract spatial features, applied frame by frame
    for filters in (32, 64, 128):
        model.add(TimeDistributed(Conv2D(filters, (3, 3), activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2))))

    model.add(TimeDistributed(Flatten()))

    # LSTM to capture temporal features; only the final state is kept
    model.add(Bidirectional(LSTM(128, return_sequences=False)))

    # Dense layers for regression
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))  # Regression output

    model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

    print("Model created.\n")
    return model

# Prepare data for training
X, y = prepare_dataset()
print(f"Shape of X: {X.shape}, Shape of y: {y.shape}\n")

# Create and train the model
model = create_model()

# Keep the best-validation model in its own file. Writing it to the same path
# as the final model.save() below would overwrite the best checkpoint with the
# last-epoch weights.
checkpoint = ModelCheckpoint('cnn_lstm_ilss_model_best.h5', save_best_only=True, monitor='val_loss', mode='min')

# Train the model
# NOTE: Keras takes the validation_split samples from the end of X/y, before
# shuffling, so the last videos of the spreadsheet form the validation set.
print("Starting training...\n")
model.fit(X, y, epochs=50, batch_size=4, validation_split=0.2, callbacks=[checkpoint])
print("\nTraining complete.\n")

# Save the final (last-epoch) model for future predictions
model.save('cnn_lstm_ilss_model.h5')

print("✅ Model trained and saved successfully!")


Loading YOLO model...
Loading Excel data from C:\Users\lesfr\OneDrive\Documents\master epfl\Projet de master\Models\Thermal camera model\Forward\R_CNN_Mask\Mask_RCNN_test\Train_dataset.xlsx...
Preparing dataset...
[1/22] Processing video: T1

0: 384x480 (no detections), 88.1ms
Speed: 2.3ms preprocess, 88.1ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 480)

0: 384x480 (no detections), 69.5ms
Speed: 1.2ms preprocess, 69.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 480)

0: 384x480 (no detections), 54.1ms
Speed: 1.0ms preprocess, 54.1ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 480)

0: 384x480 (no detections), 54.0ms
Speed: 0.9ms preprocess, 54.0ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 480)

0: 384x480 1 Nozzle, 52.3ms
Speed: 1.0ms preprocess, 52.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 480)

0: 384x480 1 Nozzle, 53.2ms
Speed: 1.0ms preprocess, 53.2ms inference, 1.1ms postprocess per image 

In [None]:
# Code used to run inference using the ILSS Predictive Model
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from ultralytics import YOLO  # Import from ultralytics

# Instantiate the ultralytics YOLO model from the trained weights file
yolo_model = YOLO(
    r'Path_to_YOLO_model\runs\segment\train\weights\best.pt'
)

# Restore the trained CNN + LSTM regressor from its HDF5 file
model = load_model(
    'cnn_lstm_ilss_model.h5'
)

# Function to extract frames from a video and crop ROI using YOLO
def extract_frames(video_path, max_frames=None):
    """Read a video and return the YOLO-cropped region of interest of its frames.

    Every frame read from the video is passed through the module-level
    ``yolo_model``. Frames without any detection are dropped; for the others
    the first box of the result is used to crop the frame.

    Args:
        video_path: Path of the video file to open with OpenCV.
        max_frames: Optional cap on the number of crops to collect. ``None``
            (the default) processes the whole video.

    Returns:
        A list of cropped frames (NumPy arrays), in video order. The list is
        empty when the video cannot be opened or no frame yields a usable box.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            if max_frames is not None and len(frames) >= max_frames:
                break

            ret, frame = cap.read()
            if not ret:
                break

            # Perform YOLO detection and take the first result of the list
            results = yolo_model(frame)[0]
            boxes = results.boxes
            if boxes is None or len(boxes) == 0:
                continue

            # Use the first detected box: [x1, y1, x2, y2]
            xyxy = boxes[0].xyxy.cpu().numpy()[0]
            x1, y1, x2, y2 = map(int, xyxy)

            # Clamp to the frame: a negative start index would silently wrap
            # around and crop the wrong region.
            height, width = frame.shape[:2]
            x1, x2 = max(x1, 0), min(x2, width)
            y1, y2 = max(y1, 0), min(y2, height)

            # A zero-area crop cannot be resized later, so drop it here.
            if x2 <= x1 or y2 <= y1:
                continue

            frames.append(frame[y1:y2, x1:x2])
    finally:
        # Release the capture handle even if detection raises mid-video.
        cap.release()

    return frames


# Predict ILSS value for a new video
def predict_ilss(video_path):
    """Predict the ILSS value of a single video with the loaded CNN + LSTM model.

    The crops returned by ``extract_frames`` are limited to the first 30,
    resized to 224x224, scaled to [0, 1] and zero-padded at the end to a
    30-frame sequence before being fed to the module-level ``model``.

    Args:
        video_path: Path of the video file to evaluate.

    Returns:
        The scalar prediction taken from the first row and column of the
        model output.

    Raises:
        ValueError: If no frame of the video yields a crop, since there is
            nothing to feed to the model in that case.
    """
    sequence_length = 30

    # Only the first `sequence_length` crops survive truncation, so there is
    # no point in resizing and normalizing the remaining ones.
    frames = extract_frames(video_path)[:sequence_length]
    if not frames:
        raise ValueError(
            f"No frame with a YOLO detection found in {video_path!r}; cannot predict ILSS."
        )

    frames = np.array([cv2.resize(frame, (224, 224)) for frame in frames]) / 255.0  # Normalize frames

    # pad_sequences returns a batch of one sequence, which is the batch layout
    # model.predict expects.
    frames = pad_sequences([frames], maxlen=sequence_length, dtype='float32', padding='post', truncating='post')

    # Predict ILSS value
    ilss_value = model.predict(frames)
    return ilss_value[0][0]

# Define the video path and name
video_name = 'T1'  # Change this to your desired video name (e.g., 'T1')
# Join the directory and file name instead of concatenating onto a raw string
# ending in '\\', which put a doubled separator into the path.
video_path = os.path.join(r'Path_to_YOLO_model\video_dataset', video_name + '.mp4')

# Predict the ILSS value
predicted_ilss = predict_ilss(video_path)
print(f"Predicted ILSS value for {video_name}: {predicted_ilss}")
