# Install OpenCV and pytesseract

In [1]:
!pip install opencv-python
!pip install pytesseract



In [41]:
import cv2
import os
import re
import numpy as np
import pytesseract
import matplotlib.pyplot as plt
from pytesseract import Output
from PIL import Image, ImageOps
from datetime import datetime, timedelta
from IPython.display import clear_output, display

# Load Video Frames & apply interpolation

In [19]:
# Directory containing the images, relative to the current working directory.
image_directory = 'Thermal Placement 1'
# os.listdir() returns entries in arbitrary order; sorting makes every run
# visit the files in the same order.
image_files = sorted(os.listdir(image_directory))

# Regions of interest as (x, y, w, h): top-left corner plus width and height.
# Each one is cropped out of every image and passed to OCR.
roi_hottest = (279, 43, 39, 20)
roi_coldest = (278, 175, 41, 22)

def clean_temperature(ocr_result, min_temp=13.0, max_temp=35.0):
    """Extract a plausible temperature reading from raw OCR text.

    Every character other than a digit or '.' is stripped. What remains must
    consist of one or two digits, a dot and exactly one decimal digit
    (e.g. '28.4') and must lie within [min_temp, max_temp].

    Args:
        ocr_result: Raw text produced by OCR. Non-string values (for example
            None) are treated as unreadable.
        min_temp: Lowest accepted reading, inclusive.
        max_temp: Highest accepted reading, inclusive.

    Returns:
        The cleaned reading as a string (e.g. '28.4'), or the sentinel string
        'Invalid' when no acceptable reading can be extracted.
    """
    if not isinstance(ocr_result, str):
        return 'Invalid'

    cleaned_text = re.sub(r'[^\d.]+', '', ocr_result)
    # An empty string also fails this match, so no separate emptiness check.
    if not re.match(r'^\d{1,2}\.\d$', cleaned_text):
        return 'Invalid'

    if min_temp <= float(cleaned_text) <= max_temp:
        return cleaned_text
    return 'Invalid'

def is_time_acceptable(timestamp_str, threshold_hour=11):
    """Return True when the timestamp's hour is at or after `threshold_hour`.

    `timestamp_str` is parsed with the '%Y%m%d-%H%M%S' layout; only the hour
    component takes part in the comparison.
    """
    parsed = datetime.strptime(timestamp_str, '%Y%m%d-%H%M%S')
    return threshold_hour <= parsed.hour

def interpolate_or_fill(temps):
    """Replace every 'Invalid' entry of `temps` using its nearest valid neighbours.

    Only entries that were valid on entry are used as neighbours; values
    filled in by this function never feed into later fills.

    * Valid neighbours on both sides: the entry becomes the midpoint of the
      two, formatted with one decimal place. Every entry inside the same gap
      therefore receives the same value (this is not distance-weighted).
    * Only an earlier valid neighbour: its value is carried forward.
    * Only a later valid neighbour: its value is carried backward.
    * No valid entry at all: the list is left unchanged.

    Args:
        temps: List of readings; valid entries must be convertible with
            float(), missing ones are the string 'Invalid'.

    Returns:
        The same list object, modified in place.
    """
    count = len(temps)
    is_valid = [value != 'Invalid' for value in temps]

    # next_valid[i] is the index of the closest originally-valid entry at or
    # after position i (None if there is none). One backward pass replaces a
    # full neighbour search per invalid entry.
    next_valid = [None] * count
    nearest = None
    for i in range(count - 1, -1, -1):
        if is_valid[i]:
            nearest = i
        next_valid[i] = nearest

    prev_index = None  # closest originally-valid index before position i
    for i in range(count):
        if is_valid[i]:
            prev_index = i
            continue

        next_index = next_valid[i]
        if prev_index is not None and next_index is not None:
            # Midpoint of the two surrounding valid readings.
            prev_val, next_val = float(temps[prev_index]), float(temps[next_index])
            temps[i] = f"{(prev_val + next_val) / 2:.1f}"
        elif prev_index is not None:
            # Trailing gap: carry the last valid reading forward.
            temps[i] = temps[prev_index]
        elif next_index is not None:
            # Leading gap: carry the first valid reading backward.
            temps[i] = temps[next_index]
    return temps

all_results = []

# Crop boxes as (left, upper, right, lower), derived once from the
# (x, y, w, h) ROIs instead of being rebuilt for every image.
hottest_box = (roi_hottest[0], roi_hottest[1], roi_hottest[0] + roi_hottest[2], roi_hottest[1] + roi_hottest[3])
coldest_box = (roi_coldest[0], roi_coldest[1], roi_coldest[0] + roi_coldest[2], roi_coldest[1] + roi_coldest[3])

for image_name in image_files:
    # Ignore anything that is not one of the supported image types.
    if not image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
        continue

    # The capture time is the second '_'-separated field of the file name
    # (without extension), in 'YYYYmmdd-HHMMSS' form.
    base_name = os.path.splitext(image_name)[0]
    timestamp_str = base_name.split('_')[1]
    frame_datetime = datetime.strptime(timestamp_str, '%Y%m%d-%H%M%S')

    image_path = os.path.join(image_directory, image_name)
    # The context manager closes the underlying file once both ROIs are read.
    with Image.open(image_path) as image:
        cropped_hottest = image.crop(hottest_box)
        gray_hottest = ImageOps.grayscale(cropped_hottest)
        text_hottest = pytesseract.image_to_string(gray_hottest)

        cropped_coldest = image.crop(coldest_box)
        gray_coldest = ImageOps.grayscale(cropped_coldest)
        text_coldest = pytesseract.image_to_string(gray_coldest)

    # Each reading is either a cleaned string such as '28.4' or 'Invalid'.
    hottest_temp = clean_temperature(text_hottest)
    coldest_temp = clean_temperature(text_coldest)

    all_results.append((frame_datetime, hottest_temp, coldest_temp))

# Chronological order is required before neighbouring readings can be used
# to fill gaps.
sorted_results = sorted(all_results, key=lambda x: x[0])

# Separate hottest and coldest for interpolation/filling
hottest_temps = [x[1] for x in sorted_results]
coldest_temps = [x[2] for x in sorted_results]

# Apply interpolation/filling (the lists are modified in place and returned).
interpolated_hottest = interpolate_or_fill(hottest_temps)
interpolated_coldest = interpolate_or_fill(coldest_temps)

# Print the gap-filled hottest readings in chronological order, one per line.
for hottest_value in interpolated_hottest:
    print(f"{hottest_value}")

29.0
28.3
28.4
28.5
28.4
28.6
28.9
29.0
29.2
29.3
29.5
29.3
29.2
28.4
28.4
27.6
30.4
30.4
30.4
30.1
29.1
29.9
30.7
33.0
32.9
32.8
33.0
32.8
32.6
32.4
31.9
30.7
30.9
30.5
28.4
28.4
28.4
28.6
28.9
28.8
28.8
28.5
28.7
28.9
28.3
28.3
28.4
28.7
28.4
28.3
28.8
29.3
29.4
28.8
29.5
29.7
30.4
30.3
30.3
30.3
30.3
30.3
30.3
29.3
29.0
28.5
28.4
28.4
28.7
29.0
28.4
28.7
28.4
29.0
29.4
30.2
30.4
30.3
30.2
29.4
29.6
28.4
27.1
25.9
25.9
25.3
24.7
25.4
26.0
25.4
25.4
24.7
24.8
24.8
24.8
24.9
24.1
24.6
23.7
23.8
23.8
23.8
23.6
23.6
23.6
23.6
23.5
23.5
23.5
23.5
24.1
24.1
24.1
24.1
24.8
26.5
26.5
26.5
26.5
26.5
28.2
27.6
26.6
26.6
25.7
26.5
26.5
27.4
28.2
28.4
28.3
28.5
28.2
28.2
28.6
29.3
29.4
28.8
29.0
29.4
29.3
28.4
28.5
29.5
29.3
29.6
29.3
29.4
29.4
29.5
29.4
29.7
30.0
29.9
29.7
29.3
28.4
28.4
28.3
27.3
27.8
27.8
28.3
28.3
28.4
23.0
28.5


# Define ROI for OCR

In [42]:
# Directory containing the images, relative to the current working directory.
image_directory = 'Thermal Placement 1'
# os.listdir() returns entries in arbitrary order; sorting makes every run
# visit the files in the same order.
image_files = sorted(os.listdir(image_directory))

# Regions of interest as (x, y, w, h): top-left corner plus width and height.
# Each one is cropped out of every image and passed to OCR.
roi_hottest = (279, 43, 39, 20)
roi_coldest = (278, 175, 41, 22)

# Filter out discontinuous time frames

In [43]:
def is_interval_close_to_10_minutes(time1, time2):
    """Tell whether two instants are roughly ten minutes apart.

    The order of the arguments does not matter. The gap must be strictly
    greater than 9 min 30 s and strictly less than 10 min 30 s, i.e. it must
    differ from ten minutes by less than thirty seconds.
    """
    gap = abs(time2 - time1)
    deviation = abs(gap - timedelta(minutes=10))
    return deviation < timedelta(seconds=30)

# Clean up OCR output and flag invalid values

In [44]:
def clean_temperature(ocr_result, min_temp=13.0, max_temp=35.0):
    """Extract a plausible temperature reading from raw OCR text.

    Every character other than a digit or '.' is stripped. What remains must
    consist of one or two digits, a dot and exactly one decimal digit
    (e.g. '28.4') and must lie within [min_temp, max_temp].

    Args:
        ocr_result: Raw text produced by OCR. Non-string values (for example
            None) are treated as unreadable.
        min_temp: Lowest accepted reading, inclusive.
        max_temp: Highest accepted reading, inclusive.

    Returns:
        The cleaned reading as a string (e.g. '28.4'), or the sentinel string
        'Invalid' when no acceptable reading can be extracted.
    """
    if not isinstance(ocr_result, str):
        return 'Invalid'

    cleaned_text = re.sub(r'[^\d.]+', '', ocr_result)
    # An empty string also fails this match, so no separate emptiness check.
    if not re.match(r'^\d{1,2}\.\d$', cleaned_text):
        return 'Invalid'

    if min_temp <= float(cleaned_text) <= max_temp:
        return cleaned_text
    return 'Invalid'

# Accumulator for (datetime, hottest, coldest) tuples, one per processed image.
all_results = []

# Replace invalid values with the global median

In [38]:
# Start from an empty list so that re-running this cell cannot append a
# second copy of every frame to results collected earlier.
all_results = []

# Crop boxes as (left, upper, right, lower), derived once from the
# (x, y, w, h) ROIs instead of being rebuilt for every image.
hottest_box = (roi_hottest[0], roi_hottest[1], roi_hottest[0] + roi_hottest[2], roi_hottest[1] + roi_hottest[3])
coldest_box = (roi_coldest[0], roi_coldest[1], roi_coldest[0] + roi_coldest[2], roi_coldest[1] + roi_coldest[3])

for image_name in image_files:
    # Ignore anything that is not one of the supported image types.
    if not image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
        continue

    # The capture time is the second '_'-separated field of the file name
    # (without extension), in 'YYYYmmdd-HHMMSS' form.
    base_name = os.path.splitext(image_name)[0]
    timestamp_str = base_name.split('_')[1]
    frame_datetime = datetime.strptime(timestamp_str, '%Y%m%d-%H%M%S')

    image_path = os.path.join(image_directory, image_name)
    # The context manager closes the underlying file once both ROIs are read.
    with Image.open(image_path) as image:
        cropped_hottest = image.crop(hottest_box)
        gray_hottest = ImageOps.grayscale(cropped_hottest)
        text_hottest = pytesseract.image_to_string(gray_hottest)

        cropped_coldest = image.crop(coldest_box)
        gray_coldest = ImageOps.grayscale(cropped_coldest)
        text_coldest = pytesseract.image_to_string(gray_coldest)

    # Each reading is either a cleaned string such as '28.4' or 'Invalid'.
    hottest_temp = clean_temperature(text_hottest)
    coldest_temp = clean_temperature(text_coldest)

    all_results.append((frame_datetime, hottest_temp, coldest_temp))

# Sort the results by datetime
sorted_results = sorted(all_results, key=lambda x: x[0])

# Keep a frame when the frame that follows it was captured ~10 minutes later.
# NOTE(review): a frame that ends a continuous run in the middle of the series
# (~10 minutes after its predecessor but followed by a gap) is dropped by this
# rule - confirm that this is intended.
filtered_results = []
for i in range(1, len(sorted_results)):
    if is_interval_close_to_10_minutes(sorted_results[i-1][0], sorted_results[i][0]):
        filtered_results.append(sorted_results[i-1])

# The last frame has no successor, so it is judged against its predecessor.
if len(sorted_results) > 1 and is_interval_close_to_10_minutes(sorted_results[-2][0], sorted_results[-1][0]):
    filtered_results.append(sorted_results[-1])

# Now, filtered_results contains frames at roughly 10-minute intervals. Proceed with these.

# Valid readings per channel, used to compute one global median each.
valid_hottest = [float(temp) for _, temp, _ in filtered_results if temp != 'Invalid']
valid_coldest = [float(temp) for _, _, temp in filtered_results if temp != 'Invalid']

global_median_hottest = np.median(valid_hottest) if valid_hottest else 'Invalid'
global_median_coldest = np.median(valid_coldest) if valid_coldest else 'Invalid'

# Replacement text for unreadable frames. Deciding on the list of valid
# readings avoids comparing the NumPy median against a string sentinel.
fill_hottest = f"{global_median_hottest:.1f}" if valid_hottest else 'Invalid'
fill_coldest = f"{global_median_coldest:.1f}" if valid_coldest else 'Invalid'

# Substitute the global median for every 'Invalid' reading.
filtered_results = [
    (
        date,
        fill_hottest if hottest == 'Invalid' else hottest,
        fill_coldest if coldest == 'Invalid' else coldest,
    )
    for date, hottest, coldest in filtered_results
]

print(f"Total Results After Filtering: {len(filtered_results)}")

# Print the hottest reading of every retained frame, one per line.
for result in filtered_results:
    hottest = result[1]
    print(f"{hottest}")

Total Results After Filtering: 166
28.3
28.8
28.5
28.4
28.6
28.9
28.8
29.2
29.3
29.5
29.3
29.2
28.8
28.8
27.6
30.4
28.8
30.4
30.1
29.1
28.8
30.7
33.0
28.8
32.8
33.0
32.8
28.8
32.4
31.9
30.7
30.9
30.5
28.4
28.8
28.4
28.6
28.9
28.8
28.8
28.5
28.7
28.9
28.3
28.3
28.4
28.7
28.4
28.3
28.8
29.3
29.4
28.8
29.5
29.7
30.4
30.3
30.3
30.3
30.3
30.3
30.3
29.3
29.0
28.5
28.8
28.4
28.8
29.0
28.4
28.7
28.4
29.0
29.4
30.2
30.4
28.8
30.2
29.4
29.6
28.4
28.8
25.9
25.9
28.8
24.7
28.8
26.0
28.8
28.8
24.7
28.8
28.8
28.8
24.9
24.1
24.6
23.7
28.8
28.8
23.8
28.8
28.8
28.8
28.8
23.5
28.8
28.8
23.5
28.8
28.8
28.8
28.8
24.8
28.8
28.8
28.8
28.8
28.8
28.2
27.6
28.8
28.8
25.7
28.8
28.8
27.4
28.2
28.4
28.3
28.5
28.2
28.2
28.6
29.3
29.4
28.8
29.0
29.4
29.3
28.4
28.5
29.5
29.3
29.6
29.3
29.4
29.4
29.5
29.4
29.7
30.0
28.8
29.7
29.3
28.4
28.4
28.3
27.3
28.8
28.8
28.3
28.3
28.4
23.0
28.5


# Apply interpolation & replace invalid value

In [46]:
def interpolate_temperatures(temps):
    """Replace 'Invalid' entries that have a valid reading on both sides.

    Such an entry becomes the midpoint of the closest valid reading before it
    and the closest valid reading after it, stored as the unrounded str() of
    the float. Every entry inside the same gap receives the same value (this
    is not distance-weighted). Only entries that were valid on entry count as
    neighbours. 'Invalid' entries before the first or after the last valid
    reading are left untouched.

    Args:
        temps: List of readings; valid entries must be convertible with
            float(), missing ones are the string 'Invalid'.

    Returns:
        The same list object, modified in place.
    """
    count = len(temps)
    is_valid = [temp != 'Invalid' for temp in temps]

    # next_valid[i] is the index of the closest originally-valid entry at or
    # after position i (None if there is none). One backward pass replaces a
    # full neighbour search per invalid entry.
    next_valid = [None] * count
    nearest = None
    for i in range(count - 1, -1, -1):
        if is_valid[i]:
            nearest = i
        next_valid[i] = nearest

    prev_index = None  # closest originally-valid index before position i
    for i in range(count):
        if is_valid[i]:
            prev_index = i
            continue

        next_index = next_valid[i]
        if prev_index is not None and next_index is not None:
            prev_temp = float(temps[prev_index])
            next_temp = float(temps[next_index])
            temps[i] = str((prev_temp + next_temp) / 2)
    return temps

all_results = []

# Crop boxes as (left, upper, right, lower), derived once from the
# (x, y, w, h) ROIs instead of being rebuilt for every image.
hottest_box = (roi_hottest[0], roi_hottest[1], roi_hottest[0] + roi_hottest[2], roi_hottest[1] + roi_hottest[3])
coldest_box = (roi_coldest[0], roi_coldest[1], roi_coldest[0] + roi_coldest[2], roi_coldest[1] + roi_coldest[3])

for image_name in image_files:
    # Ignore anything that is not one of the supported image types.
    if not image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
        continue

    # The capture time is the second '_'-separated field of the file name
    # (without extension), in 'YYYYmmdd-HHMMSS' form.
    base_name = os.path.splitext(image_name)[0]
    timestamp_str = base_name.split('_')[1]
    frame_datetime = datetime.strptime(timestamp_str, '%Y%m%d-%H%M%S')

    image_path = os.path.join(image_directory, image_name)
    # The context manager closes the underlying file once both ROIs are read.
    with Image.open(image_path) as image:
        cropped_hottest = image.crop(hottest_box)
        gray_hottest = ImageOps.grayscale(cropped_hottest)
        text_hottest = pytesseract.image_to_string(gray_hottest)

        cropped_coldest = image.crop(coldest_box)
        gray_coldest = ImageOps.grayscale(cropped_coldest)
        text_coldest = pytesseract.image_to_string(gray_coldest)

    # Each reading is either a cleaned string such as '28.4' or 'Invalid'.
    hottest_temp = clean_temperature(text_hottest)
    coldest_temp = clean_temperature(text_coldest)

    all_results.append((frame_datetime, hottest_temp, coldest_temp))

# Sort the results by datetime
sorted_results = sorted(all_results, key=lambda x: x[0])

# Apply filtering for ~10-minute intervals: a frame is kept when the frame
# that follows it was captured ~10 minutes later.
filtered_results = []
for i in range(1, len(sorted_results)):
    if is_interval_close_to_10_minutes(sorted_results[i-1][0], sorted_results[i][0]):
        filtered_results.append(sorted_results[i-1])
# The last frame has no successor, so it is judged against its predecessor.
if len(sorted_results) > 1 and is_interval_close_to_10_minutes(sorted_results[-2][0], sorted_results[-1][0]):
    filtered_results.append(sorted_results[-1])

# Separate temperatures and interpolate invalid values for filtered results
hottest_temps = [result[1] for result in filtered_results]
coldest_temps = [result[2] for result in filtered_results]
interpolated_hottest = interpolate_temperatures(hottest_temps)
interpolated_coldest = interpolate_temperatures(coldest_temps)

# Update filtered_results with interpolated temperatures
filtered_results = [
    (result[0], hottest_value, coldest_value)
    for result, hottest_value, coldest_value in zip(filtered_results, interpolated_hottest, interpolated_coldest)
]

print(f"Total Results After Filtering and Interpolation: {len(filtered_results)}")
for result in filtered_results:
    # Readings that could not be interpolated stay 'Invalid'; everything else
    # is shown with one decimal place.
    hottest = result[1]
    hottest_str = hottest if hottest == 'Invalid' else f"{float(hottest):.1f}"

    print(f" {hottest_str}")

Total Results After Filtering and Interpolation: 166
 28.3
 28.4
 28.5
 28.4
 28.6
 28.9
 29.0
 29.2
 29.3
 29.5
 29.3
 29.2
 28.4
 28.4
 27.6
 30.4
 30.4
 30.4
 30.1
 29.1
 29.9
 30.7
 33.0
 32.9
 32.8
 33.0
 32.8
 32.6
 32.4
 31.9
 30.7
 30.9
 30.5
 28.4
 28.4
 28.4
 28.6
 28.9
 28.8
 28.8
 28.5
 28.7
 28.9
 28.3
 28.3
 28.4
 28.7
 28.4
 28.3
 28.8
 29.3
 29.4
 28.8
 29.5
 29.7
 30.4
 30.3
 30.3
 30.3
 30.3
 30.3
 30.3
 29.3
 29.0
 28.5
 28.4
 28.4
 28.7
 29.0
 28.4
 28.7
 28.4
 29.0
 29.4
 30.2
 30.4
 30.3
 30.2
 29.4
 29.6
 28.4
 27.1
 25.9
 25.9
 25.3
 24.7
 25.4
 26.0
 25.4
 25.4
 24.7
 24.8
 24.8
 24.8
 24.9
 24.1
 24.6
 23.7
 23.8
 23.8
 23.8
 23.6
 23.6
 23.6
 23.6
 23.5
 23.5
 23.5
 23.5
 24.1
 24.1
 24.1
 24.1
 24.8
 26.5
 26.5
 26.5
 26.5
 26.5
 28.2
 27.6
 26.6
 26.6
 25.7
 26.5
 26.5
 27.4
 28.2
 28.4
 28.3
 28.5
 28.2
 28.2
 28.6
 29.3
 29.4
 28.8
 29.0
 29.4
 29.3
 28.4
 28.5
 29.5
 29.3
 29.6
 29.3
 29.4
 29.4
 29.5
 29.4
 29.7
 30.0
 29.9
 29.7
 29.3
 28.4
 28.4
 28.3