# Load data

In [1]:
import os
import pandas as pd

from PIL import Image
import numpy as np
from sklearn.utils import shuffle, resample
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session: this installs a catch-all
# 'ignore' filter, so deprecation and runtime warnings are hidden as well.
warnings.filterwarnings('ignore')
# Lift pandas' cap on displayed columns so wide DataFrames are not truncated.
pd.set_option("display.max_columns", None)

# Remove background

In [2]:
import cv2

def remove_background(image_path, margin=10, iterations=5):
    """Remove the background of an image with GrabCut and save the result.

    The image is segmented with ``cv2.grabCut`` initialised from a rectangle
    inset ``margin`` pixels from every border; everything GrabCut labels as
    (probable) background is set to black. The result is written next to the
    input file with a ``_nobg.jpg`` suffix.

    Parameters
    ----------
    image_path : str or os.PathLike
        Path of the image to process.
    margin : int, optional
        Width in pixels of the border strip treated as definite background.
    iterations : int, optional
        Number of GrabCut iterations to run.

    Returns
    -------
    str
        Path of the saved image without background.

    Raises
    ------
    FileNotFoundError
        If ``image_path`` does not point to an existing file.
    ValueError
        If the file cannot be decoded as an image, or the image is too small
        to leave a non-empty rectangle inside the margin.
    OSError
        If the output image could not be written.
    """
    image_path = os.fspath(image_path)

    # cv2.imread does not raise on failure, it returns None; check explicitly
    # so callers get a clear error instead of an AttributeError on `.shape`.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not decode image: {image_path}")

    height, width = image.shape[:2]

    # GrabCut expects the rectangle as (x, y, width, height), not as two
    # corners. Subtract the margin on BOTH sides so that a strip of definite
    # background remains along the right and bottom edges as well.
    rect_width = width - 2 * margin
    rect_height = height - 2 * margin
    if rect_width <= 0 or rect_height <= 0:
        raise ValueError(
            f"Image {image_path} ({width}x{height}) is too small for a "
            f"margin of {margin} pixels"
        )
    rect = (margin, margin, rect_width, rect_height)

    # Label mask (filled in by GrabCut) and the scratch arrays the algorithm
    # uses internally for its background / foreground models.
    mask = np.zeros((height, width), np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)

    cv2.grabCut(image, mask, rect, bgdModel, fgdModel, iterations, cv2.GC_INIT_WITH_RECT)

    # Collapse the four GrabCut labels to a binary mask:
    # definite / probable background -> 0, definite / probable foreground -> 1.
    is_background = (mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD)
    foreground_mask = np.where(is_background, 0, 1).astype('uint8')

    # Broadcast the 2-D mask over the colour channels to black out background.
    image = image * foreground_mask[:, :, np.newaxis]

    # Save the image without the background next to the original file.
    new_image_path = os.path.splitext(image_path)[0] + '_nobg.jpg'
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(new_image_path, image):
        raise OSError(f"Could not write image: {new_image_path}")

    return new_image_path


In [3]:
import os
import cv2
import pandas as pd
import numpy as np
from sklearn.utils import shuffle

data_dir = "data/Thermal"
images = []
image_paths = []
new_image_size = (256, 256)  # target size handed to cv2.resize
skipped_paths = []  # files with an image extension that OpenCV could not decode

# Sort the listing: os.listdir returns entries in arbitrary order, so without
# sorting the seeded shuffle below would not be reproducible across machines.
for filename in sorted(os.listdir(data_dir)):
    # Case-insensitive match so files such as "IMG.JPG" are not missed.
    if not filename.lower().endswith(('.jpg', '.png')):
        continue

    # Load the image as a single-channel grayscale array.
    image_path = os.path.join(data_dir, filename)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None instead of raising for unreadable files;
        # skip them rather than crashing inside cv2.resize.
        skipped_paths.append(image_path)
        continue

    # Resize to a common size, then flatten to a 1D feature vector.
    resized_image = cv2.resize(image, new_image_size)
    flattened_image = resized_image.flatten()

    # Keep pixels and paths in two parallel lists (same index = same image).
    images.append(flattened_image)
    image_paths.append(image_path)

if skipped_paths:
    print(f"Skipped {len(skipped_paths)} unreadable image(s): {skipped_paths}")

# Convert the image and image path lists to NumPy arrays
images = np.array(images)
image_paths = np.array(image_paths)

# Shuffle both arrays with the same permutation so rows stay aligned.
images, image_paths = shuffle(images, image_paths, random_state=42)

# Create a DataFrame listing the (shuffled) image paths
df = pd.DataFrame({'Image': image_paths})

# Display the DataFrame
print(df)


                              Image
0    data/Thermal/snap_1_ (119).jpg
1    data/Thermal/snap_1_ (300).jpg
2     data/Thermal/snap_1_ (25).jpg
3    data/Thermal/snap_1_ (194).jpg
4    data/Thermal/snap_1_ (143).jpg
..                              ...
425  data/Thermal/snap_1_ (212).jpg
426  data/Thermal/snap_1_ (121).jpg
427   data/Thermal/snap_1_ (30).jpg
428  data/Thermal/snap_1_ (155).jpg
429   data/Thermal/snap_1_ (62).jpg

[430 rows x 1 columns]


In [7]:
import os
import cv2
import numpy as np

data_dir = "data/Thermal"
preprocessed_dir = "preprocessed_data"  # folder that receives the cropped cells
new_image_size = (256, 256)  # set the size of the new images
# Same folder under its older name; kept so code that refers to
# `new_image_path` keeps working.
new_image_path = preprocessed_dir

# Create the preprocessed data directory if it doesn't exist
os.makedirs(preprocessed_dir, exist_ok=True)

skipped_paths = []  # files with an image extension that OpenCV could not decode

# Loop through each image file in the directory (sorted for a stable order)
for filename in sorted(os.listdir(data_dir)):
    # Case-insensitive match so files such as "IMG.JPG" are not missed.
    if not filename.lower().endswith(('.jpg', '.png')):
        continue

    # Load the image as a single-channel grayscale array.
    image_path = os.path.join(data_dir, filename)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None instead of raising for unreadable files.
        skipped_paths.append(image_path)
        continue

    # Resize the image to the desired size
    resized_image = cv2.resize(image, new_image_size)

    # Contrast enhancement via histogram equalization
    enhanced_image = cv2.equalizeHist(resized_image)

    # Noise reduction with a 5x5 Gaussian kernel
    blurred_image = cv2.GaussianBlur(enhanced_image, (5, 5), 0)

    # Min-max rescale of pixel intensities to the 0-255 range
    normalized_image = cv2.normalize(blurred_image, None, 0, 255, cv2.NORM_MINMAX)

    # Binarize: pixels brighter than 128 become 255, the rest 0
    _, thresholded_image = cv2.threshold(normalized_image, 128, 255, cv2.THRESH_BINARY)

    # Outer contours of the bright regions. findContours returns
    # (image, contours, hierarchy) on OpenCV 3.x but (contours, hierarchy) on
    # 2.x / 4.x; indexing with [-2] selects the contours in every version.
    contours = cv2.findContours(
        thresholded_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Crop the bounding box of every contour out of the resized (unfiltered)
    # image; each crop is treated as one cell of the panel.
    cell_images = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        cell_image = resized_image[y:y+h, x:x+w]
        cell_images.append(cell_image)

    # Save each cell as a separate image, numbered from 1
    base_filename = os.path.splitext(filename)[0]
    for cell_number, cell_image in enumerate(cell_images, start=1):
        cell_filename = f"{base_filename}_cell{cell_number}.jpg"
        cell_filepath = os.path.join(preprocessed_dir, cell_filename)
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(cell_filepath, cell_image):
            raise OSError(f"Could not write image: {cell_filepath}")

if skipped_paths:
    print(f"Skipped {len(skipped_paths)} unreadable image(s): {skipped_paths}")

# Print a message after preprocessing all the images
print("Preprocessing complete!")


Preprocessing complete!
