In [3]:
!pip install --upgrade scikit-learn



In [16]:
import os
import cv2
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import SGDClassifier
from joblib import Parallel, delayed, dump, load
import logging
from skimage.feature import hog

In [18]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [8]:
# Paths to data directories
commercial_frames_path = "C:\\Users\\Administrator\\Desktop\\Full_data_both_channels\\full_data_adbreak"
show_frames_path = "C:\\Users\\Administrator\\Desktop\\Full_data_both_channels\\full_data_show"

In [9]:
# Function to load images in batches from a folder
def load_images_from_folder(folder, batch_size):
    images = []
    filenames = os.listdir(folder)
    for i, filename in enumerate(filenames):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            images.append(img)
        if len(images) == batch_size or i == len(filenames) - 1:
            yield images
            images = []


In [10]:
def calculate_color_histogram(image):
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def calculate_brightness(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    brightness = np.mean(hsv[:, :, 2])
    return brightness

def calculate_contrast(image):
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    contrast = l.std()
    return contrast

def calculate_edge_density(image):
    edges = cv2.Canny(image, 100, 200)
    edge_density = np.mean(edges)
    return edge_density

def calculate_color_moments(image):
    moments = cv2.moments(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    hu_moments = cv2.HuMoments(moments).flatten()
    return hu_moments

def extract_lbp(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = np.zeros_like(gray)
    for i in range(1, gray.shape[0] - 1):
        for j in range(1, gray.shape[1] - 1):
            center = gray[i, j]
            binary_string = ''
            binary_string += '1' if gray[i - 1, j - 1] > center else '0'
            binary_string += '1' if gray[i - 1, j] > center else '0'
            binary_string += '1' if gray[i - 1, j + 1] > center else '0'
            binary_string += '1' if gray[i, j + 1] > center else '0'
            binary_string += '1' if gray[i + 1, j + 1] > center else '0'
            binary_string += '1' if gray[i + 1, j] > center else '0'
            binary_string += '1' if gray[i + 1, j - 1] > center else '0'
            binary_string += '1' if gray[i, j - 1] > center else '0'
            lbp[i, j] = int(binary_string, 2)
    return lbp.flatten()

def extract_hog(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    h, _ = hog(gray, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualize=True)
    return h.flatten()

def calculate_optical_flow(prev_image, next_image):
    prev_gray = cv2.cvtColor(prev_image, cv2.COLOR_BGR2GRAY)
    next_gray = cv2.cvtColor(next_image, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, next_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    return mag.flatten()

In [11]:
# Feature extraction function
def extract_features(images):
    histograms = []
    brightness = []
    contrast = []
    edge_density = []
    histogram_moments = []
    lbp = []
    hog_features = []
    optical_flow = []

    for img in images:
        histograms.append(calculate_color_histogram(img))
        brightness.append(calculate_brightness(img))
        contrast.append(calculate_contrast(img))
        edge_density.append(calculate_edge_density(img))
        histogram_moments.append(calculate_color_moments(img))
        lbp.append(extract_lbp(img))
        hog_features.append(extract_hog(img))
        optical_flow.append(calculate_optical_flow(img, img))  
    features = np.hstack([
        np.array(histograms),
        np.array(brightness)[:, np.newaxis],
        np.array(contrast)[:, np.newaxis],
        np.array(edge_density)[:, np.newaxis],
        np.array(histogram_moments),
        np.array(lbp),
        np.array(hog_features),
        np.array(optical_flow)
    ])

    return features

In [19]:
# Function to train the model incrementally
def train_incrementally(folder_path, label, batch_size=50):
    for batch_num, batch in enumerate(load_images_from_folder(folder_path, batch_size), 1):
        logging.info(f"Processing batch {batch_num}")
        features = extract_features(batch)
        labels = np.full(features.shape[0], label)
        
        # Impute and scale the features
        imputer = SimpleImputer(strategy='mean')
        scaler = StandardScaler()
        
        features_imputed = imputer.fit_transform(features)
        features_scaled = scaler.fit_transform(features_imputed)
        
        # Train the model incrementally
        model.partial_fit(features_scaled, labels, classes=np.array([0, 1]))
        logging.info(f"Finished processing batch {batch_num}")

In [13]:
# Initialize the model
model = SGDClassifier()

In [14]:
# Batch size for incremental learning
batch_size = 50

In [20]:
logging.info("Starting training on commercial frames")
train_incrementally(commercial_frames_path, label=1, batch_size=batch_size)

logging.info("Starting training on show frames")
train_incrementally(show_frames_path, label=0, batch_size=batch_size)

2024-07-27 18:13:06,606 - INFO - Starting training on commercial frames
2024-07-27 18:13:07,174 - INFO - Processing batch 1
2024-07-27 18:16:35,585 - INFO - NumExpr defaulting to 4 threads.
2024-07-27 18:16:42,298 - INFO - Finished processing batch 1
2024-07-27 18:16:42,827 - INFO - Processing batch 2
2024-07-27 18:19:36,182 - INFO - Finished processing batch 2
2024-07-27 18:19:36,679 - INFO - Processing batch 3
2024-07-27 18:22:28,014 - INFO - Finished processing batch 3
2024-07-27 18:22:28,534 - INFO - Processing batch 4
2024-07-27 18:25:21,411 - INFO - Finished processing batch 4
2024-07-27 18:25:21,903 - INFO - Processing batch 5
2024-07-27 18:28:12,376 - INFO - Finished processing batch 5
2024-07-27 18:28:12,886 - INFO - Processing batch 6
2024-07-27 18:31:03,232 - INFO - Finished processing batch 6
2024-07-27 18:31:03,732 - INFO - Processing batch 7
2024-07-27 18:33:54,092 - INFO - Finished processing batch 7
2024-07-27 18:33:54,587 - INFO - Processing batch 8
2024-07-27 18:36:45

In [24]:
# Save the model
model_filename = 'incremental_model.pkl'

In [23]:
# Print total data used for training
total_images = (commercial_frames_path + show_frames_path) 
logging.info(f"Total images used for training: {total_images}")


2024-07-30 04:24:21,011 - INFO - Total images used for training: C:\Users\Administrator\Desktop\Full_data_both_channels\full_data_adbreakC:\Users\Administrator\Desktop\Full_data_both_channels\full_data_show


In [25]:
model_filename = 'incremental_model.pkl'

# Ensure the path exists or create the path
save_path = os.path.join(os.getcwd(), model_filename)

# Save the model
logging.info("Saving the model")
dump(model, save_path)
logging.info(f"Model saved as {model_filename} at {save_path}")

2024-07-30 15:37:09,662 - INFO - Saving the model
2024-07-30 15:37:09,740 - INFO - Model saved as incremental_model.pkl at C:\Users\Administrator\Desktop\Code\incremental_model.pkl
