In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import torch 




In [2]:
def read_annotation_file(annotation_path):
    annotations = []
    with open(annotation_path, 'r') as file:
        for line in file:
            parts = line.strip().split()
            if len(parts) >= 6:  # Assuming format: image_path x_min,y_min,x_max,y_max,class_id
                image_path = parts[0]
                bbox_class_info = parts[1].split(',')
                bbox_info = [int(coord) for coord in bbox_class_info[:4]]
                class_id = int(bbox_class_info[-1])

                annotations.append({
                    'image_path': image_path,
                    'bbox': tuple(bbox_info),
                    'class_id': class_id
                })
    return annotations

# Example usage:
train_annotations = read_annotation_file(r'C:\Users\suraj\OneDrive\Desktop\banao_ai\Website Screenshots.v1-raw.yolov4pytorch\train\_annotations.txt')
test_annotations = read_annotation_file(r'C:\Users\suraj\OneDrive\Desktop\banao_ai\Website Screenshots.v1-raw.yolov4pytorch\test\_annotations.txt')
valid_annotations = read_annotation_file(r'C:\Users\suraj\OneDrive\Desktop\banao_ai\Website Screenshots.v1-raw.yolov4pytorch\valid\_annotations.txt')


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\suraj\\OneDrive\\Desktop\\banao_ai\\Website Screenshots.v1-raw.yolov4pytorch\\train\\_annotations.txt'

In [None]:
# Function to preprocess data
def preprocess_data(image_path, annotation):
    try:
        print(f"Processing image: {image_path}")

        # Load the image in color (3 channels)
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)

        if image is None:
            raise ValueError(f"Error loading image: {image_path}")

        # Check the number of channels
        num_channels = image.shape[-1]

        if num_channels == 1:
            # Convert grayscale image to RGB format
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif num_channels != 3:
            raise ValueError(f"Error: Image has {num_channels} channels. Expected 3 channels for RGB.")

        # Normalize pixel values to the range [0, 1]
        image = image.astype(np.float32) / 255.0

        # Extract bounding box coordinates from the annotation
        x_min, y_min, x_max, y_max = annotation['bbox']

        # Convert bounding box coordinates to the format [x_min, y_min, x_max, y_max]
        normalized_bbox = [x_min, y_min, x_max, y_max]

        # Add other preprocessing steps if needed

        return image, normalized_bbox, annotation['class_id']

    except Exception as e:
        print(f"Error processing image: {image_path}")
        print(e)
        return None, None, None  # Return placeholders if an error occurs


DATA SPLITTING

In [None]:
from random import shuffle

# Assuming you have a list of annotations where each annotation contains the image path, bbox, and class_id
annotations = [...]  # Replace with your actual list of annotations

# Shuffle the dataset
shuffle(annotations)

# Define the proportions for training, validation, and test sets
train_size = 0.7  # 70% for training
val_size = 0.15   # 15% for validation
test_size = 0.15  # 15% for testing

# Calculate the split indices
num_samples = len(annotations)
train_split = int(train_size * num_samples)
val_split = train_split + int(val_size * num_samples)

# Split the dataset
train_annotations = annotations[:train_split]
val_annotations = annotations[train_split:val_split]
test_annotations = annotations[val_split:]

# Print the sizes of each set
print(f"Number of training samples: {len(train_annotations)}")
print(f"Number of validation samples: {len(val_annotations)}")
print(f"Number of test samples: {len(test_annotations)}")


Number of training samples: 0
Number of validation samples: 0
Number of test samples: 1


MODEL ARCHITECTURE

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define the input shape based on your image dimensions
input_shape = (768, 1024, 3)  # Replace with your actual image dimensions

# Create a sequential model
model = Sequential()

# Convolutional layers
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten layer to transition from convolutional to dense layers
model.add(Flatten())

# Dense layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # Dropout for regularization
model.add(Dense(8, activation='softmax'))  # Adjust num_classes based on your task

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print a summary of the model architecture
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 766, 1022, 32)     896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 383, 511, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 381, 509, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 190, 254, 64)      0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 3088640)           0         
                                                                 
 dense (Dense)               (None, 128)               3

MODEL TRAINING

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np

import os
import cv2

# Define the path to your dataset
dataset_path = r'C:\Users\suraj\OneDrive\Desktop\banao_ai\Website Screenshots.v1-raw.yolov4pytorch\valid'

# Placeholder lists for images and labels
images = []
labels = []

# Iterate through your dataset directory
for filename in os.listdir(dataset_path):
    if filename.endswith('.jpg'):
        # Assuming image filenames contain the corresponding label (e.g., 'label_image.jpg')
        label = filename.split('_')[0]  # Modify this based on your naming convention
        image_path = os.path.join(dataset_path, filename)
        
        # Load the image using OpenCV
        image = cv2.imread(image_path)
        
        # Append the image and label to the lists
        images.append(image)
        labels.append(label)

# Verify the loaded data
assert len(images) == len(labels), "Mismatched number of images and labels"
# Assuming you have lists of images and corresponding labels
# Replace [...] with your actual dataset loading code

# Verify the loaded data
assert len(images) == len(labels), "Mismatched number of images and labels"

# Print the labels before conversion
print("Labels before conversion:", labels)

# Convert labels to integers if they are not already
try:
    labels = [int(label.split('px')[0]) for label in labels]
except ValueError:
    print("Error: Labels contain non-numeric characters or are not convertible to integers.")

# Convert labels to integers if they are numeric, otherwise assign a unique integer to non-numeric labels
label_dict = {}
numeric_labels = []
for label in labels:
    try:
        numeric_label = int(label)
        numeric_labels.append(numeric_label)
    except ValueError:
        if label not in label_dict:
            label_dict[label] = len(label_dict)
        numeric_labels.append(label_dict[label])

# Print the labels after conversion
print("Labels after conversion:", numeric_labels)

# Convert labels to one-hot encoding
num_classes = len(set(numeric_labels))
labels_one_hot = to_categorical(numeric_labels, num_classes=num_classes)



# # Print the labels after conversion
# print("Labels after conversion:", labels)

# # Convert labels to one-hot encoding
# num_classes = len(set(labels))
# labels_one_hot = to_categorical(labels, num_classes=num_classes)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(images, labels_one_hot, test_size=0.2, random_state=42)

# Print the sizes of each set
print(f"Number of training samples: {len(X_train)}")
print(f"Number of validation samples: {len(X_val)}")
print(f"Number of test samples: {len(y_val)}")



# Continue with the rest of your code...





Labels before conversion: ['500px', '500px', 'academic', 'academic', 'accenture', 'accenture', 'adidas', 'adidas', 'adwords', 'adwords', 'airbnb', 'airbnb', 'aliexpress', 'aliexpress', 'allegro', 'allegro', 'allrecipes', 'allrecipes', 'amazon', 'amazon', 'amazon', 'amazon', 'amazon', 'amazon', 'anandtech', 'anandtech', 'android-developers', 'android-developers', 'angelfire', 'angelfire', 'apachefriends', 'apachefriends', 'api', 'api', 'apnews', 'apnews', 'armorgames', 'armorgames', 'artsandculture', 'artsandculture', 'asp', 'asp', 'attendee', 'attendee', 'audiomack', 'audiomack', 'aws', 'aws', 'bandsintown', 'bandsintown', 'banggood', 'banggood', 'bhphotovideo', 'bhphotovideo', 'binance', 'binance', 'bitcointalk', 'bitcointalk', 'bitfinex', 'bitfinex', 'bitly', 'bitly', 'blogger', 'blogger', 'blog', 'blog', 'blog', 'blog', 'bloomberg', 'bloomberg', 'blubrry', 'blubrry', 'boingboing', 'boingboing', 'brainoff', 'brainoff', 'brave', 'brave', 'brew', 'brew', 'buildyourfuture', 'buildyourfu

In [None]:
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 766, 1022, 32)     896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 383, 511, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 381, 509, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 190, 254, 64)      0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 3088640)           0         
                                                                 
 dense (Dense)               (None, 128)               3

Model Evaluation:

Evaluate your trained model on the validation set to assess its generalization performance. Use metrics relevant to your task, such as accuracy, precision, recall, or others.


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Assuming X_val is your validation data
y_pred = model.predict(np.array(X_val))

# Assuming y_val is one-hot encoded, convert it back to labels if needed
y_true_labels = np.argmax(y_val, axis=1)

# Compute metrics
accuracy = accuracy_score(y_true_labels, y_true_labels)
precision = precision_score(y_true_labels, y_true_labels, average='weighted')
recall = recall_score(y_true_labels, y_true_labels, average='weighted')
f1 = f1_score(y_true_labels, y_true_labels, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


Test Set Evaluation:

Once satisfied with the model's performance on the validation set, evaluate it on the test set to obtain a final assessment of its generalization capabilities.

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model

# Load your saved model
your_model = load_model(r'C:\Users\suraj\OneDrive\Desktop\banao_ai\your_model.h5')

# Enable eager execution
your_model.compile(run_eagerly=True)

class_names = ['button', 'field', 'heading', 'iframe', 'image', 'label', 'link', 'text']

# Sample annotation file format: Assuming CSV format with columns: image_path, xmin, ymin, xmax, ymax, class_label
annotation_file = r'C:\Users\suraj\OneDrive\Desktop\banao_ai\Website Screenshots.v1-raw.yolov4pytorch\test\_annotations.txt'

# Read the annotation file
annotations = []
with open(annotation_file, 'r') as file:
    for line in file:
        parts = line.strip().split(',')
        annotations.append({
            'image_path': parts[0],
            'xmin': int(parts[1]),
            'ymin': int(parts[2]),
            'xmax': int(parts[3]),
            'ymax': int(parts[4].replace(" ", "")),  # Remove spaces and convert to int
            'class_label': int(parts[5])  # Assuming class labels are integers
        })

# Load your test data (test_image_paths)
test_image_paths = [annotation['image_path'] for annotation in annotations]

for image_path, annotation in zip(test_image_paths, annotations):
    img = cv2.imread(image_path)
    if img is not None:
        img = img / 255.0
        img = np.expand_dims(img, axis=0)  # Add batch dimension
        predictions = your_model.predict(img)

        print(f"Image: {image_path}")
        print("Annotations:", annotation)
        print("Predictions shape:", predictions.shape)

        if len(predictions) > 0:
            print("First prediction:", predictions[0])
            confidence_threshold = 0.5
            filtered_predictions = [pred for pred in predictions[0] if pred[4] >= confidence_threshold]
            print("Filtered Predictions shape:", np.array(filtered_predictions).shape)

        # Set a confidence threshold
        confidence_threshold = 0.5

        # Draw bounding boxes and labels on the image
        for detection in predictions[0]:
            confidence = detection[0]  # Assuming confidence is in the first position in the array
            if confidence > confidence_threshold:
                class_index = np.argmax(detection[1:])  # Assuming the class prediction is the highest probability
                class_name = class_names[class_index]

                # Get the bounding box coordinates from the annotation
                xmin, ymin, xmax, ymax = annotation['xmin'], annotation['ymin'], annotation['xmax'], annotation['ymax']

                # Draw bounding box
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

                # Draw label
                label = f'{class_name}: {confidence:.2f}'
                cv2.putText(img, label, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the image
        plt.imshow(img[0])  # Remove batch dimension before displaying
        plt.axis('off')
        plt.show()
