In [1]:
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)


In [17]:
%matplotlib inline

In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import torch
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification, DeiTForImageClassification, ViTFeatureExtractor, DeiTFeatureExtractor
from transformers import AdamW
from transformers import get_scheduler
from transformers import AutoTokenizer
from transformers import TrainingArguments, Trainer
from PIL import Image
from torchvision import transforms

In [3]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
data_dir = '/content/drive/My Drive/slash data'

In [12]:
batch_size = 128
image_size = (150, 150)
num_classes = 8
epochs = 10

In [13]:
# Data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

Found 2088 images belonging to 8 classes.


1523 image from the app and 565 scraped from internet

# CNN archticture approach

In [14]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(image_size[0], image_size[1], 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax')
])

In [15]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [9]:
import tensorflow as tf
tf.test.gpu_device_name()


'/device:GPU:0'

In [16]:
history = model.fit(
        train_generator,
        epochs=epochs,
        steps_per_epoch=train_generator.samples // batch_size,
        verbose=1
    )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# Evaluate the model
loss, accuracy = model.evaluate(train_generator)
print("Test Accuracy:", accuracy)

 1/17 [>.............................] - ETA: 45s - loss: 1.3105 - accuracy: 0.5312



Test Accuracy: 0.5507662892341614


# Residual Archticture

In [19]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, BatchNormalization, ReLU, add, GlobalAveragePooling2D, Dense
def residual_block(input_layer, filters, kernel_size=(3, 3), strides=(1, 1), activation='relu'):
    x = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(input_layer)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(x)
    x = BatchNormalization()(x)
    if activation is not None:
        x = add([input_layer, x])
        x = ReLU()(x)
    return x

In [21]:
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, MaxPooling2D, GlobalAveragePooling2D, Dense, Add
from tensorflow.keras.models import Model

def residual_block(x, filters, strides=(1, 1)):
    shortcut = x
    x = Conv2D(filters, kernel_size=(3, 3), strides=strides, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)

    if strides != (1, 1) or shortcut.shape[-1] != filters:
        shortcut = Conv2D(filters, kernel_size=(1, 1), strides=strides, padding='same')(shortcut)
        shortcut = BatchNormalization()(shortcut)

    x = Add()([x, shortcut])
    x = ReLU()(x)
    return x

# ResNet architecture
inputs = Input(shape=(image_size[0], image_size[1], 3))
x = Conv2D(64, (7, 7), strides=(2, 2), padding='same')(inputs)
x = BatchNormalization()(x)
x = ReLU()(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

# Residual blocks
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 64)

x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

x = residual_block(x, 128, strides=(2, 2))
x = residual_block(x, 128)
x = residual_block(x, 128)
x = residual_block(x, 128)

x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

x = residual_block(x, 256, strides=(2, 2))
x = residual_block(x, 256)
x = residual_block(x, 256)
x = residual_block(x, 256)
x = residual_block(x, 256)
x = residual_block(x, 256)

x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

x = residual_block(x, 512, strides=(2, 2))
x = residual_block(x, 512)
x = residual_block(x, 512)

x = GlobalAveragePooling2D()(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs, outputs)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 150, 150, 3)]        0         []                            
                                                                                                  
 conv2d_15 (Conv2D)          (None, 75, 75, 64)           9472      ['input_2[0][0]']             
                                                                                                  
 batch_normalization_9 (Bat  (None, 75, 75, 64)           256       ['conv2d_15[0][0]']           
 chNormalization)                                                                                 
                                                                                                  
 re_lu_8 (ReLU)              (None, 75, 75, 64)           0         ['batch_normalization_9[0]

In [22]:
# Train the model
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_generator.samples // batch_size,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
# Evaluate the model
loss, accuracy = model.evaluate(train_generator)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.16714559495449066


# Fine Tune VGG16

In [24]:
# Load pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(image_size[0], image_size[1], 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [26]:
# Freeze convolutional layers
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, MaxPooling2D, GlobalAveragePooling2D, Dense, Add, Dropout
for layer in base_model.layers:
    layer.trainable = False

model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

In [27]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [28]:
# Train the model
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_generator.samples // batch_size,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
# Evaluate the model
loss, accuracy = model.evaluate(train_generator)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.7998084425926208


# Fine Tune Resnet50

In [30]:
# Load pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(image_size[0], image_size[1], 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [31]:
# Freeze the layers except the last few layers
for layer in base_model.layers[:-10]:
    layer.trainable = False

In [32]:
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax')
])

In [33]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [34]:
# Train the model
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_generator.samples // batch_size,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [35]:
# Evaluate the model
loss, accuracy = model.evaluate(train_generator)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.3103448152542114


# Fine Tune Inception V3

In [36]:
# Load pre-trained InceptionV3 model
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(image_size[0], image_size[1], 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [37]:
for layer in base_model.layers[:-10]:
    layer.trainable = False

In [38]:
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax')
])

In [39]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [40]:
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_generator.samples // batch_size,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [41]:
# Evaluate the model
loss, accuracy = model.evaluate(train_generator)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.7792145609855652


inception has best generalization and best inferance

In [42]:
model.save_weights('/content/drive/My Drive/slash data/my_model_weights.h5')

# Vision Transformers based (vit model)

In [4]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import ViTForImageClassification, ViTFeatureExtractor
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
data_path = '/content/drive/My Drive/slash data'
class_folders = os.listdir(data_path)
class_labels = {label: index for index, label in enumerate(class_folders)}

images = []
labels = []
for class_folder in class_folders:
    class_path = os.path.join(data_path, class_folder)
    for img_file in os.listdir(class_path):
        img_path = os.path.join(class_path, img_file)
        if not os.path.exists(img_path):
            print(f"Image not found: {img_path}")
        else:
            img = cv2.imread(img_path)
            if img is None:
                print(f"Error loading image: {img_path}")
            else:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (224, 224))
                images.append(img)
                labels.append(class_labels[class_folder])



X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

Error loading image: /content/drive/My Drive/slash data/Artifacts/vector.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector13.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector2.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector4.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector12.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector7.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector3.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector5.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector10.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector11.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector6.svg
Error loading image: /content/drive/My Drive/slash data/Artifacts/vector8.svg
Error loading image: /content/drive/My Drive/slash data/Artif

In [5]:
# Define dataset class
class CustomDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            img = self.transform(img)
        return img, label

# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize if required
])

# Create datasets and dataloaders
train_dataset = CustomDataset(X_train, y_train, transform=transform)
test_dataset = CustomDataset(X_test, y_test, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [6]:
# Load pre-trained ViT model
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k')


num_classes = 8
model.classifier = nn.Linear(model.classifier.in_features, num_classes)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        logits = outputs.logits  # Extract logits from ImageClassifierOutput


        loss = criterion(logits, labels)


        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss}")

Epoch 1/10, Loss: 1.2984339373452323
Epoch 2/10, Loss: 0.4001759106204623
Epoch 3/10, Loss: 0.14325493120011829
Epoch 4/10, Loss: 0.08113635962917691
Epoch 5/10, Loss: 0.06111705150632631
Epoch 6/10, Loss: 0.08401988205455599
Epoch 7/10, Loss: 0.03091327017616658
Epoch 8/10, Loss: 0.024479531372586887
Epoch 9/10, Loss: 0.02159068722810064
Epoch 10/10, Loss: 0.015464093287785847


In [8]:

model.eval()

total_correct = 0
total_samples = 0


with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)


        outputs = model(images)


        predicted = torch.argmax(outputs.logits, dim=1)


        total_samples += labels.size(0)


        total_correct += (predicted == labels).sum().item()


accuracy = total_correct / total_samples
print("Test Accuracy:", accuracy)


Test Accuracy: 0.9833333333333333


transformers based is the best model to save for deployment and thats make sense that it trained by google and trained on a huge data

In [10]:

model_weights_path = 'model_weights.pth'

# Save the model weights
torch.save(model.state_dict(), model_weights_path)
