In [26]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np

# Load the DeepLabv3+ model
model = torch.hub.load('pytorch/vision:v0.9.0', 'deeplabv3_resnet101', pretrained=True)
model.eval()

# Define a function to preprocess the image
def preprocess_image(image_path):
    input_image = Image.open(image_path)
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0)
    return input_batch

# Define a function to perform inference and get the predicted class labels
def get_predicted_labels(input_batch):
    with torch.no_grad():
        output = model(input_batch)['out'][0]
    output_predictions = output.argmax(0)
    return output_predictions.numpy()

# Define a function to map class labels to product categories
def map_labels_to_categories(label_map):
    # Define your mapping of class labels to product categories
    # For example: {0: 'background', 1: 'shoe', 2: 'bottle', 3: 'chair', ...}
    label_to_category = {
    0: 'background',
    1: 'Shoe',        # Subcategory for Shoe
    2: 'Sneaker',     # Subcategory for Sneaker
    3: 'Sandal',      # Subcategory for Sandal
    4: 'Hat',         # Subcategory for Hat
    5: 'Cap',         # Subcategory for Cap
    6: 'Sunglasses',  # Subcategory for Sunglasses
    7: 'Headphones',  # Subcategory for Headphones
    8: 'Perfume',     # Subcategory for Perfume
    9: 'Wristwatch',  # Subcategory for Wristwatch
    10: 'Glass',      # Subcategory for Glass
    11: 'Bag',        # Subcategory for Bag
    12: 'Handbag',    # Subcategory for Handbag
    13: 'Suitcase',   # Subcategory for Suitcase
    14: 'Bottle',     # Subcategory for Bottle
    15: 'Cup',        # Subcategory for Cup
    16: 'Water Bottle',  # Subcategory for Water Bottle
    17: 'Can',        # Subcategory for Can
    18: 'Jar',        # Subcategory for Jar
    19: 'Vase',       # Subcategory for Vase
    20: 'Chair',      # Subcategory for Chair
    21: 'Office Chair',  # Subcategory for Office Chair
    22: 'Couch',      # Subcategory for Couch
    }

    # Map the class labels to product categories
    category_map = np.vectorize(label_to_category.get)(label_map)
    return category_map

# Define a function to apply the product recognition filter
def apply_product_recognition(image_path):
    input_batch = preprocess_image(image_path)
    label_map = get_predicted_labels(input_batch)
    category_map = map_labels_to_categories(label_map)
    return category_map


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.9.0


In [32]:
# Example usage:
image_path = 'Custom_Datatset/Chair/Chair_3.jpg'
product_categories = apply_product_recognition(image_path)
print(product_categories)


[['background' 'background' 'background' ... 'background' 'background'
  'background']
 ['background' 'background' 'background' ... 'background' 'background'
  'background']
 ['background' 'background' 'background' ... 'background' 'background'
  'background']
 ...
 ['background' 'background' 'background' ... 'background' 'background'
  'background']
 ['background' 'background' 'background' ... 'background' 'background'
  'background']
 ['background' 'background' 'background' ... 'background' 'background'
  'background']]


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report
import PIL.Image as Image
import os
import random

# Define path to your dataset
data_dir = 'Custom_Datatset'

# List all subdirectories (categories)
categories = os.listdir(data_dir)

# Create a list to hold (image_path, category_label) tuples
all_images = []

# Loop through each category and collect image paths
for label, category in enumerate(categories):
    category_path = os.path.join(data_dir, category)
    for image_file in os.listdir(category_path):
        image_path = os.path.join(category_path, image_file)
        all_images.append((image_path, label))

# Shuffle the list of (image_path, category_label) tuples
random.shuffle(all_images)

# Define the dataset size and split ratio
dataset_size = len(all_images)
train_size = int(0.8 * dataset_size)
test_size = dataset_size - train_size

# Split dataset into train and test sets
train_images = all_images[:train_size]
test_images = all_images[train_size:]

# Define transformations for data preprocessing
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Create datasets and dataloaders
train_dataset = [(transforms.ToTensor()(Image.open(img)), label) for img, label in train_images]
test_dataset = [(transforms.ToTensor()(Image.open(img)), label) for img, label in test_images]

dataloaders = {
    'train': DataLoader(train_dataset, batch_size=64, shuffle=True),
    'test': DataLoader(test_dataset, batch_size=64, shuffle=False)
}

# Define label_to_category mapping (same as before)
label_to_category = {
    0: 'Shoe',
    1: 'Sneaker',
    2: 'Sandal',
    3: 'Hat',
    4: 'Cap',
    5: 'Sunglasses',
    6: 'Headphones',
    7: 'Perfume',
    8: 'Wristwatch',
    9: 'Glass',
    10: 'Bag',
    11: 'Handbag',
    12: 'Suitcase',
    13: 'Bottle',
    14: 'Cup',
    15: 'Water Bottle',
    16: 'Can',
    17: 'Jar',
    18: 'Vase',
    19: 'Chair',
    20: 'Office Chair',
    21: 'Couch',
    22: 'Baggage',
    23: 'Car',
    24: 'Toy'
}


# Continue with model training, evaluation, and printing classification reports as before

# Load pre-trained ResNet model
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(label_to_category))

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Function to train the model
def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)
        running_loss = 0.0
        running_corrects = 0
        for inputs, labels in dataloaders['train']:
            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects.double() / train_size

        print(f' Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    return model

# Train the model
model.load_state_dict(torch.load('resnet18_finetuned.pth'))
model = train_model(model, criterion, optimizer, num_epochs=9)
torch.save(model.state_dict(), 'resnet18_finetuned2.pth')
# Function to evaluate the model
def evaluate_model(model, dataloader):
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in dataloaders['test']:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    return all_labels, all_preds

# Evaluate on training set
train_labels, train_preds = evaluate_model(model, dataloaders['train'])
print('Classification Report on Training Set:')
print(classification_report(train_labels, train_preds, target_names=label_to_category.values()))

# Evaluate on testing set
test_labels, test_preds = evaluate_model(model, dataloaders['test'])
print('Classification Report on Testing Set:')
print(classification_report(test_labels, test_preds, target_names=label_to_category.values()))




Epoch 1/100
----------
 Loss: 0.1915 Acc: 0.9396
Epoch 2/100
----------
 Loss: 0.1446 Acc: 0.9480
Epoch 3/100
----------
 Loss: 0.1166 Acc: 0.9589
Epoch 4/100
----------
 Loss: 0.0940 Acc: 0.9706
Epoch 5/100
----------
 Loss: 0.0782 Acc: 0.9757
Epoch 6/100
----------
 Loss: 0.0750 Acc: 0.9748
Epoch 7/100
----------
 Loss: 0.0664 Acc: 0.9790
Epoch 8/100
----------
 Loss: 0.0640 Acc: 0.9732
Epoch 9/100
----------


In [1]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
import pickle

# Define path to your dataset
data_dir = 'Custom_Datatset'
image_paths = []

# List all subdirectories (categories)
categories = os.listdir(data_dir)

# Create a list to hold image paths
for label, category in enumerate(categories):
    category_path = os.path.join(data_dir, category)
    for image_file in os.listdir(category_path):
        image_path = os.path.join(category_path, image_file)
        image_paths.append(image_path)

# Define transformations for data preprocessing
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load the saved ResNet18 model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = models.resnet18(weights=None)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 25)  # 25 classes
model.load_state_dict(torch.load('resnet18_finetuned.pth'))
model = model.to(device)
model.eval()

# Modify ResNet18 to extract features from the layer before the classification layer
feature_extractor = nn.Sequential(*list(model.children())[:-1])  # Remove the last layer (fc layer)
feature_extractor = feature_extractor.to(device)

# Function to extract features using the modified ResNet18
def extract_features(feature_extractor, image_paths, transform):
    features = []
    for image_path in image_paths:
        image = Image.open(image_path).convert('RGB')
        image = transform(image).unsqueeze(0).to(device)
        with torch.no_grad():
            feature = feature_extractor(image)
            feature = feature.view(feature.size(0), -1)  # Flatten the feature map
            feature = nn.functional.adaptive_avg_pool1d(feature.unsqueeze(0), 256).squeeze(0)  # Adjust the feature dimension to 256
        features.append(feature.cpu().numpy().flatten())  # Flatten the features to 1D
    return features

# Extract features
resnet_features = extract_features(feature_extractor, image_paths, data_transforms)

# Generate random latent vectors (simulate StyleGAN3 latent vectors)
stylegan_latent_vectors = np.random.randn(len(image_paths), 256)

# Concatenate ResNet18 features and StyleGAN3 latent vectors
# Adjusting the size to 512 as expected by StyleGAN3
combined_features = [np.concatenate((resnet_feat, stylegan_latent), axis=0) for resnet_feat, stylegan_latent in zip(resnet_features, stylegan_latent_vectors)]

# Function to perform linear interpolation
def linear_interpolation(features, alpha=0.5):
    interpolated_features = []
    for i in range(len(features) - 1):
        interp_feat = alpha * features[i] + (1 - alpha) * features[i + 1]
        interpolated_features.append(interp_feat)
    return interpolated_features

# Perform linear interpolation on enhanced features
interpolated_features = linear_interpolation(combined_features)

# Load StyleGAN3 model
with open('~/training-runs/00064-stylegan3-r-Custom_dataset_sgan-gpus1-batch32-gamma2/network-snapshot-000004.pkl', 'rb') as f:
    G = pickle.load(f)['G_ema'].cuda()  # torch.nn.Module

# Function to generate images using StyleGAN3
def generate_enhanced_images(features):
    enhanced_images = []
    for feature in features:
        stylegan_latent = torch.tensor(feature[:512], dtype=torch.float32).unsqueeze(0).cuda()  # Ensure the latent vector is in the correct format
        img = G(stylegan_latent, None)  # Generate image
        img = (img.clamp(-1, 1) + 1) / 2 * 255  # Convert to [0, 255] range
        img = img.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)  # Convert to HWC format and numpy array
        enhanced_images.append(Image.fromarray(img[0]))
    return enhanced_images

# Generate enhanced images
enhanced_images = generate_enhanced_images(interpolated_features)

# Save generated images to disk
output_dir = 'Enhanced_Images'
os.makedirs(output_dir, exist_ok=True)
for i, img in enumerate(enhanced_images):
    img.save(os.path.join(output_dir, f'enhanced_image_{i}.png'))

print("Enhanced images have been generated and saved.")


Setting up PyTorch plugin "bias_act_plugin"... 

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


Failed!


RuntimeError: Error building extension 'bias_act_plugin': [1/3] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output bias_act.cuda.o.d -DTORCH_EXTENSION_NAME=bias_act_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/TH -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/THC -isystem /root/anaconda3/envs/stylegan3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' --use_fast_math --allow-unsupported-compiler -std=c++17 -c /root/.cache/torch_extensions/py39_cu121/bias_act_plugin/3cb576a0039689487cfba59279dd6d46-nvidia-geforce-rtx-4080-laptop-gpu/bias_act.cu -o bias_act.cuda.o 
[31mFAILED: [0mbias_act.cuda.o 
/usr/bin/nvcc --generate-dependencies-with-compile --dependency-output bias_act.cuda.o.d -DTORCH_EXTENSION_NAME=bias_act_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/TH -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/THC -isystem /root/anaconda3/envs/stylegan3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_89,code=compute_89 -gencode=arch=compute_89,code=sm_89 --compiler-options '-fPIC' --use_fast_math --allow-unsupported-compiler -std=c++17 -c /root/.cache/torch_extensions/py39_cu121/bias_act_plugin/3cb576a0039689487cfba59279dd6d46-nvidia-geforce-rtx-4080-laptop-gpu/bias_act.cu -o bias_act.cuda.o 
nvcc fatal   : Unsupported gpu architecture 'compute_89'
[2/3] c++ -MMD -MF bias_act.o.d -DTORCH_EXTENSION_NAME=bias_act_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/TH -isystem /root/anaconda3/envs/stylegan3/lib/python3.9/site-packages/torch/include/THC -isystem /root/anaconda3/envs/stylegan3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -c /root/.cache/torch_extensions/py39_cu121/bias_act_plugin/3cb576a0039689487cfba59279dd6d46-nvidia-geforce-rtx-4080-laptop-gpu/bias_act.cpp -o bias_act.o 
ninja: build stopped: subcommand failed.


In [4]:
#just an experiment

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report
import PIL.Image as Image
import os
import random

# Define path to your dataset
data_dir = 'Custom_Datatset'

# List all subdirectories (categories)
categories = os.listdir(data_dir)

# Create a list to hold (image_path, category_label) tuples
all_images = []

# Loop through each category and collect image paths
for label, category in enumerate(categories):
    category_path = os.path.join(data_dir, category)
    for image_file in os.listdir(category_path):
        image_path = os.path.join(category_path, image_file)
        all_images.append((image_path, label))

# Shuffle the list of (image_path, category_label) tuples
random.shuffle(all_images)

# Define the dataset size and split ratio
dataset_size = len(all_images)
train_size = int(0.8 * dataset_size)
test_size = dataset_size - train_size

# Split dataset into train and test sets
train_images = all_images[:train_size]
test_images = all_images[train_size:]

# Define transformations for data preprocessing
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Create datasets and dataloaders
train_dataset = [(transforms.ToTensor()(Image.open(img)), label) for img, label in train_images]
test_dataset = [(transforms.ToTensor()(Image.open(img)), label) for img, label in test_images]

dataloaders = {
    'train': DataLoader(train_dataset, batch_size=64, shuffle=True),
    'test': DataLoader(test_dataset, batch_size=64, shuffle=False)
}

# Define label_to_category mapping (same as before)
label_to_category = {
    0: 'Shoe',
    1: 'Sneaker',
    2: 'Sandal',
    3: 'Hat',
    4: 'Cap',
    5: 'Sunglasses',
    6: 'Headphones',
    7: 'Perfume',
    8: 'Wristwatch',
    9: 'Glass',
    10: 'Bag',
    11: 'Handbag',
    12: 'Suitcase',
    13: 'Bottle',
    14: 'Cup',
    15: 'Water Bottle',
    16: 'Can',
    17: 'Jar',
    18: 'Vase',
    19: 'Chair',
    20: 'Office Chair',
    21: 'Couch',
    22: 'Baggage',
    23: 'Car',
    24: 'Toy'
}


# Continue with model training, evaluation, and printing classification reports as before

# Load pre-trained ResNet model
model = models.resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(label_to_category))

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Function to train the model
def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)
        running_loss = 0.0
        running_corrects = 0
        for inputs, labels in dataloaders['train']:
            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects.double() / train_size

        print(f' Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    return model

# Train the model
model = train_model(model, criterion, optimizer, num_epochs=100)
torch.save(model.state_dict(), 'experiment_resnet18_finetuned.pth')
# Function to evaluate the model
def evaluate_model(model, dataloader):
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in dataloaders['test']:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    return all_labels, all_preds

# Evaluate on training set
train_labels, train_preds = evaluate_model(model, dataloaders['train'])
print('Classification Report on Training Set:')
print(classification_report(train_labels, train_preds, target_names=label_to_category.values()))

# Evaluate on testing set
test_labels, test_preds = evaluate_model(model, dataloaders['test'])
print('Classification Report on Testing Set:')
print(classification_report(test_labels, test_preds, target_names=label_to_category.values()))


Epoch 1/100
----------
 Loss: 3.2484 Acc: 0.0579
Epoch 2/100
----------
 Loss: 3.1623 Acc: 0.0814
Epoch 3/100
----------
 Loss: 3.0985 Acc: 0.1141
Epoch 4/100
----------
 Loss: 3.0447 Acc: 0.1426
Epoch 5/100
----------
 Loss: 2.9886 Acc: 0.1644
Epoch 6/100
----------
 Loss: 2.9298 Acc: 0.1820
Epoch 7/100
----------
 Loss: 2.8709 Acc: 0.2064
Epoch 8/100
----------
 Loss: 2.8217 Acc: 0.2232
Epoch 9/100
----------
 Loss: 2.7740 Acc: 0.2290
Epoch 10/100
----------
 Loss: 2.7064 Acc: 0.2810
Epoch 11/100
----------
 Loss: 2.6564 Acc: 0.2718
Epoch 12/100
----------
 Loss: 2.5983 Acc: 0.2953
Epoch 13/100
----------
 Loss: 2.5579 Acc: 0.2852
Epoch 14/100
----------
 Loss: 2.4924 Acc: 0.3205
Epoch 15/100
----------
 Loss: 2.4337 Acc: 0.3482
Epoch 16/100
----------
 Loss: 2.3831 Acc: 0.3582
Epoch 17/100
----------
 Loss: 2.3263 Acc: 0.3691
Epoch 18/100
----------
 Loss: 2.2787 Acc: 0.3851
Epoch 19/100
----------
 Loss: 2.2167 Acc: 0.4077
Epoch 20/100
----------
 Loss: 2.1805 Acc: 0.4262
Epoch 21/