In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import unicodedata

In [None]:
# Folder locations used by the cells below (relative to the working directory)
input_folder = "data/rawData"            # raw images are read from here
output_folder = "data/preprocessedData"  # preprocessed images are written here

Displaying the first few images

In [None]:
# Collect the image files of the input folder in a stable, sorted order.
# The extension check is case-insensitive.
image_extensions = ('.png', '.jpg', '.jpeg')
image_files = sorted(
    entry for entry in os.listdir(input_folder)
    if entry.lower().endswith(image_extensions)
)

# Preview only the first few images; numbering shown in the title starts at 1.
num_images_to_display = 5
for position, image_file in enumerate(image_files[:num_images_to_display], start=1):
    image_path = os.path.join(input_folder, image_file)
    try:
        img = mpimg.imread(image_path)
        plt.imshow(img)
        plt.title(f'Image {position}: {image_file}')
        plt.axis('off')
        plt.show()
    except Exception as e:
        # Report the unreadable file and carry on with the next one.
        print(f"Error opening {image_file}: {e}")


## Pre-processing

In [None]:
# Renaming the file names to an appropriate format
def rename_files(directory):
    """Rename every entry of ``directory`` to an ASCII-only, shell-friendly name.

    Each name is NFKD-normalized and stripped of characters that have no ASCII
    equivalent; every remaining character that is not alphanumeric, ``.`` or
    ``_`` (spaces included) is replaced by ``_``. If the resulting name is
    already taken, a numeric suffix (``_1``, ``_2``, ...) is inserted before
    the extension.

    Entries whose name is already clean are left untouched, so calling the
    function again on the same directory changes nothing.

    Args:
        directory: Path of the folder whose entries are renamed in place.

    Raises:
        OSError: Propagated from ``os.listdir`` or ``os.rename`` (for example
            when ``directory`` does not exist).
    """
    def _sanitize(text):
        # Drop accents / non-ASCII characters, then map anything unsafe to '_'.
        ascii_text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
        return ''.join(c if c.isalnum() or c in ('.', '_') else '_' for c in ascii_text)

    existing_filenames = set()

    for filename in os.listdir(directory):
        # Split BEFORE sanitizing so the extension survives even when the stem
        # has no ASCII equivalent at all (e.g. a name written entirely in CJK).
        raw_stem, raw_ext = os.path.splitext(filename)
        stem, ext = _sanitize(raw_stem), _sanitize(raw_ext)
        if not stem.strip('.') and stem != raw_stem:
            # Sanitizing wiped out the stem; use a placeholder instead of
            # producing a hidden, extension-only name such as '.jpg'.
            stem = 'file'
        sanitized_filename = stem + ext

        if sanitized_filename == filename:
            # Already clean: renaming would only collide with the file itself
            # and append a spurious '_1' on every run.
            existing_filenames.add(filename)
            continue

        # Ensure the filename is unique within the directory
        counter = 1
        while sanitized_filename in existing_filenames or os.path.exists(os.path.join(directory, sanitized_filename)):
            sanitized_filename = f"{stem}_{counter}{ext}"
            counter += 1

        existing_filenames.add(sanitized_filename)

        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, sanitized_filename)
        os.rename(old_path, new_path)
        print(f'Renamed: {old_path} -> {new_path}')

# Sanitize the raw-data file names in place. `input_folder` is taken from the
# configuration cell so the path is defined in exactly one place.
rename_files(input_folder)

Resize and remove noise from the images

In [None]:
# Ensure output directory exists
os.makedirs(output_folder, exist_ok=True)

# Accept the same extensions as the preview cell, matched case-insensitively,
# so files such as 'IMG_01.JPG' or 'scan.jpeg' are not silently skipped.
supported_extensions = ('.png', '.jpg', '.jpeg')
target_size = (256, 256)  # output size in pixels, passed as dsize to cv2.resize
blur_kernel = (5, 5)      # Gaussian kernel size; sigma 0 lets OpenCV derive it from the kernel

# Loop through images in the input folder
for filename in os.listdir(input_folder):
    if not filename.lower().endswith(supported_extensions):
        continue

    # Construct the full path to the image
    img_path = os.path.join(input_folder, filename)

    # cv2.imread returns None instead of raising when the file cannot be decoded
    img = cv2.imread(img_path)
    if img is None:
        print(f"Warning: Unable to load image {img_path}. It may be corrupted or in an unsupported format.")
        continue

    # Resize, then apply Gaussian blur for noise reduction
    img_resized = cv2.resize(img, target_size)
    img_denoised = cv2.GaussianBlur(img_resized, blur_kernel, 0)

    # Save the preprocessed image; cv2.imwrite signals failure via its return value
    output_path = os.path.join(output_folder, filename)
    if not cv2.imwrite(output_path, img_denoised):
        print(f"Warning: Unable to write preprocessed image {output_path}.")


In [None]:
# pip install torch torchvision matplotlib numpy


In [None]:
# import os
# from PIL import Image
# from torch.utils.data import Dataset, DataLoader
# import torchvision.transforms as transforms

# class UnlabeledImageDataset(Dataset):
#     def __init__(self, image_dir, transform=None):
#         self.image_dir = image_dir
#         self.image_paths = [
#             os.path.join(image_dir, fname)
#             for fname in os.listdir(image_dir)
#             if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
#         ]
#         self.transform = transform

#     def __len__(self):
#         return len(self.image_paths)

#     def __getitem__(self, idx):
#         img_path = self.image_paths[idx]
#         image = Image.open(img_path).convert('RGB')
#         if self.transform:
#             image = self.transform(image)
#         return image

# # Parameters
# dataroot = "data/preprocessedData"
# batch_size = 128
# image_size = 64
# workers = 2

# # Transformations
# transform = transforms.Compose([
#     transforms.Resize(image_size),
#     transforms.CenterCrop(image_size),
#     transforms.ToTensor(),
#     transforms.Normalize((0.5,), (0.5,)),
# ])

# # Dataset and DataLoader
# dataset = UnlabeledImageDataset(dataroot, transform=transform)
# # dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=workers)
# dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)


In [None]:
# import torch
# import torch.nn as nn

# # Generator Model
# class Generator(nn.Module):
#     def __init__(self, nz, ngf, nc):
#         super(Generator, self).__init__()
#         self.main = nn.Sequential(
#             nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
#             nn.BatchNorm2d(ngf * 8),
#             nn.ReLU(True),
#             nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
#             nn.BatchNorm2d(ngf * 4),
#             nn.ReLU(True),
#             nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
#             nn.BatchNorm2d(ngf * 2),
#             nn.ReLU(True),
#             nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
#             nn.BatchNorm2d(ngf),
#             nn.ReLU(True),
#             nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
#             nn.Tanh()
#         )

#     def forward(self, input):
#         return self.main(input)

# # Discriminator Model
# class Discriminator(nn.Module):
#     def __init__(self, nc, ndf):
#         super(Discriminator, self).__init__()
#         self.main = nn.Sequential(
#             nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
#             nn.LeakyReLU(0.2, inplace=True),
#             nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
#             nn.BatchNorm2d(ndf * 2),
#             nn.LeakyReLU(0.2, inplace=True),
#             nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
#             nn.BatchNorm2d(ndf * 4),
#             nn.LeakyReLU(0.2, inplace=True),
#             nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
#             nn.BatchNorm2d(ndf * 8),
#             nn.LeakyReLU(0.2, inplace=True),
#             nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
#             nn.Sigmoid()
#         )

#     def forward(self, input):
#         return self.main(input).view(-1, 1).squeeze(1)


In [None]:
# import torch.optim as optim

# # Hyperparameters
# nz = 100       # Latent vector size
# ngf = 64       # Generator feature maps
# ndf = 64       # Discriminator feature maps
# nc = 3         # Number of channels in the images (RGB)
# lr = 0.0002    # Learning rate
# beta1 = 0.5    # Beta1 for Adam optimizer
# batch_size = 128
# image_size = 64
# num_epochs = 5
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # Initialize the models
# netG = Generator(nz, ngf, nc).to(device)
# netD = Discriminator(nc, ndf).to(device)

# # Initialize weights
# def weights_init(m):
#     classname = m.__class__.__name__
#     if classname.find('Conv') != -1:
#         m.weight.data.normal_(0.0, 0.02)
#     elif classname.find('BatchNorm') != -1:
#         m.weight.data.normal_(1.0, 0.02)
#         m.bias.data.fill_(0)
# netG.apply(weights_init)
# netD.apply(weights_init)

# # Setup Adam optimizers
# optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
# optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# # Loss function
# criterion = nn.BCELoss()


In [None]:
# import torchvision.utils as vutils
# import matplotlib.pyplot as plt
# import numpy as np

# # Fixed noise for generating consistent images during training
# fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# # Training the GAN
# for epoch in range(num_epochs):
#     for i, data in enumerate(dataloader, 0):
#         ############################
#         # (1) Update D network
#         ###########################
#         netD.zero_grad()
#         real_images = data.to(device)
#         b_size = real_images.size(0)
#         label = torch.full((b_size,), 1., dtype=torch.float, device=device)
#         output = netD(real_images)
#         errD_real = criterion(output, label)
#         errD_real.backward()
#         D_x = output.mean().item()

#         # Generate fake images
#         noise = torch.randn(b_size, nz, 1, 1, device=device)
#         fake_images = netG(noise)
#         label.fill_(0.)
#         output = netD(fake_images.detach())  # Corrected: 'fake' changed to 'fake_images' and added closing parenthesis
#         errD_fake = criterion(output, label)
#         errD_fake.backward()
#         D_G_z1 = output.mean().item()
#         errD = errD_real + errD_fake
#         optimizerD.step()

#         # (2) Update G network
#         ###########################
#         netG.zero_grad()
#         label.fill_(1.)  # Fake labels are real for generator cost
#         output = netD(fake_images)
#         errG = criterion(output, label)
#         errG.backward()
#         D_G_z2 = output.mean().item()
#         optimizerG.step()
 


In [None]:
# import torch
# import os
# from torchvision.utils import save_image

# # Ensure the output directory exists
# output_dir = 'data/derivedData'
# os.makedirs(output_dir, exist_ok=True)

# # Set the generator to evaluation mode
# netG.eval()

# # Generate new images
# num_images = 100  # Adjust as needed
# batch_size = 16
# nz = 100  # Size of the latent vector
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# with torch.no_grad():
#     for i in range(0, num_images, batch_size):
#         current_batch_size = min(batch_size, num_images - i)
#         noise = torch.randn(current_batch_size, nz, 1, 1, device=device)
#         fake_images = netG(noise).detach().cpu()
#         for j in range(current_batch_size):
#             image_path = os.path.join(output_dir, f"design_{i + j + 1}.png")
#             save_image(fake_images[j], image_path, normalize=True)
