In [2]:
%pip install torchvision

Collecting torchvision
  Downloading torchvision-0.19.0-cp310-cp310-manylinux1_x86_64.whl.metadata (6.0 kB)
Collecting torch==2.4.0 (from torchvision)
  Downloading torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.4.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.4.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.4.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.4.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.4.0->torchvision)
  Downloading 

In [5]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu




Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cpu/torchaudio-2.4.0%2Bcpu-cp310-cp310-linux_x86_64.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
Installing collected packages: torchaudio
Successfully installed torchaudio-2.4.0+cpu

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [7]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

class DoodleDataset(Dataset):
    """Dataset of grayscale doodle images listed in a CSV file.

    The CSV must contain an ``image_path`` column; each entry is joined onto
    ``root_dir`` to locate the image. Every image is read as grayscale and
    resized to 64x64 before the optional transform is applied.

    Args:
        csv_file: Path of the CSV file listing the images.
        root_dir: Directory that the ``image_path`` entries are relative to.
        transform: Optional callable applied to the resized image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Load, resize and (optionally) transform the image at row ``idx``.

        Returns:
            The 64x64 grayscale image, or whatever ``transform`` returns for it.

        Raises:
            FileNotFoundError: If the image file does not exist.
            ValueError: If the file exists but OpenCV cannot decode it.
        """
        # Read the single cell directly instead of materialising a whole row Series.
        relative_path = self.data_frame['image_path'].iloc[idx]
        img_path = os.path.join(self.root_dir, relative_path)

        # cv2.imread signals failure by returning None rather than raising, which
        # would otherwise surface as an opaque error inside cv2.resize.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Could not decode image: {img_path}")

        image = cv2.resize(image, (64, 64))  # Resize to 64x64 for consistency

        if self.transform:
            image = self.transform(image)

        return image

# Set up data transforms: convert to a tensor, then normalize with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Create dataset and split into train/val
dataset = DoodleDataset(csv_file='/kaggle/input/doodle-dataset/master_doodle_dataframe.csv', 
                        root_dir='/kaggle/input/doodle-dataset', 
                        transform=transform)

# Split row indices rather than the dataset object itself: handing the dataset to
# train_test_split makes scikit-learn index every item up front (reading every
# image into memory), whereas Subset keeps loading lazy inside the DataLoader workers.
train_indices, val_indices = train_test_split(
    list(range(len(dataset))), test_size=0.2, random_state=42
)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

ModuleNotFoundError: No module named 'torch._custom_ops'

In [None]:
class Autoencoder(nn.Module):
    """Convolutional autoencoder for single-channel images.

    The encoder applies two stride-2 3x3 convolutions followed by a 7x7
    convolution; the decoder mirrors it with transposed convolutions and
    ends in ``Tanh``, so reconstructions lie in [-1, 1].
    """

    def __init__(self):
        super().__init__()

        # Shared settings for the stride-2 stages on each side of the bottleneck.
        halve = dict(kernel_size=3, stride=2, padding=1)
        double = dict(kernel_size=3, stride=2, padding=1, output_padding=1)

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, **halve),
            nn.ReLU(),
            nn.Conv2d(16, 32, **halve),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=7),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=7),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, **double),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, **double),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))

class SymmetryNet(nn.Module):
    """CNN that regresses four symmetry values from a single-channel image.

    Three conv + ReLU + 2x max-pool stages feed a two-layer fully connected
    head. The head is sized for a 128 x 8 x 8 feature map, i.e. 64x64 inputs
    after the three poolings.
    """

    def __init__(self):
        super(SymmetryNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(128 * 8 * 8, 512),
            nn.ReLU(),
            nn.Linear(512, 4)  # 4 outputs: vertical and horizontal symmetry coordinates
        )

    def forward(self, x):
        """Return a ``(batch, 4)`` tensor of symmetry predictions.

        Raises:
            RuntimeError: If the input size does not produce the 128 x 8 x 8
                feature map the fully connected head expects.
        """
        x = self.conv(x)
        # Flatten per sample. The previous view(-1, 128 * 8 * 8) silently folded
        # extra spatial positions into the batch dimension for larger inputs,
        # returning more rows than there were images.
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

# Initialize models
# One untrained instance of each network; neither constructor takes arguments.
autoencoder = Autoencoder()
symmetry_net = SymmetryNet()

In [None]:
# Both networks are trained against a mean-squared-error objective.
autoencoder_criterion = nn.MSELoss()
symmetry_criterion = nn.MSELoss()

# Each network gets its own Adam optimizer with the same learning rate.
autoencoder_optimizer = optim.Adam(params=autoencoder.parameters(), lr=1e-3)
symmetry_optimizer = optim.Adam(params=symmetry_net.parameters(), lr=1e-3)

In [None]:
def train_autoencoder(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one epoch on a reconstruction objective.

    Every batch serves as both the input and the target of ``criterion``.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Iterable of input batches (tensors with the batch on dim 0).
        criterion: Loss taking ``(outputs, targets)``. Assumed to return the
            mean over the batch, which is the default for ``nn.MSELoss``.
        optimizer: Optimizer over ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        float: Mean loss per sample over the epoch.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    total_samples = 0
    for batch in dataloader:
        batch = batch.to(device)
        optimizer.zero_grad()
        outputs = model(batch)
        loss = criterion(outputs, batch)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so a short final batch does not count
        # as much as a full one in the epoch average.
        batch_size = batch.size(0)
        total_loss += loss.item() * batch_size
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute an epoch loss")
    return total_loss / total_samples

def train_symmetry_net(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one epoch against a fixed, centered symmetry target.

    The target for every sample is ``0.5`` in output columns 0 and 1 and
    ``0`` in the remaining columns; it is a placeholder rather than a label
    derived from the image.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Iterable of input batches (tensors with the batch on dim 0).
        criterion: Loss taking ``(outputs, targets)``. Assumed to return the
            mean over the batch, which is the default for ``nn.MSELoss``.
        optimizer: Optimizer over ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        float: Mean loss per sample over the epoch.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    total_samples = 0
    for batch in dataloader:
        batch = batch.to(device)
        optimizer.zero_grad()
        outputs = model(batch)

        # Generate target symmetry values (you may need to adjust this based on your specific requirements)
        target_symmetry = torch.zeros_like(outputs)
        target_symmetry[:, 0] = 0.5  # Vertical symmetry at the center
        target_symmetry[:, 1] = 0.5  # Horizontal symmetry at the center

        loss = criterion(outputs, target_symmetry)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so a short final batch does not count
        # as much as a full one in the epoch average.
        batch_size = batch.size(0)
        total_loss += loss.item() * batch_size
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute an epoch loss")
    return total_loss / total_samples

In [None]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
autoencoder.to(device)
symmetry_net.to(device)

num_epochs = 50

# Each epoch makes one pass over train_loader per network, then reports both losses.
for epoch in range(num_epochs):
    autoencoder_loss = train_autoencoder(
        model=autoencoder,
        dataloader=train_loader,
        criterion=autoencoder_criterion,
        optimizer=autoencoder_optimizer,
        device=device,
    )
    symmetry_loss = train_symmetry_net(
        model=symmetry_net,
        dataloader=train_loader,
        criterion=symmetry_criterion,
        optimizer=symmetry_optimizer,
        device=device,
    )

    print(f"Epoch [{epoch+1}/{num_epochs}], Autoencoder Loss: {autoencoder_loss:.4f}, Symmetry Loss: {symmetry_loss:.4f}")


In [None]:
# Persist only the learned parameters (state dicts) of each network to the
# current working directory; the model classes are needed again to reload them.
torch.save(autoencoder.state_dict(), 'autoencoder.pth')
torch.save(symmetry_net.state_dict(), 'symmetry_net.pth')

In [None]:
def process_doodle(image_path, autoencoder, symmetry_net):
    # Load and preprocess the image
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (64, 64))
    image_tensor = transforms.ToTensor()(image).unsqueeze(0).to(device)

    # Regularize the image
    with torch.no_grad():
        regularized = autoencoder(image_tensor)

    # Find symmetry lines
    with torch.no_grad():
        symmetry = symmetry_net(regularized)

    # Convert tensors to numpy arrays for visualization
    original = image_tensor.squeeze().cpu().numpy()
    regularized = regularized.squeeze().cpu().numpy()
    symmetry = symmetry.squeeze().cpu().numpy()

    # Visualize results
    print("Original Image:")
    cv2_imshow(original * 255)
    print("Regularized Image:")
    cv2_imshow(regularized * 255)

    # Draw symmetry lines
    result = cv2.cvtColor((regularized * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
    height, width = regularized.shape
    cv2.line(result, (int(symmetry[0] * width), 0), (int(symmetry[0] * width), height), (0, 0, 255), 2)  # Vertical
    cv2.line(result, (0, int(symmetry[1] * height)), (width, int(symmetry[1] * height)), (0, 255, 0), 2)  # Horizontal

    print("Image with Symmetry Lines:")
    cv2_imshow(result)

# Load trained models
# map_location='cpu' lets checkpoints saved on a GPU machine load on a CPU-only one;
# load_state_dict then copies the values into the models' existing parameters.
# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state dict holds.
autoencoder.load_state_dict(torch.load('autoencoder.pth', map_location='cpu', weights_only=True))
symmetry_net.load_state_dict(torch.load('symmetry_net.pth', map_location='cpu', weights_only=True))
autoencoder.eval()
symmetry_net.eval()

# Process a doodle
image_path = "path/to/test/doodle.png"  # placeholder: point this at a real image file
process_doodle(image_path, autoencoder, symmetry_net)