# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from torchvision import transforms

# Load CIFAR-10. After ToTensor, each channel is normalized with mean 0.5 and
# std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Train and test splits share the same root directory and preprocessing.
trainset, testset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

# Corrupt a batch of images with additive Gaussian noise.
def add_noise(x, noise_factor=0.5, low=-1.0, high=1.0):
    """Return ``x`` plus zero-mean Gaussian noise, clamped to ``[low, high]``.

    The default clamp range is [-1, 1] because the images in this file are
    normalized with mean 0.5 and std 0.5. Clamping to [0, 1] instead would
    flatten every negative (darker-than-mid-grey) value to 0.

    Args:
        x: Tensor to corrupt; it is not modified in place.
        noise_factor: Multiplier applied to the standard-normal noise.
        low: Lower clamp bound of the result.
        high: Upper clamp bound of the result.

    Returns:
        A new tensor with the same shape as ``x``.
    """
    noise = torch.randn_like(x) * noise_factor
    return torch.clamp(x + noise, low, high)

# Preprocessing for the t-SNE visualization (flattening only; no scaling here).
def prepare_for_tsne(data):
    """Flatten every sample of a batch into a single feature vector.

    Args:
        data: Tensor whose first dimension indexes the samples.

    Returns:
        A 2-D tensor of shape ``(data.size(0), -1)``.
    """
    # reshape() returns a view when the memory layout allows it and copies
    # otherwise, so non-contiguous inputs (e.g. permuted tensors) work too;
    # view() would raise a RuntimeError for those.
    return data.reshape(data.size(0), -1)

# Denoising autoencoder definition
class DenoisingAutoencoder(nn.Module):
    """Convolutional autoencoder that maps a 3x32x32 image to a 3x32x32 image.

    The encoder halves the spatial size three times (32 -> 16 -> 8 -> 4) and
    flattens the resulting 256x4x4 feature map. The decoder squeezes it through
    a 128-dimensional bottleneck, restarts from a 64x4x4 map and doubles the
    spatial size three times (4 -> 8 -> 16 -> 32), so the output has the same
    shape as the input and can be compared to it with a pixel-wise loss.

    The final Tanh keeps the output in [-1, 1], matching images normalized
    with mean 0.5 and std 0.5 and the ``/ 2 + 0.5`` de-normalization used when
    the reconstructions are displayed.
    """

    def __init__(self):
        super().__init__()
        # Encoder: each stride-2 convolution halves height and width.
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Flatten()
        )
        # Decoder: start from 4x4 (not 8x8) so that three stride-2 transposed
        # convolutions end at 32x32 rather than 64x64.
        self.decoder = nn.Sequential(
            nn.Linear(256 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 64 * 4 * 4),
            nn.ReLU(),
            nn.Unflatten(1, (64, 4, 4)),
            nn.ConvTranspose2d(64, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, kernel_size=3, stride=2, padding=1, output_padding=1),
            # Tanh (not Sigmoid): the reconstruction targets lie in [-1, 1].
            nn.Tanh()
        )

    def forward(self, x):
        """Encode ``x`` and decode it back to an image of the same shape."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# Model setup: use the GPU when one is available, otherwise fall back to the
# CPU instead of crashing on a hard-coded .cuda() call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DenoisingAutoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, _ in trainloader:
        # The corrupted batch is fed to the network; the clean batch is the
        # reconstruction target. Class labels are not used.
        noisy_inputs = add_noise(inputs).to(device)
        targets = inputs.to(device)

        optimizer.zero_grad()
        outputs = model(noisy_inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Report the mean per-batch loss of the epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader):.4f}")

# Reconstruct the first batch of the test set.
model.eval()
with torch.no_grad():
    test_data = next(iter(testloader))
    test_inputs, test_labels = test_data
    # Send the noisy batch to whatever device the model lives on, once,
    # instead of hard-coding (and repeating) .cuda().
    model_device = next(model.parameters()).device
    noisy_test_inputs = add_noise(test_inputs).to(model_device)

    restored_images = model(noisy_test_inputs)

# Compare original images (top row) with their reconstructions (bottom row).
n = 10  # number of images per row
plt.figure(figsize=(20, 4))
for i in range(n):
    # (subplot index, CHW image tensor on the CPU, panel title)
    panels = (
        (i + 1, test_inputs[i], "Original"),
        (i + 1 + n, restored_images[i].cpu(), "Restored"),
    )
    for position, image, title in panels:
        ax = plt.subplot(2, n, position)
        # CHW -> HWC for imshow, then undo the mean-0.5 / std-0.5 normalization.
        plt.imshow(image.permute(1, 2, 0) / 2 + 0.5)
        plt.title(title)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

plt.show()

# t-SNE visualization
def plot_tsne(original_data, noisy_data, labels):
    """Scatter-plot clean and noisy samples in one shared 2-D t-SNE embedding.

    Args:
        original_data: Tensor of clean samples; the first dimension indexes
            the samples.
        noisy_data: Tensor of corrupted samples, in the same order as
            ``original_data``.
        labels: Tensor of per-sample labels, used to color both point sets.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    original_data_flat = prepare_for_tsne(original_data)
    noisy_data_flat = prepare_for_tsne(noisy_data)

    # Standardize both sets with the statistics of the clean data so that the
    # shift introduced by the noise is preserved rather than normalized away.
    scaler = StandardScaler()
    original_scaled = scaler.fit_transform(original_data_flat.cpu().numpy())
    noisy_scaled = scaler.transform(noisy_data_flat.cpu().numpy())

    # Embed both sets in a single fit. Two separate fit_transform calls would
    # produce two unrelated coordinate systems, so drawing them on the same
    # axes would not be a meaningful comparison.
    n_original = original_scaled.shape[0]
    tsne = TSNE(n_components=2, random_state=42)
    embedding = tsne.fit_transform(
        np.concatenate([original_scaled, noisy_scaled], axis=0)
    )
    original_tsne = embedding[:n_original]
    noisy_tsne = embedding[n_original:]

    # Move labels to the CPU first so tensors on another device also work.
    point_labels = labels.detach().cpu().numpy()

    plt.figure(figsize=(8, 6))
    plt.scatter(original_tsne[:, 0], original_tsne[:, 1], c=point_labels, cmap='jet', alpha=0.5, label="Original")
    plt.scatter(noisy_tsne[:, 0], noisy_tsne[:, 1], c=point_labels, cmap='jet', marker='x', alpha=0.5, label="Noisy")
    plt.title("t-SNE visualization of Original vs Noisy CIFAR-10")
    plt.legend()
    plt.show()

# Run the t-SNE visualization on the clean and noisy test batch.
plot_tsne(
    original_data=test_inputs,
    noisy_data=noisy_test_inputs,
    labels=test_labels,
)
