In [None]:
import os
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from torch import optim
from tqdm import tqdm
import logging
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F

In [None]:
# Log lines look like "HH:MM:SS - LEVEL: message". The timestamp placeholder must be the
# mapping key "%(asctime)s"; the previous "%s(asctime)s" never rendered the time.
logging.basicConfig(format="%(asctime)s - %(levelname)s: %(message)s", level=logging.INFO, datefmt="%H:%M:%S")

In [None]:
class Diffusion:
    """Linear-beta diffusion schedule with forward-noising and sampling helpers.

    The constructor precomputes three 1-D tensors of length ``noise_steps`` on
    ``device``: ``beta`` (linearly spaced from ``beta_start`` to ``beta_end``),
    ``alpha = 1 - beta`` and ``alpha_hat = cumprod(alpha)``.

    ``noise_images``, ``sample_timesteps`` and ``sample`` are provided as methods
    because ``train`` calls them on a ``Diffusion`` instance.
    """

    def __init__(self, noise_steps=100, beta_start=1e-4, beta_end=0.02, img_size=256, device="cuda"):
        """Store the hyper-parameters and build the schedule tensors.

        (Previously spelled ``__init_``, so it was never run as the constructor
        and ``Diffusion(img_size=..., device=...)`` raised ``TypeError``.)
        """
        self.noise_steps = noise_steps
        self.beta_start = beta_start
        self.beta_end = beta_end
        self.img_size = img_size
        self.device = device

        self.beta = self.prepare_noise_schedule().to(device)
        self.alpha = 1. - self.beta
        self.alpha_hat = torch.cumprod(self.alpha, dim=0)

    def prepare_noise_schedule(self):
        """Return ``noise_steps`` betas linearly spaced in ``[beta_start, beta_end]``."""
        return torch.linspace(self.beta_start, self.beta_end, self.noise_steps)

    def noise_images(self, x, t):
        """Noise ``x`` to the steps in ``t``.

        Args:
            x: batch of images; assumed 4-D ``(batch, C, H, W)`` so the
                ``(batch, 1, 1, 1)`` scale factors broadcast — TODO confirm.
            t: 1-D integer tensor of step indices, one per batch element.

        Returns:
            ``(x_t, e)`` where ``e`` is the Gaussian noise that was mixed in and
            ``x_t = sqrt(alpha_hat[t]) * x + sqrt(1 - alpha_hat[t]) * e``.
        """
        sqrt_alpha_hat = torch.sqrt(self.alpha_hat[t])[:, None, None, None]
        sqrt_one_minus_alpha_hat = torch.sqrt(1. - self.alpha_hat[t])[:, None, None, None]
        e = torch.randn_like(x)
        return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * e, e

    def sample_timesteps(self, n):
        """Return ``n`` random integer step indices in ``[0, noise_steps)``."""
        return torch.randint(low=0, high=self.noise_steps, size=(n,))

    def sample(self, model, n):
        """Generate ``n`` images by iterating the reverse process with ``model``.

        ``model`` is switched to eval mode for the loop and back to train mode
        afterwards. Returns a ``uint8`` tensor of shape
        ``(n, 3, img_size, img_size)`` with values in ``[0, 255]``.
        """
        # Function-scope import: guarantees the progress-bar callable even when the
        # module-level name ``tqdm`` is bound to the tqdm *module*.
        from tqdm import tqdm

        logging.info(f"Sampling {n} new images...")
        model.eval()
        with torch.no_grad():
            x = torch.randn((n, 3, self.img_size, self.img_size)).to(self.device)
            for i in tqdm(reversed(range(1, self.noise_steps)), position=0):
                t = (torch.ones(n) * i).long().to(self.device)
                predicted_noise = model(x, t)
                alpha = self.alpha[t][:, None, None, None]
                alpha_hat = self.alpha_hat[t][:, None, None, None]
                beta = self.beta[t][:, None, None, None]

                # No fresh noise is injected on the last step (i == 1).
                if i > 1:
                    noise = torch.randn_like(x)
                else:
                    noise = torch.zeros_like(x)

                x = (1 / torch.sqrt(alpha) *
                     (x - ((1 - alpha) / torch.sqrt(1 - alpha_hat)) * predicted_noise) +
                     torch.sqrt(beta) * noise)

            # Map [-1, 1] -> [0, 255] bytes.
            x = (x.clamp(-1, 1) + 1) / 2
            x = (x * 255).type(torch.uint8)
        model.train()
        return x

In [None]:
def noise_images(self,x,t):
    """Mix Gaussian noise into ``x`` according to ``self.alpha_hat[t]``.

    Returns ``(noised, e)`` where ``e = randn_like(x)`` and
    ``noised = sqrt(alpha_hat[t]) * x + sqrt(1 - alpha_hat[t]) * e``. The
    per-sample factors are reshaped to ``(len(t), 1, 1, 1)``, so ``x`` is
    assumed to be a 4-D batch — TODO confirm.

    NOTE(review): this is a module-level function that takes ``self``, while
    ``train`` calls ``diffusion.noise_images(images, t)`` — confirm how it is
    meant to be bound to ``Diffusion``.
    """
    alpha_bar = self.alpha_hat[t]
    signal_scale = torch.sqrt(alpha_bar)[:, None, None, None]
    noise_scale = torch.sqrt(1. - alpha_bar)[:, None, None, None]
    e = torch.randn_like(x)
    noised = signal_scale * x + noise_scale * e
    return noised, e


In [None]:
def simple_timesteps(self,n):
    """Draw ``n`` random integer timesteps in ``[0, self.noise_steps)``.

    NOTE(review): ``train`` calls ``diffusion.sample_timesteps(...)``; this helper
    is named ``simple_timesteps`` and lives at module level — confirm the
    intended name and binding.
    """
    upper_bound = self.noise_steps
    shape = (n,)
    return torch.randint(0, upper_bound, shape)

In [None]:
def simple(self, model, n):
    """Generate ``n`` images by running the reverse diffusion loop with ``model``.

    Starts from Gaussian noise of shape ``(n, 3, self.img_size, self.img_size)``
    on ``self.device`` and walks ``i = noise_steps - 1, ..., 1``, each time
    calling ``model(x, t)`` for the predicted noise and applying the update
    built from ``self.alpha``, ``self.alpha_hat`` and ``self.beta``.

    ``model`` is put in eval mode for the loop and switched back to train mode
    before returning. Returns a ``uint8`` tensor with values in ``[0, 255]``.

    NOTE(review): ``train`` calls ``diffusion.sample(model, n=...)``; this helper
    is named ``simple`` and lives at module level — confirm the intended name
    and binding.
    """
    # Import at call time: a later cell runs ``import tqdm``, which rebinds the
    # global name ``tqdm`` to the module and would make ``tqdm(...)`` fail here.
    from tqdm import tqdm

    logging.info(f"Sampling {n} new images...")
    model.eval()
    with torch.no_grad():
        x = torch.randn((n, 3, self.img_size, self.img_size)).to(self.device)
        for i in tqdm(reversed(range(1, self.noise_steps)), position=0):
            # Same step index for every sample in the batch.
            t = (torch.ones(n) * i).long().to(self.device)
            predicted_noise = model(x, t)
            alpha = self.alpha[t][:, None, None, None]
            alpha_hat = self.alpha_hat[t][:, None, None, None]
            beta = self.beta[t][:, None, None, None]

            # No fresh noise is injected on the last step (i == 1).
            if i > 1:
                noise = torch.randn_like(x)
            else:
                noise = torch.zeros_like(x)

            x = (1 / torch.sqrt(alpha) *
                 (x - ((1 - alpha) / torch.sqrt(1 - alpha_hat)) * predicted_noise) +
                 torch.sqrt(beta) * noise)

        # Map [-1, 1] -> [0, 255] bytes.
        x = (x.clamp(-1, 1) + 1) / 2
        x = (x * 255).type(torch.uint8)
    model.train()
    return x


In [None]:
class UNet(nn.Module):
    """Time-conditioned U-Net: three Down stages, a bottleneck, three Up stages.

    Every Down/Up stage is followed by a ``SelfAttention`` layer, and the
    time step is injected into each Down/Up stage as a sinusoidal embedding of
    width ``time_dim``.

    The attention sizes passed below (32, 16, 8, 16, 32, 64) halve/double with
    each stage, which corresponds to a 64x64 input given the 2x pooling in
    ``Down`` and 2x upsampling in ``Up``.

    ``pos_encoding`` and ``forward`` are defined on the class so that calling
    the module (``model(x, t)``) works; without a ``forward`` method
    ``nn.Module`` raises ``NotImplementedError``.
    """

    def __init__(self, c_in=3, c_out=3, time_dim=256, device="cuda"):
        super().__init__()
        self.device = device
        self.time_dim = time_dim

        # Encoder
        self.inc = DoubleConv(c_in, 64)
        self.down1 = Down(64, 128)
        self.sa1 = SelfAttention(128, 32)
        self.down2 = Down(128, 256)
        self.sa2 = SelfAttention(256, 16)
        self.down3 = Down(256, 256)
        self.sa3 = SelfAttention(256, 8)

        # Bottleneck
        self.bot1 = DoubleConv(256, 512)
        self.bot2 = DoubleConv(512, 512)
        self.bot3 = DoubleConv(512, 256)

        # Decoder: the first argument of Up is the channel count *after* the
        # skip connection has been concatenated (e.g. 256 + 256 = 512).
        self.up1 = Up(512, 128)
        self.sa4 = SelfAttention(128, 16)
        self.up2 = Up(256, 64)
        self.sa5 = SelfAttention(64, 32)
        self.up3 = Up(128, 64)
        self.sa6 = SelfAttention(64, 64)
        self.outc = nn.Conv2d(64, c_out, kernel_size=1)

    def pos_encoding(self, t, channels):
        """Return the ``[sin | cos]`` sinusoidal embedding of ``t``.

        Args:
            t: float tensor of shape ``(batch, 1)``.
            channels: embedding width; ``channels // 2`` frequencies are used.

        Returns:
            Tensor of shape ``(batch, 2 * (channels // 2))``.
        """
        # NOTE(review): base is 1000 here; the usual sinusoidal encoding uses
        # 10000 — confirm which is intended before changing (affects checkpoints).
        inv_freq = 1.0 / (
            1000 ** (torch.arange(0, channels, 2, device=self.device).float() / channels)
        )
        angles = t.repeat(1, channels // 2) * inv_freq
        return torch.cat([torch.sin(angles), torch.cos(angles)], dim=-1)

    def forward(self, x, t):
        """Run the network on images ``x`` at integer time steps ``t`` (1-D, one per sample)."""
        t = t.unsqueeze(-1).type(torch.float)
        t = self.pos_encoding(t, self.time_dim)

        x1 = self.inc(x)
        x2 = self.down1(x1, t)
        x2 = self.sa1(x2)
        x3 = self.down2(x2, t)
        x3 = self.sa2(x3)
        x4 = self.down3(x3, t)
        x4 = self.sa3(x4)

        x4 = self.bot1(x4)
        x4 = self.bot2(x4)
        x4 = self.bot3(x4)

        # Decoder consumes the encoder activations in reverse order as skips.
        x = self.up1(x4, x3, t)
        x = self.sa4(x)
        x = self.up2(x, x2, t)
        x = self.sa5(x)
        x = self.up3(x, x1, t)
        x = self.sa6(x)
        return self.outc(x)


In [None]:
def pos_encoding(self, t, channels):
    """Build a ``[sin | cos]`` sinusoidal embedding of ``t``.

    ``t`` is tiled ``channels // 2`` times along dim 1 and multiplied by the
    inverse frequencies ``1 / 1000 ** (k / channels)`` for ``k = 0, 2, 4, ...``;
    the sine and cosine halves are concatenated on the last dimension.
    ``self.device`` decides where the frequency tensor is created.

    NOTE(review): module-level function taking ``self``; ``forward`` calls it as
    ``self.pos_encoding(...)`` — confirm how it is bound to ``UNet``.
    """
    half = channels // 2
    exponents = torch.arange(0, channels, 2, device=self.device).float() / channels
    inv_freq = 1.0 / (1000 ** exponents)
    angles = t.repeat(1, half) * inv_freq
    return torch.cat([torch.sin(angles), torch.cos(angles)], dim=-1)



In [None]:
def forward(self, x, t):
    """Encoder -> bottleneck -> decoder pass conditioned on time steps ``t``.

    ``t`` gets a trailing axis, is cast to float and embedded with
    ``self.pos_encoding(t, self.time_dim)``; the embedding is handed to every
    ``down*`` / ``up*`` stage. Encoder activations are reused as skip inputs
    for the decoder in reverse order.

    NOTE(review): module-level function taking ``self`` — confirm how it is
    bound to ``UNet``.
    """
    t_emb = self.pos_encoding(t.unsqueeze(-1).type(torch.float), self.time_dim)

    # Encoder: keep each stage's output for the skip connections.
    skip1 = self.inc(x)
    skip2 = self.sa1(self.down1(skip1, t_emb))
    skip3 = self.sa2(self.down2(skip2, t_emb))
    hidden = self.sa3(self.down3(skip3, t_emb))

    # Bottleneck
    hidden = self.bot1(hidden)
    hidden = self.bot2(hidden)
    hidden = self.bot3(hidden)

    # Decoder
    hidden = self.sa4(self.up1(hidden, skip3, t_emb))
    hidden = self.sa5(self.up2(hidden, skip2, t_emb))
    hidden = self.sa6(self.up3(hidden, skip1, t_emb))
    return self.outc(hidden)


In [None]:
class DoubleConv(nn.Module):
    """Two 3x3 convolutions, each followed by a single-group GroupNorm.

    A ReLU sits between the two conv/norm pairs. ``mid_channels`` is the width
    between the two convolutions and falls back to ``out_channels`` when not
    given (or falsy). With ``residual=True`` the block returns
    ``gelu(x + double_conv(x))``; otherwise it returns ``double_conv(x)``.
    """

    def __init__(self, in_channels, out_channels, mid_channels = None, residual=False):
        super().__init__()
        self.residual = residual
        hidden = mid_channels if mid_channels else out_channels
        layers = [
            nn.Conv2d(in_channels, hidden, kernel_size=3, padding=1, bias=False),
            nn.GroupNorm(1, hidden),
            nn.ReLU(),
            nn.Conv2d(hidden, out_channels, kernel_size=3, padding=1, bias=False),
            nn.GroupNorm(1, out_channels),
        ]
        self.double_conv = nn.Sequential(*layers)

    def forward(self, x):
        out = self.double_conv(x)
        if not self.residual:
            return out
        # Residual variant: the input is added back before the GELU.
        return F.gelu(x + out)

class Down(nn.Module):
    """Downsampling stage: 2x max-pool, two ``DoubleConv`` blocks, plus time embedding.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the stage.
        emb_dim: width of the time embedding ``t`` passed to ``forward``.
    """

    def __init__(self, in_channels, out_channels, emb_dim = 256):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, in_channels, residual=True),
            DoubleConv(in_channels, out_channels),
        )

        # SiLU must NOT be in-place here: ``t`` is the same embedding tensor the
        # caller hands to every Down/Up stage, and an in-place activation would
        # overwrite it on each call.
        self.emb_layer = nn.Sequential(
            nn.SiLU(),
            nn.Linear(
                emb_dim,
                out_channels
            ),
        )

    def forward(self, x, t):
        """Return the pooled/convolved ``x`` with the projected ``t`` added per channel."""
        x = self.maxpool_conv(x)
        # (batch, out_channels) -> (batch, out_channels, 1, 1); broadcasts over H and W.
        emb = self.emb_layer(t)[:, :, None, None]
        return x + emb


In [None]:
class Up(nn.Module):
    """Upsampling stage: 2x bilinear upsample, concat with skip, two ``DoubleConv`` blocks.

    Args:
        in_channels: channel count of the tensor *after* the skip connection has
            been concatenated with the upsampled input (this is how ``UNet``
            calls it, e.g. ``Up(512, 128)`` for a 256 + 256 channel concat).
        out_channels: channels produced by the stage.
        emb_dim: width of the time embedding ``t`` passed to ``forward``.
    """

    def __init__(self, in_channels, out_channels, emb_dim=256):
        super(Up, self).__init__()

        self.up = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)
        self.conv = nn.Sequential(
            # The first conv consumes exactly the concatenated tensor, i.e.
            # ``in_channels`` channels. (It previously expected
            # ``in_channels + out_channels`` and failed with a channel mismatch.)
            DoubleConv(in_channels, out_channels, in_channels // 2),
            DoubleConv(out_channels, out_channels),
        )

        self.emb_layer = nn.Sequential(
            nn.SiLU(),
            nn.Linear(
                emb_dim,
                out_channels
            ),
        )

    def forward(self, x, skip_x, t):
        """Upsample ``x``, concatenate ``skip_x`` in front of it, convolve, add time embedding."""
        x = self.up(x)
        x = torch.cat([skip_x, x], dim=1)
        x = self.conv(x)
        # (batch, out_channels) -> (batch, out_channels, 1, 1); broadcasts over H and W.
        emb = self.emb_layer(t)[:, :, None, None]
        return x + emb

In [None]:
class SelfAttention(nn.Module):
    """Multi-head self-attention over the spatial positions of a feature map.

    Args:
        channels: number of feature channels (the attention embedding size).
        size: nominal spatial side length. Kept as an attribute for
            compatibility; ``forward`` reads the actual height/width from its
            input, so other resolutions work as well.
    """

    def __init__(self, channels, size):
        super(SelfAttention, self).__init__()
        self.channels = channels
        self.size = size
        self.mha = nn.MultiheadAttention(channels, 4, batch_first=True)
        self.ln = nn.LayerNorm([channels])
        self.ff_self = nn.Sequential(
            nn.LayerNorm([channels]),
            nn.Linear(channels, channels),
            nn.ReLU(),
            nn.Linear(channels, channels),
        )

    def forward(self, x):
        """Apply pre-norm attention and a feed-forward block, both with residuals.

        ``x`` is ``(batch, channels, H, W)``; the result has the same shape.
        """
        batch, channels, height, width = x.shape
        # (B, C, H, W) -> (B, H*W, C): one token per pixel, channels last, which is
        # what LayerNorm([channels]) and the batch_first attention expect.
        # (The previous ``view(-1, size*size, channels).transpose(1, 2)`` reinterpreted
        # memory and left ``size*size`` as the last dimension.)
        tokens = x.flatten(2).transpose(1, 2)
        x_ln = self.ln(tokens)
        attention_value, _ = self.mha(x_ln, x_ln, x_ln)
        attention_value = attention_value + tokens
        attention_value = self.ff_self(attention_value) + attention_value
        # Back to (B, C, H, W); reshape copes with the non-contiguous transpose.
        return attention_value.transpose(1, 2).reshape(batch, channels, height, width)


In [None]:
class EMA:
    """Exponential moving average of a model's parameters.

    ``beta`` is the weight kept from the running average at each update;
    ``step`` counts how many times ``step_ema`` has been called.
    """

    def __init__(self, beta):
        super().__init__()
        self.beta = beta
        self.step = 0

    def update_model_average(self, ma_model, current_model):
        """Blend every parameter of ``current_model`` into ``ma_model`` (pairwise, in order)."""
        pairs = zip(current_model.parameters(), ma_model.parameters())
        for live, shadow in pairs:
            shadow.data = self.update_average(shadow.data, live.data)

    def update_average(self, old, new):
        """Return ``old * beta + (1 - beta) * new``, or ``new`` when ``old`` is ``None``."""
        return new if old is None else old * self.beta + (1 - self.beta) * new

    def step_ema(self, ema_model, model, step_start_ema=2000):
        """Advance one step: copy weights during warm-up, average afterwards.

        While ``self.step < step_start_ema`` the EMA model is overwritten with
        the current weights; from then on it is updated as a moving average.
        """
        warming_up = self.step < step_start_ema
        action = self.reset_parameters if warming_up else self.update_model_average
        action(ema_model, model)
        self.step += 1

    def reset_parameters(self, ema_model, model):
        """Load ``model``'s state dict into ``ema_model``."""
        source_state = model.state_dict()
        ema_model.load_state_dict(source_state)

In [None]:
import os
import torch
import torchvision
from PIL import Image
from matplotlib import pyplot as plt
import tqdm
import logging
from torch.utils.data import DataLoader as dataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

In [None]:
def plot_images(images):
    """Show a batch of images side by side in one matplotlib figure.

    Args:
        images: iterable of ``(C, H, W)`` tensors (e.g. a ``(N, C, H, W)`` batch);
            they are concatenated along the width axis.
    """
    plt.figure(figsize=(32, 32))
    strip = torch.cat([img for img in images], dim=-1)
    # matplotlib converts its input to a NumPy array, which requires a CPU tensor
    # that does not track gradients.
    plt.imshow(strip.permute(1, 2, 0).detach().cpu())
    plt.show()

def save_images(images, path, **kwargs):
    """Arrange ``images`` in a grid and write it to ``path`` as an image file.

    Args:
        images: batch of image tensors accepted by ``torchvision.utils.make_grid``.
            Floating-point input is taken to be in ``[0, 1]`` and scaled by 255;
            integer input (such as the ``uint8`` output of sampling) is written
            as-is.
        path: destination file path.
        **kwargs: forwarded to ``make_grid``.
    """
    grid = torchvision.utils.make_grid(images, **kwargs)
    needs_scaling = grid.is_floating_point()
    # Move to CPU before converting: .numpy() fails on CUDA tensors.
    ndarr = grid.permute(1, 2, 0).detach().cpu().numpy()
    if needs_scaling:
        # Only [0, 1] floats are scaled; multiplying uint8 data by 255 would wrap around.
        ndarr = ndarr * 255
    im = Image.fromarray(ndarr.astype('uint8'))
    im.save(path)

def setup_logging(run_name):
    """Create the output folders for a run (no error if they already exist).

    Ensures ``models/``, ``results/``, ``models/<run_name>/`` and
    ``results/<run_name>/`` exist relative to the current working directory.
    """
    top_level = ("models", "results")
    for folder in top_level:
        os.makedirs(folder, exist_ok=True)
    for folder in top_level:
        os.makedirs(os.path.join(folder, run_name), exist_ok=True)


In [None]:
from torch.utils.data import DataLoader
def get_data(args):
    """Build a shuffled ``DataLoader`` over an image-folder dataset.

    Reads ``args.img_size``, ``args.dataset_path`` and ``args.batch_size``.
    Each image is resized, randomly crop-resized to ``img_size`` (scale
    0.8-1.0), converted to a tensor and normalized with mean 0.5 / std 0.5 per
    channel.
    """
    size = args.img_size
    pipeline = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size),
        torchvision.transforms.RandomResizedCrop(size, scale=(0.8, 1.0)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    image_folder = torchvision.datasets.ImageFolder(args.dataset_path, transform=pipeline)
    return DataLoader(image_folder, batch_size=args.batch_size, shuffle=True)


In [None]:
def train(args):
    """Train the UNet to predict the noise added by the diffusion process.

    Reads from ``args``: ``run_name``, ``device``, ``img_size``, ``lr``,
    ``epochs`` (plus whatever ``get_data`` needs). For every batch a random
    time step is drawn, the images are noised, and the MSE between the true and
    the predicted noise is minimized with AdamW. The loss is logged to
    TensorBoard under ``runs/<run_name>``. After each epoch a batch of samples
    is written to ``results/<run_name>/<epoch>.png`` and the weights to
    ``models/<run_name>/ckpt.pt``.
    """
    # Import at call time: another cell runs ``import tqdm``, which rebinds the
    # global name ``tqdm`` to the module and would make ``tqdm(dataloader)`` fail.
    from tqdm import tqdm

    setup_logging(args.run_name)
    device = args.device
    dataloader = get_data(args)
    model = UNet().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    mse = nn.MSELoss()
    # The CLI flag (and get_data) use ``img_size``; ``args.image_size`` does not exist.
    diffusion = Diffusion(img_size=args.img_size, device=device)
    logger = SummaryWriter(os.path.join("runs", args.run_name))
    steps_per_epoch = len(dataloader)

    try:
        for epoch in range(args.epochs):
            logging.info(f"Starting epoch {epoch}:")
            pbar = tqdm(dataloader)
            for i, (images, _) in enumerate(pbar):
                images = images.to(device)
                t = diffusion.sample_timesteps(images.shape[0]).to(device)
                x_t, noise = diffusion.noise_images(images, t)
                predicted_noise = model(x_t, t)
                loss = mse(noise, predicted_noise)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pbar.set_postfix(MSE=loss.item())
                logger.add_scalar("MSE", loss.item(), global_step=epoch * steps_per_epoch + i)

            # Sample as many images as the last training batch contained.
            sampled_images = diffusion.sample(model, n=images.shape[0])
            save_images(sampled_images, os.path.join("results", args.run_name, f"{epoch}.png"))
            torch.save(model.state_dict(), os.path.join("models", args.run_name, "ckpt.pt"))
    finally:
        # Flush and release the TensorBoard event file even if training fails.
        logger.close()

In [None]:
# import argparse
# parser = argparse.ArgumentParser()
# parser.add_argument('--run_name', type=str, default="DDPM_Unconditional", help="Name of the training run")
# parser.add_argument('--epochs', type=int, default=20, help="Number of epochs for training")
# parser.add_argument('--batch_size', type=int, default=12, help="Batch size for training")
# parser.add_argument('--image_size', type=int, default=256, help="Size of input images")
# parser.add_argument('--dataset_path', type=str, default=r"/content/drive/MyDrive/Project1", help="Path to the dataset")
# parser.add_argument('--device', type=str, default="cuda", help="Device for training (cpu or cuda)")
# parser.add_argument('--lr', type=float, default=3e-4, help="Learning rate for optimization")
# args = parser.parse_args()

In [None]:
# print(args)

In [None]:
# import argparse

# parser = argparse.ArgumentParser()
# parser.add_argument('--run_name', type=str, default="DDPM_Unconditional", help="Name of the training run")
# parser.add_argument('--epochs', type=int, default=20, help="Number of epochs for training")
# parser.add_argument('--batch_size', type=int, default=4, help="Batch size for training")
# parser.add_argument('--image_size', type=int, default=256, help="Size of input images")
# parser.add_argument('--dataset_path', type=str, default=r"/content/drive/MyDrive/Project1", help="Path to the dataset")
# parser.add_argument('--device', type=str, default="cuda", help="Device for training (cpu or cuda)")
# parser.add_argument('--lr', type=float, default=3e-4, help="Learning rate for optimization")

# args, unknown = parser.parse_known_args()
# print(f'Known args: {args}')
# print(f'Unknown args: {unknown}')

In [None]:
# print(args)

In [None]:
# train(args)

In [None]:
import sys
import argparse

if __name__ == "__main__":
    # Command-line style configuration for a training run; every option has a default,
    # so the cell can be executed as-is inside a notebook.
    parser = argparse.ArgumentParser()
    parser.add_argument('--run_name', type=str, default="DDPM_Unconditional", help="Name of the training run")
    parser.add_argument('--epochs', type=int, default=20, help="Number of epochs for training")
    parser.add_argument('--batch_size', type=int, default=12, help="Batch size for training")
    parser.add_argument('--img_size', type=int, default=256, help="Size of input images")
    parser.add_argument('--dataset_path', type=str, default=r"/content/drive/MyDrive/Project1", help="Path to the dataset")
    parser.add_argument('--device', type=str, default="cuda", help="Device for training (cpu or cuda)")
    parser.add_argument('--lr', type=float, default=3e-4, help="Learning rate for optimization")

    # A Jupyter kernel is started with "-f <connection file>" in sys.argv.
    # parse_known_args() skips such unrecognized arguments without mutating
    # sys.argv (the previous remove()/pop() dropped whatever came last).
    args, _unknown = parser.parse_known_args()
    print(args)
    train(args)

Namespace(run_name='DDPM_Unconditional', epochs=20, batch_size=12, img_size=256, dataset_path='/content/drive/MyDrive/Project1', device='cuda', lr=0.0003)


RuntimeError: The NVIDIA driver on your system is too old (found version 11040). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver.

In [None]:
# Stop early when PyTorch reports that CUDA cannot be used.
# NOTE(review): is_available() can also be False when a GPU is present but its
# driver is unusable, so the "No NVIDIA GPU found" wording may not describe the
# real cause — confirm.
if not torch.cuda.is_available():
    raise RuntimeError("No NVIDIA GPU found. Please install an NVIDIA GPU and driver.")

RuntimeError: No NVIDIA GPU found. Please install an NVIDIA GPU and driver.

In [None]:
!nvidia-smi
!nvcc --version

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [None]:
!apt-get --purge remove '*cublas*' 'cuda*' 'nsight*'
!apt-get --purge remove '*nvidia*'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Package 'linux-objects-nvidia-450-server-5.15.0-1055-oracle' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1056-aws' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1056-azure' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1057-aws' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1057-azure' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1057-intel-iotg' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1058-azure' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1058-gcp' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1058-oracle' is not installed, so not removed
Package 'linux-objects-nvidia-450-server-5.15.0-1059-azure' is not installed, so not removed
Pack

In [None]:
!add-apt-repository ppa:graphics-drivers/ppa
!apt-get update

PPA publishes dbgsym, you may need to include 'main/debug' component
Repository: 'deb https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu/ jammy main'
Description:
Fresh drivers from upstream, currently shipping Nvidia.

## Current releases

Current production branch release: 535.171.04
Current new feature branch release: 550.78
Current beta release: 550.40.07

## Legacy releases

470.239.06 (x86_64) - GKxxx “Kepler” GPUs
390.157 (x86 / x86_64 / ARM) - GF1xx “Fermi” GPUs (*​)
340.108 (x86 / x86_64) - GeForce 8 and 9 series GPUs (*​)
304.137 (x86 / x86_64) - GeForce 6 and 7 series GPUs (*​)
173.14.39 (x86 / x86_64) - GeForce 5 series GPUs (*​)
96.43.23 (x86 / x86_64) - GeForce 2 through GeForce 4 series GPUs (*​)
71.86.15 (x86 / x86_64) - Riva TNT, TNT2, GeForce, and some GeForce 2 GPUs (*​)
(*​) These releases are no longer being maintained. Please see Support timeframes for Unix legacy GPU releases for more details. https://nvidia.custhelp.com/app/answers/detail/a_id/3142

##

In [None]:
#!apt-get install nvidia-driver-550

In [None]:
!nvidia --version

/bin/bash: line 1: nvidia: command not found


In [None]:
# if __name__ == '__main__':
#   launch()
  # device = "cuda"
  # model = UNet().to(device)
  # ckpt = torch.load("./working/orig/ckpt.pt")
  # model.load_state_dict(ckpt)
  # diffusion = Diffusion(img_size=64, device=device)
  # x = diffusion.sample(model, 8)
  # print(x.shape)
  # plt.figure(figsize=(32, 32))
  # plt.imshow(torch.cat([
  #     torch.cat([i for i in x.cpu()], dim=-1),
  # ], dim=-2).permute(1, 2, 0).cpu())
  # plt.show()

In [None]:
!pip install nvidia-smi
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [None]:
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!apt-key adv --fetch-keys https://developer.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub
!apt-get update
!apt-get install cuda-9.2

--2024-05-30 13:22:34--  https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64
Resolving developer.nvidia.com (developer.nvidia.com)... 152.195.19.142
Connecting to developer.nvidia.com (developer.nvidia.com)|152.195.19.142|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://developer.nvidia.com/downloads/compute/cuda/9.2/prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 [following]
--2024-05-30 13:22:34--  https://developer.nvidia.com/downloads/compute/cuda/9.2/prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64
Reusing existing connection to developer.nvidia.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://developer.download.nvidia.com/compute/cuda/9.2/secure/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb?uyjku-98Hzyhsopy1fW2Hg3EuokHt1cs_JU4F7k4p56Enc_yvgyJ0wxj6l_RuSOp9ae5Owk_GofRqxfPVlesokHVDhDMMzAUKPKAn

In [None]:
!apt autoremove

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following packages will be REMOVED:
  dctrl-tools dkms keyboard-configuration libfontenc1 libjansson4 libxcvt0 libxfont2 libxkbfile1
  python3-xkit screen-resolution-extra systemd-hwe-hwdb udev x11-xkb-utils xcvt xfonts-base
  xfonts-encodings xfonts-utils xserver-common xserver-xorg-core
0 upgraded, 0 newly installed, 19 to remove and 64 not upgraded.
After this operation, 25.2 MB disk space will be freed.
(Reading database ... 123109 files and directories currently installed.)
Removing dkms (2.8.7-2ubuntu2.2) ...
Removing dctrl-tools (2.24-3build2) ...
Removing xserver-xorg-core (2:21.1.4-2ubuntu1.7~22.04.10) ...
Removing keyboard-configuration (1.205ubuntu3) ...
Removing xfonts-base (1:1.0.5) ...
Removing xfonts-utils (1:7.7+6build2) ...
Removing libxfont2:amd64 (1:2.0.5-1build1) ...
Removing libfontenc1:amd64 (1:1.1.4-1build3) ...
Removing libjansson4:amd64 (2.13.1-1.1build3) ...
Re

In [None]:
!add-apt-repository ppa:graphics-drivers/ppa

PPA publishes dbgsym, you may need to include 'main/debug' component
Repository: 'deb https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu/ jammy main'
Description:
Fresh drivers from upstream, currently shipping Nvidia.

## Current releases

Current production branch release: 535.171.04
Current new feature branch release: 550.78
Current beta release: 550.40.07

## Legacy releases

470.239.06 (x86_64) - GKxxx “Kepler” GPUs
390.157 (x86 / x86_64 / ARM) - GF1xx “Fermi” GPUs (*​)
340.108 (x86 / x86_64) - GeForce 8 and 9 series GPUs (*​)
304.137 (x86 / x86_64) - GeForce 6 and 7 series GPUs (*​)
173.14.39 (x86 / x86_64) - GeForce 5 series GPUs (*​)
96.43.23 (x86 / x86_64) - GeForce 2 through GeForce 4 series GPUs (*​)
71.86.15 (x86 / x86_64) - Riva TNT, TNT2, GeForce, and some GeForce 2 GPUs (*​)
(*​) These releases are no longer being maintained. Please see Support timeframes for Unix legacy GPU releases for more details. https://nvidia.custhelp.com/app/answers/detail/a_id/3142

##

In [None]:
!apt-cache search nvidia-driver

nvidia-384 - Transitional package for nvidia-driver-390
nvidia-384-dev - Transitional package for nvidia-driver-390
nvidia-driver-390 - NVIDIA driver metapackage
nvidia-driver-418 - Transitional package for nvidia-driver-430
nvidia-driver-418-server - NVIDIA Server Driver metapackage
nvidia-driver-435 - Transitional package for nvidia-driver-455
nvidia-driver-440 - Transitional package for nvidia-driver-450
nvidia-driver-440-server - Transitional package for nvidia-driver-450-server
nvidia-driver-450 - Transitional package for nvidia-driver-460
nvidia-driver-450-server - NVIDIA Server Driver metapackage
nvidia-driver-455 - Transitional package for nvidia-driver-460
nvidia-driver-460 - Transitional package for nvidia-driver-470
nvidia-driver-460-server - Transitional package for nvidia-driver-470-server
nvidia-driver-465 - Transitional package for nvidia-driver-470
nvidia-driver-470 - NVIDIA driver metapackage
nvidia-driver-470-server - NVIDIA Server Driver metapackage
nvidia-driver-495

In [None]:
!ping google.com

/bin/bash: line 1: ping: command not found


In [None]:
# 535.171.04

In [None]:
# !apt-get install nvidia-driver-552.22

In [None]:
!apt-get update

0% [Working]            Get:1 file:/var/cuda-repo-9-2-local  InRelease
            Ign:1 file:/var/cuda-repo-9-2-local  InRelease
0% [Connecting to archive.ubuntu.com] [Connecting to cloud.r-project.org (3.163.125.119)] [Connectin                                                                                                    Get:2 file:/var/cuda-repo-9-2-local  Release [574 B]
0% [Connecting to archive.ubuntu.com] [Connecting to cloud.r-project.org (3.163.125.119)] [Connectin                                                                                                    Get:2 file:/var/cuda-repo-9-2-local  Release [574 B]
0% [2 Release 0 B/574 B 0%] [Connecting to archive.ubuntu.com (185.125.190.36)] [Connecting to secur0% [Connecting to archive.ubuntu.com (185.125.190.36)] [Connecting to security.ubuntu.com (185.125.1                                                                                                    Get:3 file:/var/cuda-repo-9-2-local  Release.gpg 

In [None]:
!apt-get install -y nvidia-driver-552.22

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package nvidia-driver-552.22
E: Couldn't find any package by glob 'nvidia-driver-552.22'
E: Couldn't find any package by regex 'nvidia-driver-552.22'


In [None]:
import sys
import argparse

if __name__ == "__main__":
    # Command-line style configuration for a training run; every option has a default,
    # so the cell can be executed as-is inside a notebook.
    parser = argparse.ArgumentParser()
    parser.add_argument('--run_name', type=str, default="DDPM_Unconditional", help="Name of the training run")
    parser.add_argument('--epochs', type=int, default=20, help="Number of epochs for training")
    parser.add_argument('--batch_size', type=int, default=12, help="Batch size for training")
    parser.add_argument('--img_size', type=int, default=256, help="Size of input images")
    parser.add_argument('--dataset_path', type=str, default=r"/content/drive/MyDrive/Project1", help="Path to the dataset")
    parser.add_argument('--device', type=str, default="cuda", help="Device for training (cpu or cuda)")
    parser.add_argument('--lr', type=float, default=3e-4, help="Learning rate for optimization")

    # A Jupyter kernel is started with "-f <connection file>" in sys.argv.
    # parse_known_args() skips such unrecognized arguments without mutating
    # sys.argv (the previous remove()/pop() dropped whatever came last).
    args, _unknown = parser.parse_known_args()
    print(args)
    train(args)

Namespace(run_name='DDPM_Unconditional', epochs=20, batch_size=12, img_size=256, dataset_path='/content/drive/MyDrive/Project1', device='cuda', lr=0.0003)


RuntimeError: The NVIDIA driver on your system is too old (found version 11040). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver.

In [None]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [None]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [None]:
!apt-get update
!apt-get install -y nvidia-cuda-toolki

0% [Working]            Get:1 file:/var/cuda-repo-9-2-local  InRelease
            Ign:1 file:/var/cuda-repo-9-2-local  InRelease
0% [Connecting to security.ubuntu.com] [Connecting to cloud.r-project.org]                                                                          Get:2 file:/var/cuda-repo-9-2-local  Release [574 B]
0% [Connecting to security.ubuntu.com] [Connecting to cloud.r-project.org]                                                                          Get:2 file:/var/cuda-repo-9-2-local  Release [574 B]
0% [2 Release 0 B/574 B 0%] [Connecting to archive.ubuntu.com (91.189.91.83)] [Connecting to securit0% [Connecting to archive.ubuntu.com (91.189.91.83)] [Connecting to security.ubuntu.com (91.189.91.8                                                                                                    Get:3 file:/var/cuda-repo-9-2-local  Release.gpg [819 B]
0% [Connecting to archive.ubuntu.com (91.189.91.83)] [Connecting to security.ubuntu.com (91.189

In [None]:
import os

# Put the CUDA toolkit's bin directory on PATH so shell commands started from the
# notebook can find its executables. Guarded so re-running the cell does not append
# the entry again, and tolerant of PATH being unset.
_cuda_bin = '/usr/local/cuda/bin'
_current_path = os.environ.get('PATH', '')
if _cuda_bin not in _current_path.split(os.pathsep):
    os.environ['PATH'] = f"{_current_path}{os.pathsep}{_cuda_bin}" if _current_path else _cuda_bin

In [None]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [None]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [None]:
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu122

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu122
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cac

In [None]:
import torch
# Environment check: installed PyTorch build, whether PyTorch can use CUDA right now,
# and the CUDA version reported by torch.version.cuda.
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)

2.2.1+cu121
False
12.1


In [None]:
!nvidia-smi

Thu May 30 13:45:57 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   58C    P8              13W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu122

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu122
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cac

In [None]:
import torch
# Environment check: installed PyTorch build, whether PyTorch can use CUDA right now,
# and the CUDA version reported by torch.version.cuda.
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)

2.3.0+cu121
True
12.1


In [None]:
import sys
import argparse

if __name__ == "__main__":
    # Command-line style configuration for a training run; every option has a default,
    # so the cell can be executed as-is inside a notebook.
    parser = argparse.ArgumentParser()
    parser.add_argument('--run_name', type=str, default="DDPM_Unconditional", help="Name of the training run")
    parser.add_argument('--epochs', type=int, default=20, help="Number of epochs for training")
    parser.add_argument('--batch_size', type=int, default=12, help="Batch size for training")
    parser.add_argument('--img_size', type=int, default=256, help="Size of input images")
    parser.add_argument('--dataset_path', type=str, default=r"/content/drive/MyDrive/Project1", help="Path to the dataset")
    parser.add_argument('--device', type=str, default="cuda", help="Device for training (cpu or cuda)")
    parser.add_argument('--lr', type=float, default=3e-4, help="Learning rate for optimization")

    # A Jupyter kernel is started with "-f <connection file>" in sys.argv.
    # parse_known_args() skips such unrecognized arguments without mutating
    # sys.argv (the previous remove()/pop() dropped whatever came last).
    args, _unknown = parser.parse_known_args()
    print(args)
    train(args)

Namespace(run_name='DDPM_Unconditional', epochs=20, batch_size=12, img_size=256, dataset_path='/content/drive/MyDrive/Project1', device='cuda', lr=0.0003)


NameError: name 'setup_logging' is not defined

In [None]:
import os
# Terminate the kernel process immediately with exit status 0 (os._exit skips normal
# interpreter cleanup).
# NOTE(review): presumably used to force a runtime restart so freshly installed
# packages are picked up; any cell placed after this one will not run — confirm.
os._exit(00)