In [7]:
# List the top-level entries of /zenith/ARC-AGI (absolute path on this machine).
!ls /zenith/ARC-AGI

LICENSE  README.md  apps  data


In [5]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

class CLIP:
    """Thin wrapper around a Hugging Face CLIP checkpoint for embedding images and text.

    The model and its processor are loaded once in ``__init__``; the two
    ``encode_*`` methods run the corresponding tower with gradients disabled.
    """

    def __init__(self, model_name="openai/clip-vit-base-patch32"):
        """Load the CLIP model and its matching processor.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                model and the processor, so the two always match. Defaults to
                the base ViT-B/32 checkpoint.
        """
        self.model = CLIPModel.from_pretrained(model_name)
        self.processor = CLIPProcessor.from_pretrained(model_name)

    def encode_image(self, image_path):
        """Embed a single image file.

        Args:
            image_path: Path (or anything ``PIL.Image.open`` accepts) of the image.

        Returns:
            The tensor returned by ``get_image_features``; for the default
            checkpoint a single image yields shape ``[1, 512]``.
        """
        # PIL opens files lazily and keeps the handle alive; preprocess inside
        # the context manager so the pixels are read before the file is closed.
        with Image.open(image_path) as image:
            inputs = self.processor(images=image, return_tensors="pt")

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            image_features = self.model.get_image_features(**inputs)

        return image_features

    def encode_text(self, text):
        """Embed one string or a list of strings.

        Args:
            text: A single string or a list of strings.

        Returns:
            The tensor returned by ``get_text_features`` (presumably one row
            per input text -- confirm against the model docs).
        """
        # padding=True lets prompts of different lengths be stacked into one
        # "pt" tensor; truncation=True clips prompts that exceed the
        # tokenizer's maximum length instead of failing inside the model.
        # A single short string is encoded exactly as before.
        inputs = self.processor(
            text=text,
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        with torch.no_grad():
            text_features = self.model.get_text_features(**inputs)
        return text_features

# Smoke-test the wrapper on one local image.
image_path = "/zenith/puppy.jpg"
myclip = CLIP()
features = myclip.encode_image(image_path)
# Expected output for the base checkpoint: torch.Size([1, 512]).
print(features.shape)

2024-09-30 02:59:42.536438: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-30 02:59:42.727215: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-30 02:59:42.775767: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-30 02:59:43.462404: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


torch.Size([1, 512])


In [1]:
# Report the NVIDIA driver/CUDA versions and the current state of the visible GPU(s).
!nvidia-smi

Mon Sep 30 02:56:20 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       On  |   00000000:00:05.0 Off |                    0 |
| N/A   40C    P8             12W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
# List the contents of /zenith/notebooks (absolute path on this machine).
!ls /zenith/notebooks

ARC-AGI  catcus.png  intro.ipynb


In [4]:
import torch
import time

def benchmark_gpu(sizes=(1000, 2000, 4000, 8000, 16000), num_iters=10):
    """Time square matrix multiplications on the current CUDA device.

    For each ``size`` two random ``size x size`` matrices are created on the
    GPU, one untimed warm-up multiply is run, and then ``num_iters`` multiplies
    are timed together. The average time and the derived throughput are
    printed; nothing is returned.

    Args:
        sizes: Iterable of matrix edge lengths to benchmark.
        num_iters: Number of timed multiplies per size (must be >= 1).

    Raises:
        ValueError: If ``num_iters`` is smaller than 1.
    """
    if num_iters < 1:
        # Guards the division by num_iters below.
        raise ValueError("num_iters must be at least 1")

    # Nothing is benchmarked without CUDA, so say that rather than claiming
    # a CPU run.
    if not torch.cuda.is_available():
        print("CUDA is not available. Skipping GPU benchmark.")
        return

    device = torch.cuda.current_device()
    print(f"Running on GPU: {torch.cuda.get_device_name(device)}")

    for size in sizes:
        a = torch.randn(size, size, device=device)
        b = torch.randn(size, size, device=device)

        # Warm-up run so one-time setup cost is not included in the timing.
        torch.matmul(a, b)
        torch.cuda.synchronize()

        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        for _ in range(num_iters):
            torch.matmul(a, b)
        # Kernels launch asynchronously; wait for them before stopping the clock.
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        avg_time = elapsed / num_iters
        flops = 2 * size**3 / avg_time  # 2n^3 FLOPs for an n x n matrix multiplication
        tflops = flops / 1e12

        print(f"Matrix size: {size}x{size}")
        print(f"Average time: {avg_time:.4f} seconds")
        print(f"Performance: {tflops:.2f} TFLOPS")
        print()

# Run the benchmark; without CUDA it only prints a notice and returns.
benchmark_gpu()


Running on GPU: Tesla T4
Matrix size: 1000x1000
Average time: 0.0008 seconds
Performance: 2.55 TFLOPS

Matrix size: 2000x2000
Average time: 0.0059 seconds
Performance: 2.73 TFLOPS

Matrix size: 4000x4000
Average time: 0.0306 seconds
Performance: 4.18 TFLOPS

Matrix size: 8000x8000
Average time: 0.2540 seconds
Performance: 4.03 TFLOPS

Matrix size: 16000x16000
Average time: 1.8873 seconds
Performance: 4.34 TFLOPS

