In [None]:
pip install torch pandas requests openai-clip

Collecting openai-clip
  Downloading openai-clip-1.0.1.tar.gz (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54

In [None]:
import pandas as pd

# Read the spreadsheet that pairs each image URL with its document class.
df = pd.read_excel('/content/DATASET_urls.xlsx')

# Pull both columns out as plain Python lists, kept in row order so that
# image_urls[i] and classifications[i] come from the same spreadsheet row.
image_urls, classifications = (
    df[column].tolist() for column in ('image_urls', 'classification')
)


In [None]:
# Display the first image URL from the loaded list.
image_urls[0]

'https://saldoinvoice.com/wp-content/uploads/2022/12/e-commerce-invoice-template.webp'

In [None]:
# Display the class label that belongs to the first image URL.
classifications[0]

'Invoice'

In [None]:
import os
import requests
from PIL import Image
from io import BytesIO

In [None]:
# Convert class labels to numeric indices.
# The fitted encoder is kept around because its `classes_` attribute is used
# later to translate a numeric index back into the label text.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
# One integer per entry of `classifications`, in the same order.
numeric_labels = label_encoder.fit_transform(classifications)

In [None]:
# Folder that receives the downloaded images; an already existing folder is reused.
image_dir = 'downloaded_images4'
os.makedirs(name=image_dir, exist_ok=True)

# Label strings held by the fitted encoder; indexed by numeric label later on.
class_names = label_encoder.classes_

In [None]:
# Parallel accumulators filled by the download loop:
# saved file names and the numeric label of each saved file.
successful_images, successful_labels = [], []

In [None]:
# Download every image, re-encode it as an RGB JPEG inside `image_dir`, and
# record the file name and numeric label of each success.
#
# The result lists are (re)initialised here so that re-running this cell does
# not append a second copy of every entry.
successful_images = []
successful_labels = []

# The recorded run shows several hosts answering "403 Forbidden". A
# browser-like User-Agent commonly avoids that kind of block (assumption:
# those hosts reject the default python-requests agent -- verify on re-run).
request_headers = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0 Safari/537.36'
    )
}

# A single session reuses connections for URLs that share a host.
with requests.Session() as session:
    session.headers.update(request_headers)

    for i, (url, label) in enumerate(zip(image_urls, numeric_labels)):
        filename = f'image_{i}.jpg'
        try:
            response = session.get(url, timeout=10)
            response.raise_for_status()  # Turn HTTP error statuses into exceptions
            # Close the decoded image as soon as the RGB copy has been written.
            with Image.open(BytesIO(response.content)) as downloaded:
                downloaded.convert('RGB').save(os.path.join(image_dir, filename))
        except (requests.RequestException, IOError) as e:
            # Best effort: report the failure and move on to the next URL.
            print(f"Failed to download image {i} from {url}: {e}")
            continue

        # Only reached when the image was fetched, decoded and saved.
        successful_images.append(filename)
        successful_labels.append(label)

print(f"Successfully downloaded {len(successful_images)} images.")



Failed to download image 22 from https://www.brightpearl.com/wp-content/uploads/2021/02/Blanket_Purchase_Order-e1612356523446.png: 403 Client Error: Forbidden for url: https://www.brightpearl.com/wp-content/uploads/2021/02/Blanket_Purchase_Order-e1612356523446.png
Failed to download image 23 from https://tipalti.com/wp-content/uploads/2023/09/media_contract-purchase-order.webp: 403 Client Error: Forbidden for url: https://tipalti.com/wp-content/uploads/2023/09/media_contract-purchase-order.webp
Failed to download image 24 from https://www.brightpearl.com/wp-content/uploads/2021/02/Purchase-Order-Template-02-TemplateLab-212x300.jpg: 403 Client Error: Forbidden for url: https://www.brightpearl.com/wp-content/uploads/2021/02/Purchase-Order-Template-02-TemplateLab-212x300.jpg
Failed to download image 38 from https://www.brightpearl.com/wp-content/uploads/2021/02/Purchase-Order-Template-02-TemplateLab-212x300.jpg: 403 Client Error: Forbidden for url: https://www.brightpearl.com/wp-content/u

In [None]:
# Number of labels recorded for successfully downloaded images.
len(successful_labels)

184

In [None]:
from torchvision import transforms
import torch
from torch.utils.data import Dataset, DataLoader

In [None]:
# Per-channel (R, G, B) mean and standard deviation handed to Normalize.
CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

# Image pipeline: resize to 224x224, convert to a tensor, normalise each channel.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(CLIP_MEAN, CLIP_STD),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
class ImageDataset(Dataset):
    """Dataset of image files paired with a class-name prompt and a label.

    Each item is a tuple ``(image, text, classification)``:

    * ``image`` -- the RGB image read from ``image_dir``, passed through
      ``transform`` when one is given;
    * ``text`` -- the first row returned by ``clip.tokenize`` for the item's
      class name;
    * ``classification`` -- the class index as a ``torch.long`` scalar tensor.

    Args:
        image_dir: Directory that contains the image files.
        image_files: File names relative to ``image_dir``.
        classifications: Integer class index for each entry of ``image_files``.
        class_names: Sequence that maps a class index to its name.
        transform: Optional callable applied to the loaded PIL image.

    Raises:
        ValueError: If ``image_files`` and ``classifications`` differ in length.
    """

    def __init__(self, image_dir, image_files, classifications, class_names, transform=None):
        # A length mismatch would otherwise surface later as an IndexError or
        # as silently ignored files, so reject it up front.
        if len(image_files) != len(classifications):
            raise ValueError(
                "image_files and classifications must have the same length "
                f"(got {len(image_files)} and {len(classifications)})"
            )
        self.image_dir = image_dir
        self.image_files = image_files
        self.classifications = classifications
        self.class_names = class_names
        self.transform = transform

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load, transform and label the item at position ``idx``."""
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        # Close the file handle right after decoding instead of leaving it
        # open until garbage collection.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)

        # Index class_names with a plain int rather than a 0-d tensor.
        label_index = int(self.classifications[idx])
        classification = torch.tensor(label_index, dtype=torch.long)
        # `clip` is looked up when an item is fetched, so it must have been
        # imported by then.
        text = clip.tokenize([self.class_names[label_index]])[0]
        return image, text, classification


In [None]:


# Pair every successfully downloaded file with its numeric label.
dataset = ImageDataset(
    image_dir=image_dir,
    image_files=successful_images,
    classifications=successful_labels,
    class_names=class_names,
    transform=transform,
)

# Shuffled mini-batches of at most 32 items.
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [None]:
import clip

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# jit=False requests the regular nn.Module, which is the variant meant for
# fine-tuning; clip.load already places the model on `device`.
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)

# On CUDA the checkpoint is loaded with half-precision weights, which is
# numerically fragile under Adam updates, so train in full precision.
# Assigning the result also keeps the cell from echoing the full module repr.
model = model.float()

CLIP(
  (visual): VisionTransformer(
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): Sequential(
        (0): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): QuickGELU()
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
        (1): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          

In [None]:
import torch.optim as optim

# Training objective: cross-entropy between logits and integer targets.
loss_fn = torch.nn.CrossEntropyLoss()

# Adam over all model parameters with a small step size.
LEARNING_RATE = 1e-5
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


In [None]:
num_epochs = 10  # Set the number of epochs

# Tokenise one prompt per class, once. Scoring each image against these
# prompts gives logits of shape [batch, num_classes], which is what
# CrossEntropyLoss needs when the targets are class indices. Scoring against
# the texts of the same batch would instead give [batch, batch] logits whose
# columns are batch positions, not classes.
class_tokens = clip.tokenize([str(name) for name in class_names]).to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0

    # The per-item text tokens from the dataset are not needed here because
    # every image is compared with all class prompts.
    for images, _texts, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass: similarity of each image to each class prompt.
        logits, _ = model(images, class_tokens)

        # Calculate the loss
        loss = loss_fn(logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate accuracy
        predicted = logits.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        total_loss += loss.item()

    # Guard the averages so an empty dataloader does not divide by zero.
    num_batches = len(dataloader)
    accuracy = 100 * correct / total if total else 0.0
    avg_loss = total_loss / num_batches if num_batches else 0.0

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

NameError: name 'model' is not defined