In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import important libraries

In [2]:
import pandas as pd
import numpy as np
from PIL import Image
import cv2 as cv
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader, Dataset
import os

# Read data

In [3]:
data = pd.read_parquet('/content/drive/MyDrive/Query-Search-LLM-Model/individual_rows')

# Load ResNet18 model with evaluation mode

In [6]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', weights="ResNet18_Weights.IMAGENET1K_V1")
model.eval()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

# Drop one last layer for outputing raw vector of image insights

In [12]:
model = nn.Sequential(*list(model.children())[:-1])

In [13]:
model

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Con

# Test model using ones matrix

In [7]:
ones = torch.ones(1, 3, 64, 64)

In [8]:
with torch.no_grad():
  a = model(ones)
  print(model(ones).shape)

torch.Size([1, 512, 1, 1])


# Squeeze to see final vector size

In [9]:
a.squeeze().shape

torch.Size([512])

In [4]:
!pip install dill



In [5]:
import dill

# Save transformed model

In [38]:
with open('/content/drive/MyDrive/Query-Search-LLM-Model/resnet', 'wb') as f:
  dill.dump(model, f)

In [6]:
with open('/content/drive/MyDrive/Query-Search-LLM-Model/resnet', 'rb') as f:
  model = dill.load(f)

# Use given model for getting insights out of images

In [7]:
from torch.utils.data import DataLoader, Dataset

# Create image dataset class

In [8]:
class ImageDataset(Dataset):
    def __init__(self, data):
        self.data = data
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ])
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image_path = self.data.iloc[index]['images']
        image = cv.imread(image_path)
        image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        preprocessed = self.preprocess(image_rgb)
        squeezed = preprocessed.squeeze(0)
        return squeezed

In [57]:
class GetInsightsOfImages(nn.Module):
    def __init__(self, data, model):
        super().__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = model
        self.data = data
        self.image_dataset = ImageDataset(data)
        self.image_dataloader = DataLoader(self.image_dataset, batch_size=16, shuffle=True, num_workers=16, drop_last=False)

    def get_insights(self):
        insights = []
        self.model = self.model.to(self.device)

        with torch.no_grad():
            for n, batch in enumerate(self.image_dataloader):
                batch = batch.to(self.device)

                output = self.model(batch)

                for img_output in output:
                    insights.append(img_output.detach().cpu().numpy().flatten().tolist())

                print(f'{n + 1} Batch processed...')

        return insights

    def return_data(self):
        insights = self.get_insights()
        self.data['insights'] = insights
        return self.data
    def save_data(self, path):
        self.data = self.return_data()
        self.data.to_parquet(path)

In [58]:
import warnings
warnings.filterwarnings('ignore')

In [59]:
get_insights = GetInsightsOfImages(data, model)

In [60]:
get_insights.save_data('/content/drive/MyDrive/Query-Search-LLM-Model/insights')

1 Batch processed...
2 Batch processed...
3 Batch processed...
4 Batch processed...
5 Batch processed...
6 Batch processed...
7 Batch processed...
8 Batch processed...
9 Batch processed...
10 Batch processed...
11 Batch processed...
12 Batch processed...
13 Batch processed...
14 Batch processed...
15 Batch processed...
16 Batch processed...
17 Batch processed...
18 Batch processed...
19 Batch processed...
20 Batch processed...
21 Batch processed...
22 Batch processed...
23 Batch processed...
24 Batch processed...
25 Batch processed...
26 Batch processed...
27 Batch processed...
28 Batch processed...
29 Batch processed...
30 Batch processed...
31 Batch processed...
32 Batch processed...
33 Batch processed...
34 Batch processed...
35 Batch processed...
36 Batch processed...
37 Batch processed...
38 Batch processed...
39 Batch processed...
40 Batch processed...
41 Batch processed...
42 Batch processed...
43 Batch processed...
44 Batch processed...
45 Batch processed...
46 Batch processed.

In [61]:
insights = pd.read_parquet('/content/drive/MyDrive/Query-Search-LLM-Model/insights')

In [62]:
insights.head(3)

Unnamed: 0,links,Queries,images,insights
0,https://www.flannels.com/ami-paris-ami-de-coeu...,Ami De Coeur Wool Jumper in blue looking for s...,/content/drive/MyDrive/Query-Search-LLM-Model/...,"[0.27834999561309814, 0.6414205431938171, 0.16..."
1,https://www.flannels.com/ami-paris-ami-de-coeu...,I need a high-quality wool jumper for the fall...,/content/drive/MyDrive/Query-Search-LLM-Model/...,"[0.918123185634613, 1.6053258180618286, 0.1306..."
2,https://www.flannels.com/ami-paris-ami-de-coeu...,I want a blue Ami Paris jumper something with ...,/content/drive/MyDrive/Query-Search-LLM-Model/...,"[0.17485029995441437, 0.5286245346069336, 0.57..."
