In [54]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import pandas as pd

import requests
from io import BytesIO
from sklearn.metrics.pairwise import cosine_similarity

In [55]:
# Load the pretrained VGG16 and keep its `features` sub-network with the last
# child layer dropped, so the truncated network is used as a feature extractor.
vgg16 = models.vgg16(pretrained=True)
vgg16_features = nn.Sequential(*list(vgg16.features.children())[:-1])

# The extractor is only ever used for inference: switch it to eval mode and
# freeze its parameters so forward passes do not record an autograd graph.
# Note: the layers are shared with `vgg16.features`, so they are frozen too.
vgg16_features.eval()
vgg16_features.requires_grad_(False)



In [56]:
def preprocess_image_from_url(image_url, timeout=30):
    """Download an image and turn it into a normalized, batched tensor.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        A float tensor of shape (1, 3, 224, 224): the image resized to
        224x224, converted to a tensor, normalized with the per-channel
        mean/std given below, and with a leading batch dimension added.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(image_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
    response.raise_for_status()

    # Force three channels: grayscale, palette or RGBA images would otherwise
    # produce a tensor whose channel count does not match the 3-channel
    # normalization statistics below.
    img = Image.open(BytesIO(response.content)).convert("RGB")

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    img = transform(img)
    img = img.unsqueeze(0)  # Add batch dimension
    return img

In [57]:
def extract_features(image_path):
    """Compute a flattened feature vector for the image at a URL.

    Args:
        image_path: URL of the image; it is passed straight to
            `preprocess_image_from_url`.

    Returns:
        A 2-D NumPy array of shape (batch, n_features), where every
        non-batch dimension of the network output has been flattened.
        The batch size is 1 because a single image is processed.
    """
    img = preprocess_image_from_url(image_path)
    # Inference only: disabling autograd avoids building and retaining a
    # computation graph for every image, which saves memory and time.
    with torch.no_grad():
        features = vgg16_features(img)
    # Collapse everything except the batch dimension into one axis.
    return features.reshape(features.size(0), -1).numpy()

In [58]:
# Read the table and pull out the column holding the image URLs.
df = pd.read_csv("LA.csv")
urls = df['imgSrc']

# Extract one flattened feature vector per URL, in row order.
feature_rows = []
for image_url in urls:
    feature_rows.append(extract_features(image_url).flatten())

# Stack the vectors into a 2-D table (one row per image) and save it as a
# CSV without index or header.
features = pd.DataFrame(np.array(feature_rows))
features.to_csv("features.csv", index=False, header=False)

In [59]:
# Pairwise cosine similarity between all rows of `features`: entry [i, j]
# compares the feature vector of image i with that of image j.
similarity_matrix = cosine_similarity(X=features)
similarity_matrix

array([[1.0000004 , 0.16176993, 0.11678142, ..., 0.14105465, 0.0859743 ,
        0.17740318],
       [0.16176993, 0.9999998 , 0.12548724, ..., 0.39728653, 0.10236966,
        0.22051494],
       [0.11678142, 0.12548724, 1.0000005 , ..., 0.11971138, 0.14343163,
        0.13794518],
       ...,
       [0.14105465, 0.39728653, 0.11971138, ..., 1.0000001 , 0.07394078,
        0.23728457],
       [0.0859743 , 0.10236966, 0.14343163, ..., 0.07394078, 1.0000001 ,
        0.0933085 ],
       [0.17740318, 0.22051494, 0.13794518, ..., 0.23728457, 0.0933085 ,
        0.99999964]], dtype=float32)