In [1]:
import torch
from PIL import Image
from transformers import AutoProcessor, CLIPModel, AutoModel,AutoImageProcessor
import torch.nn as nn
import requests
from io import BytesIO
import os
import pickle
import numpy as np
import pandas as pd
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One checkpoint id shared by the preprocessor and the model so the two
# cannot drift apart.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
processor = AutoProcessor.from_pretrained(CLIP_CHECKPOINT)
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT).to(device)

Downloading:   0%|          | 0.00/316 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/568 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/862k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/389 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/605M [00:00<?, ?B/s]

In [2]:
def load_image_PIL(url_or_path, timeout=30):
    """Open an image from an HTTP(S) URL or from a local source.

    Args:
        url_or_path: A string starting with ``http://`` or ``https://`` is
            downloaded; anything else is handed to ``Image.open`` unchanged.
        timeout: Seconds to wait for the server before giving up. Only used
            for URLs.

    Returns:
        The image returned by ``Image.open``.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.RequestException: The request failed or timed out.
        OSError: ``Image.open`` could not open or identify the image.
    """
    # Only strings can be URLs; the isinstance check keeps .startswith from
    # crashing on other sources that Image.open is able to handle.
    if isinstance(url_or_path, str) and url_or_path.startswith(("http://", "https://")):
        response = requests.get(url_or_path, timeout=timeout)
        # Fail here on 4xx/5xx instead of passing an error page to PIL.
        response.raise_for_status()
        # Decode from the fully downloaded body rather than the raw socket
        # stream: .content has transfer encodings (e.g. gzip) already undone
        # and the connection is released once it has been read.
        return Image.open(BytesIO(response.content))
    return Image.open(url_or_path)

In [3]:
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of ``vec1`` and ``vec2``.

    The value is ``np.dot(vec1, vec2)`` divided by the product of the two
    ``np.linalg.norm`` values, so the result has whatever shape ``np.dot``
    produces for the given inputs (a scalar for two 1-D vectors).

    Cosine similarity is undefined when either input has zero norm. In that
    case zeros of the same shape are returned instead of NaN.

    Args:
        vec1: First array-like operand.
        vec2: Second array-like operand; must be compatible with ``vec1``
            under ``np.dot``.

    Returns:
        The similarity as a NumPy scalar or array.
    """
    dot_product = np.dot(vec1, vec2)
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)

    if denominator == 0:
        # Avoid the 0/0 division (NaN plus RuntimeWarning). The [()] index
        # turns a 0-d array back into a scalar and leaves n-d arrays as is,
        # so the return shape matches the non-degenerate path.
        return np.zeros_like(dot_product, dtype=float)[()]

    return dot_product / denominator

In [1]:
import pandas as pd
# Workbook holding the class names. The path is relative, so it is resolved
# against the kernel's working directory.
CLASS_NAMES_WORKBOOK = r"ImageNet Classes.xlsx"

temp = pd.read_excel(CLASS_NAMES_WORKBOOK)

# Drop empty cells (pandas reads them as NaN) and coerce to str so that
# stripping cannot fail on a non-string value. Only leading whitespace is
# removed.
classes = temp['Col_Names'].dropna().astype(str).str.lstrip().tolist()

# One positive and one negative prompt per class name.
# NOTE(review): inside a Python f-string "#{...}" emits a literal '#' before
# the class name - confirm that the '#' is meant to be part of the prompt.
positive_classes = [f"a smashing picture, of a #{name}" for name in classes]
negative_classes = [f"a horrible picture, of a #{name}" for name in classes]

FileNotFoundError: [Errno 2] No such file or directory: 'ImageNet Classes.xlsx'

In [None]:
# Tokenise both prompt lists; padding=True lets prompts of different lengths
# share one batch. The encoded batches are moved to the model's device.
positive_inputs = processor(text=positive_classes, return_tensors="pt", padding=True).to(device)
negative_inputs = processor(text=negative_classes, return_tensors="pt", padding=True).to(device)

# Inference only, so both forward passes run inside a single no_grad scope.
with torch.no_grad():
    positive_text_features = model.get_text_features(**positive_inputs)
    negative_text_features = model.get_text_features(**negative_inputs)

In [None]:
import numpy as np

# Bring the embeddings to host memory before converting them: np.array() on
# a CUDA tensor raises TypeError, and `device` is "cuda" whenever a GPU is
# available.
positive_prompt_vectors = positive_text_features.detach().cpu().numpy()
negative_prompt_vectors = negative_text_features.detach().cpu().numpy()

# Collapse axis 0 (presumably one row per prompt - TODO confirm) into a
# single mean vector for each prompt set.
average_positive_vector = np.mean(positive_prompt_vectors, axis=0)
average_negative_vector = np.mean(negative_prompt_vectors, axis=0)

# Optional: persist the averaged vectors so they can be reloaded later.
#with open('positive_prompt.pkl', 'wb') as f:
    #pickle.dump(average_positive_vector, f)
#with open('negative_prompt.pkl', 'wb') as f:
    #pickle.dump(average_negative_vector, f)


In [None]:
# Load the stored prompt vectors. These assignments replace any values the
# two names held before this cell ran.
# NOTE(review): pickle.load can execute arbitrary code - only open .pkl files
# that come from a trusted source.
with open('hotel_positive_prompt.pkl', 'rb') as positive_file:
    average_positive_vector = pickle.load(positive_file)

with open('hotel_negative_prompt.pkl', 'rb') as negative_file:
    average_negative_vector = pickle.load(negative_file)

In [None]:
def predict(img_url):
    """Score an image against the positive and negative prompt vectors.

    The image is embedded with the CLIP model, compared to
    ``average_positive_vector`` and ``average_negative_vector`` with
    ``cosine_similarity``, and the difference of the two similarities is
    scaled by 1000.

    Args:
        img_url: Either a URL / local path (opened with ``load_image_PIL``)
            or an already opened PIL image, which is what the Streamlit
            uploader cell passes in.

    Returns:
        ``1000 * (positive_similarity - negative_similarity)`` as a NumPy
        value. Its shape is whatever ``cosine_similarity`` yields for the
        transposed image embedding (presumably one element per image -
        TODO confirm).
    """
    # Accept an opened image directly; load_image_PIL only understands
    # strings and would fail on a PIL image.
    if isinstance(img_url, Image.Image):
        image1 = img_url
    else:
        image1 = load_image_PIL(img_url)

    with torch.no_grad():
        inputs1 = processor(images=image1, return_tensors="pt").to(device)
        image_features1 = model.get_image_features(**inputs1)

    # .cpu() is required before .numpy() when the model runs on CUDA;
    # on the CPU it is a no-op.
    image_vector = image_features1.detach().cpu().numpy()
    image_column = np.transpose(image_vector)

    positive_similarity = cosine_similarity(average_positive_vector, image_column)
    negative_similarity = cosine_similarity(average_negative_vector, image_column)

    aesthetic_score = positive_similarity - negative_similarity
    return aesthetic_score * 1000

In [None]:
import streamlit as st
st.header("Image Aesthetics Scorer")

uploaded_file = st.file_uploader(
    "Choose an image...",
    type=["png", "jpg", "jpeg"],
)
picture_width = st.sidebar.slider("Picture Width", min_value=100, max_value=500)

# Everything below only runs once the user has picked a file.
if uploaded_file is not None:
    image = Image.open(uploaded_file)

    st.subheader("Input", divider="rainbow")
    st.image(image, caption="Uploaded Image", width=picture_width)

    # predict() receives the opened PIL image here, not a URL or a path.
    results = predict(image)

    # Show the image a second time with the score as its caption.
    st.subheader("Results", divider="rainbow")
    st.image(image, caption=results, width=picture_width)