In [3]:
from google.cloud import vision


#def google_vision_check(image_path):
    #client = vision.ImageAnnotatorClient()
    
    #with open(image_path, "rb") as image_file:
        #content = image_file.read()
    
    #image = vision.Image(content=content)
    #response = client.web_detection(image=image)

    #if response.web_detection.full_matching_images:
        #return "Potential copyrighted match found!"
    #return "No copyright issues detected."
        
def google_vision_check(image_path):
    """Run Google Cloud Vision web detection on a local image file.

    Args:
        image_path: Path to the image file. It is opened in binary mode and
            its bytes are sent as the request image content.

    Returns:
        Whatever ``vision.ImageAnnotatorClient.web_detection`` returns for
        the image.
    """
    annotator = vision.ImageAnnotatorClient()

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    return annotator.web_detection(image=vision.Image(content=raw_bytes))


In [4]:
# Read one image path per line, trimming surrounding whitespace/newlines,
# and keep the first entry for a quick single-image test.
with open("data-img-paths.txt", "r") as file:
    image_paths = [raw_line.strip() for raw_line in file]

first_image_path = image_paths[0]


In [1]:
# NOTE(review): presumably configures Google Cloud credentials from the
# service-account key file below — confirm in google_auth_gateway.
# NOTE(review): this cell has execution count 1 but sits after cells 3 and 4;
# on a fresh kernel it likely needs to run before any Vision client is created.
from google_auth_gateway import authenticate_google
authenticate_google("../secrets/philoch-gvision-3f27c73048ae.json")



In [14]:
# Run web detection on the first path listed in data-img-paths.txt.
res = google_vision_check(first_image_path)


In [16]:
# Display the raw web-detection response returned by google_vision_check.
res

web_detection {
  web_entities {
    entity_id: "/g/1218dmz7"
    score: 3.03600025
    description: "Donatella Di Cesare"
  }
  web_entities {
    entity_id: "/m/02h_4f"
    score: 0.783172488
    description: "Biel/Bienne"
  }
  web_entities {
    entity_id: "/m/01ysd3"
    score: 0.720299959
    description: "The Discreet Charm of the Bourgeoisie"
  }
  web_entities {
    entity_id: "/g/11j_6mh3tl"
    score: 0.47205
    description: "Themelis Diamantis"
  }
  web_entities {
    entity_id: "/m/037mh8"
    score: 0.3961
    description: "Philosophy"
  }
  web_entities {
    entity_id: "/g/120qckr2"
    score: 0.3465
    description: "Swiss national exhibition"
  }
  web_entities {
    entity_id: "/g/11b77qrp3l"
    score: 0.3236
    description: "2022"
  }
  web_entities {
    entity_id: "/g/1tctwg6b"
    score: 0.275154024
    description: "Diamantis Thémélis"
  }
  web_entities {
    entity_id: "/t/267m4l1bypk4d"
    score: 0.2452
  }
  web_entities {
    entity_id: "/m/04w_7"
    

In [19]:
import os
from typing import Literal, Sequence, TypedDict

import requests

type TGVisionFeature = Literal[
    vision.Feature.Type.OBJECT_LOCALIZATION,
    vision.Feature.Type.FACE_DETECTION,
    vision.Feature.Type.LANDMARK_DETECTION,
    vision.Feature.Type.LOGO_DETECTION,
    vision.Feature.Type.LABEL_DETECTION,
    vision.Feature.Type.TEXT_DETECTION,
    vision.Feature.Type.DOCUMENT_TEXT_DETECTION,
    vision.Feature.Type.SAFE_SEARCH_DETECTION,
    vision.Feature.Type.IMAGE_PROPERTIES,
    vision.Feature.Type.CROP_HINTS,
    vision.Feature.Type.WEB_DETECTION,
    vision.Feature.Type.PRODUCT_SEARCH,
    vision.Feature.Type.OBJECT_LOCALIZATION,
]

type TGVisionFeatureSequence = Sequence[TGVisionFeature]

class Image(TypedDict):
    """Descriptor for an image to analyze: its location and how to read it."""
    # Filesystem path when `type` is "local"; URL when `type` is "uri".
    path: str
    # Selects how `path` is interpreted by validate_image / analyze_image.
    type: Literal["local", "uri"]

def validate_image(image: Image) -> bool:
    """Check that an image descriptor points at something usable.

    For ``"uri"`` images a HEAD request is sent and any 2xx status counts as
    valid. For ``"local"`` images the path must be an existing regular file.
    Any other ``type`` is rejected.

    Args:
        image: Descriptor with a ``path`` and a ``type`` of ``"local"`` or
            ``"uri"``.

    Returns:
        ``True`` if the image passed the check, ``False`` otherwise. The
        result is always a real ``bool``, never ``None``.
    """
    image_type = image["type"]

    if image_type == "uri":
        try:
            # Bound the wait so an unresponsive host cannot hang the notebook.
            response = requests.head(image["path"], timeout=10)
        except Exception as e:
            print(f"Error checking image URI: {e}")
            return False
        return 200 <= response.status_code < 300

    if image_type == "local":
        # isfile (not exists): a directory cannot be opened as an image later.
        if os.path.isfile(image["path"]):
            return True
        # Previously this case fell through and implicitly returned None.
        print(f"Local image file not found: {image['path']}")
        return False

    print(f"Invalid image type. Expected 'local' or 'uri', got '{image_type}'")
    return False


def analyze_image(
    image: Image,
    feature_types: TGVisionFeatureSequence,
) -> vision.AnnotateImageResponse | Literal[False]:
    """Send one image to the Vision API, requesting the given feature types.

    Args:
        image: Descriptor of the image. ``"local"`` images are read from disk
            and sent as bytes; ``"uri"`` images are passed by their URI.
        feature_types: Feature types to include in the request.

    Returns:
        The response from ``client.annotate_image``, or ``False`` when the
        image fails ``validate_image`` or has an unrecognized ``type``.
    """
    # Guard clause: bail out before touching the API for unusable images.
    if not validate_image(image):
        print(f"Invalid image: {image}")
        return False

    client = vision.ImageAnnotatorClient()
    google_image = vision.Image()

    kind = image["type"]
    if kind == "local":
        with open(image["path"], "rb") as image_file:
            google_image.content = image_file.read()
    elif kind == "uri":
        google_image.source.image_uri = image["path"]
    else:
        print(f"Invalid image type: {image['type']}")
        return False

    requested = []
    for feature_type in feature_types:
        requested.append(vision.Feature(type_=feature_type))

    return client.annotate_image(
        request=vision.AnnotateImageRequest(image=google_image, features=requested)
    )

In [None]:
# Same loading step as earlier in the notebook: one stripped path per line,
# with the first entry kept for single-image experiments.
with open("data-img-paths.txt", "r") as file:
    image_paths = list(map(str.strip, file))

first_image_path = image_paths[0]

In [15]:
# Request only web detection.
features = [vision.Feature.Type.WEB_DETECTION]

In [25]:
# Analyze a remote image by URL, requesting the feature types in `features`.
# analyze_image returns False (not a response) if the image fails validation.
response = analyze_image(
    image={
        "path": "https://assets.philosophie.ch/2023-10-15-luporini.jpg",
        "type": "uri",
    },
    feature_types=features,
)
    

In [28]:
# Show the full-matching image URLs from the web-detection result.
# NOTE: if analyze_image returned False above, this attribute access fails.
response.web_detection.full_matching_images

[url: "https://funtime.ge/files/2021/11/15/5835/ratom-gadaikhara-pizis-koshki_o_h.jpeg"
, url: "http://hopa.am/uploads/images/tours/slider/92116_Pisa-01-1024x512.jpg"
, url: "https://www.noleggiare.it/wp-content/uploads/2018/07/pisa.jpg"
, url: "https://i0.wp.com/journo.com.tr/wp-content/uploads/2017/09/pisa.jpg?ssl=1"
, url: "https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=1254844987896866"
, url: "https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=1796778673975172"
, url: "https://miro.medium.com/v2/resize:fit:1400/1*JTAYf-AuTzQqRAnyabuJeQ.jpeg"
, url: "https://myone513.wordpress.com/wp-content/uploads/2017/05/pisa.jpg"
, url: "http://duhocblueocean.vn//uploads/duhoc/TI%E1%BA%BET%20KI%E1%BB%86M%20CHI%20PH%C3%8D%20V%E1%BB%9AI%20H%E1%BB%8CC%20B%E1%BB%94NG%20DU%20H%E1%BB%8CC%20%C3%9D%20T%E1%BA%A0I%20%C4%90%E1%BA%A0I%20H%E1%BB%8CC%20PISA.jpg"
, url: "https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=2600630653525744"
]