Merge branch 'LibrePhotos:dev' into dev
pimdo committed Jun 8, 2024
2 parents aa1ade5 + 3df1547 commit 9b78e1d
Showing 65 changed files with 1,299 additions and 672 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker-publish.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
- name: Run tests
run: echo "to-do"

@@ -26,7 +26,7 @@ jobs:

steps:
- name: Repository Dispatch
uses: peter-evans/repository-dispatch@v2
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.REPO_ACCESS_TOKEN }}
repository: librephotos/librephotos-docker
4 changes: 2 additions & 2 deletions .github/workflows/pre-commit.yml
@@ -6,6 +6,6 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
- name: Run pre-commit check
uses: pre-commit/action@v3.0.0
uses: pre-commit/action@v3.0.1
3 changes: 0 additions & 3 deletions .gitignore
@@ -164,8 +164,6 @@ densecap/data/models/densecap/densecap-pretrained-vgg16.t7
thumbnails
media
samplephotos
tags
api/places365/model/
Conv2d.patch
Linear.patch
Sequential.patch
@@ -180,7 +178,6 @@ api/im2txt/models/
api/im2txt/png/
*.ipynb
api/im2txt/*.tar.gz
api/places365/*.tar.gz
*.db
media*
protected_media
82 changes: 45 additions & 37 deletions api/admin.py
@@ -1,4 +1,5 @@
from django.contrib import admin
from django_q.tasks import AsyncTask

from .models import (
AlbumAuto,
@@ -16,47 +17,54 @@
)


def deduplicate_faces_function(queryset):
for photo in queryset:
# Get all faces in the photo
faces = Face.objects.filter(photo=photo)
# Check if there are any faces which have similar bounding boxes
for face in faces:
margin = int((face.location_right - face.location_left) * 0.05)
similar_faces = Face.objects.filter(
photo=photo,
location_top__lte=face.location_top + margin,
location_top__gte=face.location_top - margin,
location_right__lte=face.location_right + margin,
location_right__gte=face.location_right - margin,
location_bottom__lte=face.location_bottom + margin,
location_bottom__gte=face.location_bottom - margin,
location_left__lte=face.location_left + margin,
location_left__gte=face.location_left - margin,
)
if len(similar_faces) > 1:
# Divide between faces with a person label and faces without
faces_with_person_label = []
faces_without_person_label = []
for similar_face in similar_faces:
if similar_face.person:
faces_with_person_label.append(similar_face)
else:
faces_without_person_label.append(similar_face)
# If there are faces with a person label, keep the first one and delete the rest
for similar_face in faces_with_person_label[1:]:
similar_face.delete()
# If there are faces with a person label, delete all of them
if len(faces_with_person_label) > 0:
for similar_face in faces_without_person_label:
similar_face.delete()
# Otherwise, keep the first face and delete the rest
else:
for similar_face in faces_without_person_label[1:]:
similar_face.delete()


class FaceDeduplication(admin.ModelAdmin):
actions = ["deduplicate_faces"]

def deduplicate_faces(self, request, queryset):
for photo in queryset:
# Get all faces in the photo
faces = Face.objects.filter(photo=photo)
# Check if there are any faces which have similar bounding boxes
for face in faces:
margin = int((face.location_right - face.location_left) * 0.05)
similar_faces = Face.objects.filter(
photo=self,
location_top__lte=face.location_top + margin,
location_top__gte=face.location_top - margin,
location_right__lte=face.location_right + margin,
location_right__gte=face.location_right - margin,
location_bottom__lte=face.location_bottom + margin,
location_bottom__gte=face.location_bottom - margin,
location_left__lte=face.location_left + margin,
location_left__gte=face.location_left - margin,
)
if len(similar_faces) > 1:
# Divide between faces with a person label and faces without
faces_with_person_label = []
faces_without_person_label = []
for similar_face in similar_faces:
if similar_face.person:
faces_with_person_label.append(similar_face)
else:
faces_without_person_label.append(similar_face)
# If there are faces with a person label, keep the first one and delete the rest
for similar_face in faces_with_person_label[1:]:
similar_face.delete()
# If there are faces with a person label, delete all of them
if len(faces_with_person_label) > 0:
for similar_face in faces_without_person_label:
similar_face.delete()
# Otherwise, keep the first face and delete the rest
else:
for similar_face in faces_without_person_label[1:]:
similar_face.delete()
AsyncTask(
deduplicate_faces_function,
queryset=queryset,
).run()


# Register your models here.
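The api/admin.py change above moves the face-deduplication loop out of the ModelAdmin action into a module-level deduplicate_faces_function that is handed to django-q, so the admin request returns immediately instead of blocking while every photo's faces are compared (it also fixes the old photo=self lookup, which filtered on the admin instance rather than the photo). A minimal sketch of the same pattern, with illustrative names and assuming a configured django-q cluster:

from django.contrib import admin
from django_q.tasks import AsyncTask


def heavy_cleanup(queryset):
    # Runs in a django-q worker process, not in the web request.
    for obj in queryset:
        ...  # expensive per-object work goes here


class CleanupAdmin(admin.ModelAdmin):
    actions = ["run_cleanup"]

    def run_cleanup(self, request, queryset):
        # Enqueue and return; the worker picks the task up asynchronously.
        AsyncTask(heavy_cleanup, queryset=queryset).run()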
21 changes: 4 additions & 17 deletions api/batch_jobs.py
@@ -5,37 +5,24 @@
import pytz
from constance import config as site_config
from django.db.models import Q
from django_q.tasks import AsyncTask

import api.util as util
from api.image_similarity import build_image_similarity_index
from api.ml_models import download_models
from api.models.long_running_job import LongRunningJob
from api.models.photo import Photo
from api.semantic_search import create_clip_embeddings


def create_batch_job(job_type, user):
def batch_calculate_clip_embedding(user):
import torch

job_id = uuid.uuid4()
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.now().replace(tzinfo=pytz.utc),
job_type=job_type,
job_type=LongRunningJob.JOB_CALCULATE_CLIP_EMBEDDINGS,
)

if job_type == LongRunningJob.JOB_CALCULATE_CLIP_EMBEDDINGS:
AsyncTask(batch_calculate_clip_embedding, job_id, user).run()
if job_type == LongRunningJob.JOB_DOWNLOAD_MODELS:
AsyncTask(download_models, job_id).run()

lrj.save()


def batch_calculate_clip_embedding(job_id, user):
import torch

lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)

count = Photo.objects.filter(
58 changes: 28 additions & 30 deletions api/directory_watcher.py
@@ -12,9 +12,8 @@
from django.db.models import Q, QuerySet
from django_q.tasks import AsyncTask

import api.models.album_thing
import api.util as util
from api.batch_jobs import create_batch_job
from api.batch_jobs import batch_calculate_clip_embedding
from api.face_classify import cluster_all_faces
from api.models import File, LongRunningJob, Photo
from api.models.file import (
@@ -25,7 +24,6 @@
is_valid_media,
is_video,
)
from api.places365.places365 import place365_instance


def should_skip(path):
@@ -132,15 +130,15 @@ def create_new_image(user, path) -> Optional[Photo]:
return None


def handle_new_image(user, path, job_id, photo):
def handle_new_image(user, path, job_id, photo=None):
"""
Handles the creation and all the processing of the photo.
Args:
user: The owner of the photo.
path: The file path of the image.
job_id: The long running job id, which gets updated when the task runs
photo: An optional paramater, where you can input a photo instead of creating a new one. Used for uploading.
job_id: The long-running job id, which gets updated when the task runs
photo: An optional parameter, where you can input a photo instead of creating a new one. Used for uploading.
Note:
This function is used, when uploading a picture, because rescanning does not perform machine learning tasks
@@ -150,6 +148,10 @@ def handle_new_image(user, path, job_id, photo):
start = datetime.datetime.now()
if photo is None:
photo = create_new_image(user, path)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: save image: {}, elapsed: {}".format(job_id, path, elapsed)
)
if photo:
util.logger.info("job {}: handling image {}".format(job_id, path))
photo._generate_thumbnail(True)
@@ -166,6 +168,13 @@
job_id, path, elapsed
)
)
photo._extract_exif_data(True)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract exif data: {}, elapsed: {}".format(
job_id, path, elapsed
)
)
photo._generate_captions(False)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
@@ -192,36 +201,26 @@
job_id, path, elapsed
)
)
photo._extract_exif_data(True)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract exif data: {}, elapsed: {}".format(
job_id, path, elapsed
)
)

photo._extract_rating(True)
photo._extract_faces()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract rating: {}, elapsed: {}".format(job_id, path, elapsed)
"job {}: extract faces: {}, elapsed: {}".format(job_id, path, elapsed)
)
photo._extract_video_length(True)
photo._get_dominant_color()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract video length: {}, elapsed: {}".format(
"job {}: get dominant color: {}, elapsed: {}".format(
job_id, path, elapsed
)
)
photo._extract_faces()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract faces: {}, elapsed: {}".format(job_id, path, elapsed)
)
photo._get_dominant_color()
photo._recreate_search_captions()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: image processed: {}, elapsed: {}".format(job_id, path, elapsed)
"job {}: search caption recreated: {}, elapsed: {}".format(
job_id, path, elapsed
)
)

except Exception as e:
try:
util.logger.exception(
@@ -246,9 +245,8 @@ def rescan_image(user, path, job_id):
photo._extract_exif_data(True)
photo._extract_date_time_from_exif(True)
photo._add_location_to_album_dates()
photo._extract_rating(True)
photo._extract_video_length(True)
photo._get_dominant_color()
photo._recreate_search_captions()

except Exception as e:
try:
@@ -375,17 +373,17 @@ def scan_photos(user, full_scan, job_id, scan_directory="", scan_files=[]):
for photo in all:
photo_scanner(*photo)

place365_instance.unload()
util.logger.info("Scanned {} files in : {}".format(files_found, scan_directory))
api.models.album_thing.update()

util.logger.info("Finished updating album things")
exisisting_photos = Photo.objects.filter(owner=user.id).order_by("image_hash")
paginator = Paginator(exisisting_photos, 5000)
for page in range(1, paginator.num_pages + 1):
for existing_photo in paginator.page(page).object_list:
existing_photo._check_files()
util.logger.info("Finished checking paths")
create_batch_job(LongRunningJob.JOB_CALCULATE_CLIP_EMBEDDINGS, user)
AsyncTask(batch_calculate_clip_embedding, user).run()

except Exception:
util.logger.exception("An error occurred: ")
lrj.failed = True
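Taken together, the api/batch_jobs.py and api/directory_watcher.py changes retire the create_batch_job dispatcher: scan_photos now enqueues batch_calculate_clip_embedding directly, and that function creates its own LongRunningJob with JOB_CALCULATE_CLIP_EMBEDDINGS instead of receiving a pre-made job_id. A minimal sketch of the new dispatch path, using a hypothetical wrapper name and assuming a running django-q cluster:

from django_q.tasks import AsyncTask

from api.batch_jobs import batch_calculate_clip_embedding


def queue_clip_embeddings(user):
    # Fire-and-forget: the worker function records and updates its own
    # LongRunningJob, so the caller has nothing to track here.
    AsyncTask(batch_calculate_clip_embedding, user).run()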
7 changes: 3 additions & 4 deletions api/filters.py
@@ -4,7 +4,6 @@

from django.db.models import Q
from rest_framework import filters
from rest_framework.compat import distinct

import api.util as util
from api.image_similarity import search_similar_embedding
@@ -20,7 +19,8 @@ def filter_queryset(self, request, queryset, view):
return queryset

orm_lookups = [
self.construct_search(str(search_field)) for search_field in search_fields
self.construct_search(str(search_field), queryset=queryset)
for search_field in search_fields
]

if request.user.semantic_search_topk > 0:
@@ -37,7 +37,6 @@ def filter_queryset(self, request, queryset, view):
)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info("search similar embedding - took %.2f seconds" % (elapsed))
base = queryset
conditions = []
for search_term in search_terms:
queries = [Q(**{orm_lookup: search_term}) for orm_lookup in orm_lookups]
@@ -53,5 +52,5 @@ def filter_queryset(self, request, queryset, view):
# call queryset.distinct() in order to avoid duplicate items
# in the resulting queryset.
# We try to avoid this if possible, for performance reasons.
queryset = distinct(queryset, base)
queryset = queryset.distinct()
return queryset
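The api/filters.py change swaps rest_framework.compat.distinct(queryset, base) for a plain queryset.distinct() and passes the queryset into construct_search, tracking newer Django REST Framework releases where the compat helper is no longer available. The deduplication is still needed because OR-ing lookups that traverse to-many relations can return the same row more than once. An illustrative sketch with made-up lookup names, not the project's search_fields:

import operator
from functools import reduce

from django.db.models import Q


def apply_search(queryset, search_term, orm_lookups):
    # One Q object per lookup, OR-ed together into a single condition.
    condition = reduce(
        operator.or_, (Q(**{lookup: search_term}) for lookup in orm_lookups)
    )
    # Joins across to-many relations can duplicate rows, hence distinct().
    return queryset.filter(condition).distinct()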
2 changes: 1 addition & 1 deletion api/geocode/parsers/mapbox.py
@@ -3,7 +3,7 @@

def parse(location):
context = location.raw["context"]
center = [location.raw["center"][0], location.raw["center"][1]]
center = [location.raw["center"][1], location.raw["center"][0]]
local_name = location.raw["text"]
places = [local_name] + [
i["text"] for i in context if not i["id"].startswith("post")
2 changes: 1 addition & 1 deletion api/geocode/parsers/photon.py
@@ -13,8 +13,8 @@ def parse(location):
]
places = [data[prop] for prop in props if prop in data]
center = [
float(location.raw["geometry"]["coordinates"][0]),
float(location.raw["geometry"]["coordinates"][1]),
float(location.raw["geometry"]["coordinates"][0]),
]
return {
"features": [{"text": place, "center": center} for place in places],
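Both geocode parser fixes address the same coordinate-order bug: Mapbox's center field and GeoJSON geometry coordinates arrive as [longitude, latitude], while the rest of the geocode code evidently expects [latitude, longitude]. A small helper, not part of the commit, that makes the reordering explicit:

def to_lat_lon(coordinates):
    # GeoJSON-style pairs are [lon, lat]; return [lat, lon].
    lon, lat = coordinates
    return [float(lat), float(lon)]


# Mapbox reports Berlin as roughly [13.405, 52.52]; the parsers now store [52.52, 13.405].
print(to_lat_lon([13.405, 52.52]))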
4 changes: 4 additions & 0 deletions api/image_similarity.py
@@ -62,6 +62,10 @@ def search_similar_image(user, photo, threshold=27):

def build_image_similarity_index(user):
logger.info("building similarity index for user {}".format(user.username))
requests.delete(
settings.IMAGE_SIMILARITY_SERVER + "/build/",
json={"user_id": user.id},
)
start = datetime.now()
photos = (
Photo.objects.filter(Q(hidden=False) & Q(owner=user))
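The api/image_similarity.py addition clears any previously built index on the similarity service before rebuilding it, by sending a DELETE to the /build/ endpoint with the user's id. A hedged sketch of that call, with server_url standing in for whatever settings.IMAGE_SIMILARITY_SERVER resolves to in a given deployment:

import requests


def reset_similarity_index(server_url, user_id):
    # Drop the user's existing index so the rebuild starts from a clean slate.
    response = requests.delete(f"{server_url}/build/", json={"user_id": user_id})
    response.raise_for_status()  # the diff itself does not check the response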