Merge branch 'LibrePhotos:dev' into dev
pimdo committed Jun 8, 2024
2 parents aa1ade5 + 3df1547 commit 9b78e1d
Showing 65 changed files with 1,299 additions and 672 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker-publish.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
- name: Run tests
run: echo "to-do"

@@ -26,7 +26,7 @@ jobs:

steps:
- name: Repository Dispatch
uses: peter-evans/repository-dispatch@v2
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.REPO_ACCESS_TOKEN }}
repository: librephotos/librephotos-docker
4 changes: 2 additions & 2 deletions .github/workflows/pre-commit.yml
@@ -6,6 +6,6 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
- name: Run pre-commit check
uses: pre-commit/action@v3.0.0
uses: pre-commit/action@v3.0.1
3 changes: 0 additions & 3 deletions .gitignore
@@ -164,8 +164,6 @@ densecap/data/models/densecap/densecap-pretrained-vgg16.t7
thumbnails
media
samplephotos
tags
api/places365/model/
Conv2d.patch
Linear.patch
Sequential.patch
@@ -180,7 +178,6 @@ api/im2txt/models/
api/im2txt/png/
*.ipynb
api/im2txt/*.tar.gz
api/places365/*.tar.gz
*.db
media*
protected_media
82 changes: 45 additions & 37 deletions api/admin.py
@@ -1,4 +1,5 @@
from django.contrib import admin
from django_q.tasks import AsyncTask

from .models import (
AlbumAuto,
@@ -16,47 +17,54 @@
)


def deduplicate_faces_function(queryset):
for photo in queryset:
# Get all faces in the photo
faces = Face.objects.filter(photo=photo)
# Check if there are any faces which have similar bounding boxes
for face in faces:
margin = int((face.location_right - face.location_left) * 0.05)
similar_faces = Face.objects.filter(
photo=photo,
location_top__lte=face.location_top + margin,
location_top__gte=face.location_top - margin,
location_right__lte=face.location_right + margin,
location_right__gte=face.location_right - margin,
location_bottom__lte=face.location_bottom + margin,
location_bottom__gte=face.location_bottom - margin,
location_left__lte=face.location_left + margin,
location_left__gte=face.location_left - margin,
)
if len(similar_faces) > 1:
# Divide between faces with a person label and faces without
faces_with_person_label = []
faces_without_person_label = []
for similar_face in similar_faces:
if similar_face.person:
faces_with_person_label.append(similar_face)
else:
faces_without_person_label.append(similar_face)
# If there are faces with a person label, keep the first one and delete the rest
for similar_face in faces_with_person_label[1:]:
similar_face.delete()
# If there are faces with a person label, delete all of them
if len(faces_with_person_label) > 0:
for similar_face in faces_without_person_label:
similar_face.delete()
# Otherwise, keep the first face and delete the rest
else:
for similar_face in faces_without_person_label[1:]:
similar_face.delete()


class FaceDeduplication(admin.ModelAdmin):
actions = ["deduplicate_faces"]

def deduplicate_faces(self, request, queryset):
for photo in queryset:
# Get all faces in the photo
faces = Face.objects.filter(photo=photo)
# Check if there are any faces which have similar bounding boxes
for face in faces:
margin = int((face.location_right - face.location_left) * 0.05)
similar_faces = Face.objects.filter(
photo=self,
location_top__lte=face.location_top + margin,
location_top__gte=face.location_top - margin,
location_right__lte=face.location_right + margin,
location_right__gte=face.location_right - margin,
location_bottom__lte=face.location_bottom + margin,
location_bottom__gte=face.location_bottom - margin,
location_left__lte=face.location_left + margin,
location_left__gte=face.location_left - margin,
)
if len(similar_faces) > 1:
# Divide between faces with a person label and faces without
faces_with_person_label = []
faces_without_person_label = []
for similar_face in similar_faces:
if similar_face.person:
faces_with_person_label.append(similar_face)
else:
faces_without_person_label.append(similar_face)
# If there are faces with a person label, keep the first one and delete the rest
for similar_face in faces_with_person_label[1:]:
similar_face.delete()
# If there are faces with a person label, delete all of them
if len(faces_with_person_label) > 0:
for similar_face in faces_without_person_label:
similar_face.delete()
# Otherwise, keep the first face and delete the rest
else:
for similar_face in faces_without_person_label[1:]:
similar_face.delete()
AsyncTask(
deduplicate_faces_function,
queryset=queryset,
).run()


# Register your models here.
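The api/admin.py change above moves the face-deduplication loop out of the ModelAdmin action into a module-level deduplicate_faces_function that is handed to django-q, so the admin request returns immediately instead of blocking while every photo's faces are compared (it also fixes the old photo=self lookup, which filtered on the admin instance rather than the photo). A minimal sketch of the same pattern, with illustrative names and assuming a configured django-q cluster:

from django.contrib import admin
from django_q.tasks import AsyncTask


def heavy_cleanup(queryset):
    # Runs in a django-q worker process, not in the web request.
    for obj in queryset:
        ...  # expensive per-object work goes here


class CleanupAdmin(admin.ModelAdmin):
    actions = ["run_cleanup"]

    def run_cleanup(self, request, queryset):
        # Enqueue and return; the worker picks the task up asynchronously.
        AsyncTask(heavy_cleanup, queryset=queryset).run()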
21 changes: 4 additions & 17 deletions api/batch_jobs.py
@@ -5,37 +5,24 @@
import pytz
from constance import config as site_config
from django.db.models import Q
from django_q.tasks import AsyncTask

import api.util as util
from api.image_similarity import build_image_similarity_index
from api.ml_models import download_models
from api.models.long_running_job import LongRunningJob
from api.models.photo import Photo
from api.semantic_search import create_clip_embeddings


def create_batch_job(job_type, user):
def batch_calculate_clip_embedding(user):
import torch

job_id = uuid.uuid4()
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.now().replace(tzinfo=pytz.utc),
job_type=job_type,
job_type=LongRunningJob.JOB_CALCULATE_CLIP_EMBEDDINGS,
)

if job_type == LongRunningJob.JOB_CALCULATE_CLIP_EMBEDDINGS:
AsyncTask(batch_calculate_clip_embedding, job_id, user).run()
if job_type == LongRunningJob.JOB_DOWNLOAD_MODELS:
AsyncTask(download_models, job_id).run()

lrj.save()


def batch_calculate_clip_embedding(job_id, user):
import torch

lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)

count = Photo.objects.filter(
58 changes: 28 additions & 30 deletions api/directory_watcher.py
@@ -12,9 +12,8 @@
from django.db.models import Q, QuerySet
from django_q.tasks import AsyncTask

import api.models.album_thing
import api.util as util
from api.batch_jobs import create_batch_job
from api.batch_jobs import batch_calculate_clip_embedding
from api.face_classify import cluster_all_faces
from api.models import File, LongRunningJob, Photo
from api.models.file import (
@@ -25,7 +24,6 @@
is_valid_media,
is_video,
)
from api.places365.places365 import place365_instance


def should_skip(path):
@@ -132,15 +130,15 @@ def create_new_image(user, path) -> Optional[Photo]:
return None


def handle_new_image(user, path, job_id, photo):
def handle_new_image(user, path, job_id, photo=None):
"""
Handles the creation and all the processing of the photo.
Args:
user: The owner of the photo.
path: The file path of the image.
job_id: The long running job id, which gets updated when the task runs
photo: An optional paramater, where you can input a photo instead of creating a new one. Used for uploading.
job_id: The long-running job id, which gets updated when the task runs
photo: An optional parameter, where you can input a photo instead of creating a new one. Used for uploading.
Note:
This function is used, when uploading a picture, because rescanning does not perform machine learning tasks
@@ -150,6 +148,10 @@ def handle_new_image(user, path, job_id, photo):
start = datetime.datetime.now()
if photo is None:
photo = create_new_image(user, path)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: save image: {}, elapsed: {}".format(job_id, path, elapsed)
)
if photo:
util.logger.info("job {}: handling image {}".format(job_id, path))
photo._generate_thumbnail(True)
@@ -166,6 +168,13 @@
job_id, path, elapsed
)
)
photo._extract_exif_data(True)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract exif data: {}, elapsed: {}".format(
job_id, path, elapsed
)
)
photo._generate_captions(False)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
@@ -192,36 +201,26 @@
job_id, path, elapsed
)
)
photo._extract_exif_data(True)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract exif data: {}, elapsed: {}".format(
job_id, path, elapsed
)
)

photo._extract_rating(True)
photo._extract_faces()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract rating: {}, elapsed: {}".format(job_id, path, elapsed)
"job {}: extract faces: {}, elapsed: {}".format(job_id, path, elapsed)
)
photo._extract_video_length(True)
photo._get_dominant_color()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract video length: {}, elapsed: {}".format(
"job {}: get dominant color: {}, elapsed: {}".format(
job_id, path, elapsed
)
)
photo._extract_faces()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: extract faces: {}, elapsed: {}".format(job_id, path, elapsed)
)
photo._get_dominant_color()
photo._recreate_search_captions()
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info(
"job {}: image processed: {}, elapsed: {}".format(job_id, path, elapsed)
"job {}: search caption recreated: {}, elapsed: {}".format(
job_id, path, elapsed
)
)

except Exception as e:
try:
util.logger.exception(
@@ -246,9 +245,8 @@ def rescan_image(user, path, job_id):
photo._extract_exif_data(True)
photo._extract_date_time_from_exif(True)
photo._add_location_to_album_dates()
photo._extract_rating(True)
photo._extract_video_length(True)
photo._get_dominant_color()
photo._recreate_search_captions()

except Exception as e:
try:
@@ -375,17 +373,17 @@ def scan_photos(user, full_scan, job_id, scan_directory="", scan_files=[]):
for photo in all:
photo_scanner(*photo)

place365_instance.unload()
util.logger.info("Scanned {} files in : {}".format(files_found, scan_directory))
api.models.album_thing.update()

util.logger.info("Finished updating album things")
exisisting_photos = Photo.objects.filter(owner=user.id).order_by("image_hash")
paginator = Paginator(exisisting_photos, 5000)
for page in range(1, paginator.num_pages + 1):
for existing_photo in paginator.page(page).object_list:
existing_photo._check_files()
util.logger.info("Finished checking paths")
create_batch_job(LongRunningJob.JOB_CALCULATE_CLIP_EMBEDDINGS, user)
AsyncTask(batch_calculate_clip_embedding, user).run()

except Exception:
util.logger.exception("An error occurred: ")
lrj.failed = True
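Taken together, the api/batch_jobs.py and api/directory_watcher.py changes retire the create_batch_job dispatcher: scan_photos now enqueues batch_calculate_clip_embedding directly, and that function creates its own LongRunningJob with JOB_CALCULATE_CLIP_EMBEDDINGS instead of receiving a pre-made job_id. A minimal sketch of the new dispatch path, using a hypothetical wrapper name and assuming a running django-q cluster:

from django_q.tasks import AsyncTask

from api.batch_jobs import batch_calculate_clip_embedding


def queue_clip_embeddings(user):
    # Fire-and-forget: the worker function records and updates its own
    # LongRunningJob, so the caller has nothing to track here.
    AsyncTask(batch_calculate_clip_embedding, user).run()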
7 changes: 3 additions & 4 deletions api/filters.py
@@ -4,7 +4,6 @@

from django.db.models import Q
from rest_framework import filters
from rest_framework.compat import distinct

import api.util as util
from api.image_similarity import search_similar_embedding
@@ -20,7 +19,8 @@ def filter_queryset(self, request, queryset, view):
return queryset

orm_lookups = [
self.construct_search(str(search_field)) for search_field in search_fields
self.construct_search(str(search_field), queryset=queryset)
for search_field in search_fields
]

if request.user.semantic_search_topk > 0:
@@ -37,7 +37,6 @@ def filter_queryset(self, request, queryset, view):
)
elapsed = (datetime.datetime.now() - start).total_seconds()
util.logger.info("search similar embedding - took %.2f seconds" % (elapsed))
base = queryset
conditions = []
for search_term in search_terms:
queries = [Q(**{orm_lookup: search_term}) for orm_lookup in orm_lookups]
@@ -53,5 +52,5 @@ def filter_queryset(self, request, queryset, view):
# call queryset.distinct() in order to avoid duplicate items
# in the resulting queryset.
# We try to avoid this if possible, for performance reasons.
queryset = distinct(queryset, base)
queryset = queryset.distinct()
return queryset
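The api/filters.py change swaps rest_framework.compat.distinct(queryset, base) for a plain queryset.distinct() and passes the queryset into construct_search, tracking newer Django REST Framework releases where the compat helper is no longer available. The deduplication is still needed because OR-ing lookups that traverse to-many relations can return the same row more than once. An illustrative sketch with made-up lookup names, not the project's search_fields:

import operator
from functools import reduce

from django.db.models import Q


def apply_search(queryset, search_term, orm_lookups):
    # One Q object per lookup, OR-ed together into a single condition.
    condition = reduce(
        operator.or_, (Q(**{lookup: search_term}) for lookup in orm_lookups)
    )
    # Joins across to-many relations can duplicate rows, hence distinct().
    return queryset.filter(condition).distinct()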
2 changes: 1 addition & 1 deletion api/geocode/parsers/mapbox.py
@@ -3,7 +3,7 @@

def parse(location):
context = location.raw["context"]
center = [location.raw["center"][0], location.raw["center"][1]]
center = [location.raw["center"][1], location.raw["center"][0]]
local_name = location.raw["text"]
places = [local_name] + [
i["text"] for i in context if not i["id"].startswith("post")
2 changes: 1 addition & 1 deletion api/geocode/parsers/photon.py
@@ -13,8 +13,8 @@ def parse(location):
]
places = [data[prop] for prop in props if prop in data]
center = [
float(location.raw["geometry"]["coordinates"][0]),
float(location.raw["geometry"]["coordinates"][1]),
float(location.raw["geometry"]["coordinates"][0]),
]
return {
"features": [{"text": place, "center": center} for place in places],
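Both geocode parser fixes address the same coordinate-order bug: Mapbox's center field and GeoJSON geometry coordinates arrive as [longitude, latitude], while the rest of the geocode code evidently expects [latitude, longitude]. A small helper, not part of the commit, that makes the reordering explicit:

def to_lat_lon(coordinates):
    # GeoJSON-style pairs are [lon, lat]; return [lat, lon].
    lon, lat = coordinates
    return [float(lat), float(lon)]


# Mapbox reports Berlin as roughly [13.405, 52.52]; the parsers now store [52.52, 13.405].
print(to_lat_lon([13.405, 52.52]))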
4 changes: 4 additions & 0 deletions api/image_similarity.py
@@ -62,6 +62,10 @@ def search_similar_image(user, photo, threshold=27):

def build_image_similarity_index(user):
logger.info("building similarity index for user {}".format(user.username))
requests.delete(
settings.IMAGE_SIMILARITY_SERVER + "/build/",
json={"user_id": user.id},
)
start = datetime.now()
photos = (
Photo.objects.filter(Q(hidden=False) & Q(owner=user))
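The api/image_similarity.py addition clears any previously built index on the similarity service before rebuilding it, by sending a DELETE to the /build/ endpoint with the user's id. A hedged sketch of that call, with server_url standing in for whatever settings.IMAGE_SIMILARITY_SERVER resolves to in a given deployment:

import requests


def reset_similarity_index(server_url, user_id):
    # Drop the user's existing index so the rebuild starts from a clean slate.
    response = requests.delete(f"{server_url}/build/", json={"user_id": user_id})
    response.raise_for_status()  # the diff itself does not check the response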