Skip to content

Commit bbbecf6

Browse files
committed
[breaking] improved DB lookups.
1 parent e02b78c commit bbbecf6

File tree

6 files changed

+60
-70
lines changed

6 files changed

+60
-70
lines changed

README.md

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,27 +21,30 @@ Queries the [MusicBrainz](https://musicbrainz.org/) API and fetches data from li
2121

2222
## Configuration
2323

24-
This application uses Spring Boot which allows for easy handling of configurations. The following can either be passed
25-
as command line arguments (e.g. `--musicbrainz-enricher.host=foo`), or in a file called `application.properties` in the
26-
current working directory (e.g. containing `musicbrainz-enricher.host=foo`).
24+
This application uses Spring Boot, which allows for easy handling of configuration.
25+
See [the Spring Boot documentation](https://docs.spring.io/spring-boot/docs/current/reference/html/howto.html#howto.properties-and-configuration.external-properties-location)
26+
for details.
2727

2828
- `musicbrainz-enricher.host` (Either "test.musicbrainz.org" or "musicbrainz.org")
2929
- Credentials
3030
- Musicbrainz
3131
- `musicbrainz-enricher.musicbrainz.username`
3232
- `musicbrainz-enricher.musicbrainz.password`
33-
- Discogs API (can be left empty to use (slower) unauthenticated discogs API access)
33+
- [Discogs API](https://www.discogs.com/developers/) (can be left empty to use (slower) unauthenticated Discogs API
34+
access)
3435
- `musicbrainz-enricher.discogs.token`
35-
- Spotify API (can be left empty to disable spotify API access)
36+
- [Spotify API](https://developer.spotify.com/documentation/web-api) (can be left empty to disable Spotify API
37+
access)
3638
- `musicbrainz-enricher.spotify.client-id`
3739
- `musicbrainz-enricher.spotify.client-secret`
3840

3941
## Usage
4042

41-
Before starting, set up <https://github.com/metabrainz/musicbrainz-docker> locally with the database port open.
43+
Before starting, set up a copy of the MusicBrainz database using <https://github.com/metabrainz/musicbrainz-docker>
44+
locally with the database port open.
4245

43-
This tool can run in auto-query or single mode. Auto-query mode will enrich every entity from the musicbrainz database.
44-
Single mode takes a musicbrainz MBID and will enrich the matching entity.
46+
This tool can run in auto-query or single mode. Auto-query mode will enrich every entity from the MusicBrainz database.
47+
Single mode takes a MusicBrainz MBID and will enrich the matching entity.
4548

4649
Auto-query mode:
4750
`java -jar musicbrainz-enricher*.jar 'release'`.
@@ -51,5 +54,5 @@ Single mode:
5154

5255
### History Storage
5356

54-
This application will remember entities checked already and will only re-check them after duration `n` days, where `n`
55-
defaults to 90 days. In order to reset this, truncate the table `history_entry` in the schema `musicbrainz_enricher`.
57+
The application remembers entities that have already been checked and will not re-check them. In order to reset this, truncate the
58+
`*_history_entry` tables in the schema `musicbrainz_enricher`.

src/main/java/dev/rilling/musicbrainzenricher/MusicbrainzEnricherService.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,13 @@ public class MusicbrainzEnricherService {
3131

3232
public void runInAutoQueryMode(@NotNull DataType dataType) {
3333
switch (dataType) {
34-
case RELEASE -> musicbrainzAutoQueryService.autoQueryReleasesWithRelationships(mbid -> executeEnrichment(
34+
case RELEASE -> musicbrainzAutoQueryService.autoQueryReleases(mbid -> executeEnrichment(
3535
dataType,
3636
mbid,
3737
findFittingEnrichmentService(dataType)));
38-
case RELEASE_GROUP ->
39-
musicbrainzAutoQueryService.autoQueryReleaseGroupsWithRelationships(mbid -> executeEnrichment(dataType,
40-
mbid,
41-
findFittingEnrichmentService(dataType)));
38+
case RELEASE_GROUP -> musicbrainzAutoQueryService.autoQueryReleaseGroups(mbid -> executeEnrichment(dataType,
39+
mbid,
40+
findFittingEnrichmentService(dataType)));
4241
}
4342
}
4443

src/main/java/dev/rilling/musicbrainzenricher/api/musicbrainz/MusicbrainzAutoQueryService.java

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,21 @@ public class MusicbrainzAutoQueryService {
2828
this.releaseGroupRepository = releaseGroupRepository;
2929
}
3030

31-
public void autoQueryReleasesWithRelationships(@NotNull Consumer<UUID> mbidConsumer) {
32-
long count = releaseRepository.countNewReleasesWhereRelationshipsExist();
33-
LOGGER.info("Found a total of {} new auto query releases.", count);
34-
35-
long offset = 0;
36-
while (offset < count) {
37-
LOGGER.info("Loading {} releases with offset {}...", LIMIT, offset);
38-
releaseRepository.findNewReleaseMbidWhereRelationshipsExist(offset, LIMIT).forEach(mbidConsumer);
39-
offset += LIMIT;
31+
public void autoQueryReleases(@NotNull Consumer<UUID> mbidConsumer) {
32+
long count = releaseRepository.countFromWorkQueue();
33+
while (count > 0) {
34+
LOGGER.info("{} auto query releases remaining.", count);
35+
releaseRepository.findFromWorkQueue(LIMIT).forEach(mbidConsumer);
36+
count = releaseRepository.countFromWorkQueue();
4037
}
4138
}
4239

43-
public void autoQueryReleaseGroupsWithRelationships(@NotNull Consumer<UUID> mbidConsumer) {
44-
long count = releaseGroupRepository.countNewReleaseGroupsWhereRelationshipsExist();
45-
LOGGER.info("Found a total of {} new auto query release groups.", count);
46-
47-
long offset = 0;
48-
while (offset < count) {
49-
LOGGER.info("Loading {} release groups with offset {}...", LIMIT, offset);
50-
releaseGroupRepository.findNewReleaseGroupsMbidWhereRelationshipsExist(offset, LIMIT).forEach(mbidConsumer);
51-
offset += LIMIT;
40+
public void autoQueryReleaseGroups(@NotNull Consumer<UUID> mbidConsumer) {
41+
long count = releaseGroupRepository.countFromWorkQueue();
42+
while (count > 0) {
43+
LOGGER.info("{} auto query release groups remaining.", count);
44+
releaseGroupRepository.findFromWorkQueue(LIMIT).forEach(mbidConsumer);
45+
count = releaseGroupRepository.countFromWorkQueue();
5246
}
5347
}
5448

src/main/java/dev/rilling/musicbrainzenricher/core/ReleaseGroupRepository.java

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,28 +20,13 @@ public class ReleaseGroupRepository {
2020
this.jdbcTemplate = jdbcTemplate;
2121
}
2222

23-
public long countNewReleaseGroupsWhereRelationshipsExist() {
24-
return Objects.requireNonNull(jdbcTemplate.queryForObject("""
25-
SELECT COUNT(*) FROM musicbrainz.release_group rg
26-
WHERE rg.id IN
27-
(SELECT lrgu.entity0 FROM musicbrainz.l_release_group_url lrgu)
28-
AND rg.gid NOT IN
29-
(SELECT rghe.release_group_gid FROM musicbrainz_enricher.release_group_history_entry rghe)
30-
""", Long.class));
23+
public long countFromWorkQueue() {
24+
return Objects.requireNonNull(jdbcTemplate.queryForObject("SELECT COUNT(*) FROM musicbrainz_enricher.release_group_work_queue", Long.class));
3125
}
3226

3327
@NotNull
34-
public List<UUID> findNewReleaseGroupsMbidWhereRelationshipsExist(long offset, int limit) {
35-
List<UUID> mbids = jdbcTemplate.query("""
36-
SELECT rg.gid
37-
FROM musicbrainz.release_group rg
38-
WHERE rg.id IN
39-
(SELECT lrgu.entity0 FROM musicbrainz.l_release_group_url lrgu)
40-
AND rg.gid NOT IN
41-
(SELECT rghe.release_group_gid FROM musicbrainz_enricher.release_group_history_entry rghe)
42-
ORDER BY rg.id
43-
OFFSET ? LIMIT ?
44-
""", (rs, rowNum) -> rs.getObject("gid", UUID.class), offset, limit);
28+
public List<UUID> findFromWorkQueue(long offset) {
29+
List<UUID> mbids = jdbcTemplate.query("SELECT rg.gid FROM musicbrainz_enricher.release_group_work_queue rg LIMIT ?", (rs, rowNum) -> rs.getObject("gid", UUID.class), offset);
4530
return Collections.unmodifiableList(mbids);
4631
}
4732
}

src/main/java/dev/rilling/musicbrainzenricher/core/ReleaseRepository.java

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,13 @@ public class ReleaseRepository {
1919
this.jdbcTemplate = jdbcTemplate;
2020
}
2121

22-
public long countNewReleasesWhereRelationshipsExist() {
23-
return Objects.requireNonNull(jdbcTemplate.queryForObject("""
24-
SELECT COUNT(*) FROM musicbrainz.release r
25-
WHERE r.id IN
26-
(SELECT lru.entity0 FROM musicbrainz.l_release_url lru)
27-
AND r.gid NOT IN
28-
(SELECT rhe.release_gid FROM musicbrainz_enricher.release_history_entry rhe)
29-
""", Long.class));
22+
public long countFromWorkQueue() {
23+
return Objects.requireNonNull(jdbcTemplate.queryForObject("SELECT COUNT(*) FROM musicbrainz_enricher.release_work_queue", Long.class));
3024
}
3125

3226
@NotNull
33-
public List<UUID> findNewReleaseMbidWhereRelationshipsExist(long offset, int limit) {
34-
List<UUID> mbids = jdbcTemplate.query("""
35-
SELECT r.gid FROM musicbrainz.release r
36-
WHERE r.id IN
37-
(SELECT lru.entity0 FROM musicbrainz.l_release_url lru)
38-
AND r.gid NOT IN
39-
(SELECT rhe.release_gid FROM musicbrainz_enricher.release_history_entry rhe)
40-
ORDER BY r.id
41-
OFFSET ? LIMIT ?
42-
""", (rs, rowNum) -> rs.getObject("gid", UUID.class), offset, limit);
27+
public List<UUID> findFromWorkQueue(int limit) {
28+
List<UUID> mbids = jdbcTemplate.query("SELECT r.gid FROM musicbrainz_enricher.release_work_queue r LIMIT ?", (rs, rowNum) -> rs.getObject("gid", UUID.class), limit);
4329
return Collections.unmodifiableList(mbids);
4430
}
4531
}

src/main/resources/db/migration/V1_0__create_history_schema.sql

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,31 @@ CREATE TABLE musicbrainz_enricher.release_history_entry
44
ON DELETE CASCADE
55
);
66

7+
CREATE MATERIALIZED VIEW musicbrainz_enricher.release_with_relationships AS
8+
SELECT *
9+
FROM musicbrainz.release r
10+
WHERE r.id IN
11+
(SELECT lru.entity0 FROM musicbrainz.l_release_url lru);
12+
13+
CREATE VIEW musicbrainz_enricher.release_work_queue AS
14+
SELECT *
15+
FROM musicbrainz_enricher.release_with_relationships r
16+
WHERE r.gid NOT IN (SELECT rhe.release_gid FROM musicbrainz_enricher.release_history_entry rhe);
17+
18+
719
CREATE TABLE musicbrainz_enricher.release_group_history_entry
820
(
921
release_group_gid UUID PRIMARY KEY NOT NULL REFERENCES musicbrainz.release_group (gid)
1022
ON DELETE CASCADE
1123
);
24+
25+
CREATE MATERIALIZED VIEW musicbrainz_enricher.release_group_with_relationships AS
26+
SELECT *
27+
FROM musicbrainz.release_group rg
28+
WHERE rg.id IN
29+
(SELECT lrgu.entity0 FROM musicbrainz.l_release_group_url lrgu);
30+
31+
CREATE VIEW musicbrainz_enricher.release_group_work_queue AS
32+
SELECT *
33+
FROM musicbrainz_enricher.release_group_with_relationships rg
34+
WHERE rg.gid NOT IN (SELECT rghe.release_group_gid FROM musicbrainz_enricher.release_group_history_entry rghe);

0 commit comments

Comments
 (0)