From 3bdc563ed3df3c90a87863b9f6cae79c400372a7 Mon Sep 17 00:00:00 2001 From: Valerian G Date: Sun, 18 Sep 2022 19:33:02 +0100 Subject: [PATCH] add matcher impl for tv show scanner --- Cargo.lock | 71 +++++- dim/Cargo.toml | 4 +- dim/src/errors.rs | 4 +- dim/src/external/filename.rs | 34 ++- dim/src/external/mock.rs | 3 + dim/src/external/mod.rs | 9 +- dim/src/external/tmdb/metadata_provider.rs | 76 +++--- dim/src/external/tmdb/mod.rs | 49 ++-- dim/src/external/tmdb/raw_client.rs | 136 +++++------ dim/src/lib.rs | 2 + dim/src/scanner/error.rs | 2 +- dim/src/scanner/mediafile.rs | 2 +- dim/src/scanner/mod.rs | 15 +- dim/src/scanner/movie.rs | 25 +- dim/src/scanner/tv_show.rs | 256 +++++++++++++++++++-- 15 files changed, 526 insertions(+), 162 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b49ee41f..f1f709bdd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,6 +63,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "anitomy" version = "0.1.2" @@ -90,6 +105,19 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "async-compression" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345fd392ab01f746c717b1357165b76f0b67a60192007b234058c9045fdcf695" +dependencies = [ + "brotli", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-recursion" version = "0.3.2" @@ -198,6 +226,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "3.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bstr" version = "0.2.17" @@ -646,6 +695,7 @@ dependencies = [ "fuzzy-matcher", "governor", "http", + "hyper", "image", "itertools", "lazy_static", @@ -658,6 +708,7 @@ dependencies = [ "priority-queue", "rand 0.7.3", "reqwest", + "retry-block", "rust-embed", "serde", "serde_derive", @@ -1196,9 +1247,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" [[package]] name = "hyper" -version = "0.14.19" +version = "0.14.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42dc3c131584288d375f2d07f822b0cb012d8c6fb899a5b9fdb3cb7eb9b6004f" +checksum = "02c929dc5c39e335a03c405292728118860721b10190d98c2a0f0efd5baafbac" dependencies = [ "bytes 1.1.0", "futures-channel", @@ -2198,6 +2249,7 @@ version = "0.11.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b75aa69a3f06bbcc66ede33af2af253c6f7a86b1ca0033f60c580a27074fbf92" dependencies = [ + "async-compression", "base64", "bytes 1.1.0", "encoding_rs", @@ -2222,6 +2274,7 @@ dependencies = [ "serde_urlencoded", "tokio", "tokio-rustls 0.23.4", + "tokio-util 
0.7.3", "tower-service", "url", "wasm-bindgen", @@ -2231,6 +2284,20 @@ dependencies = [ "winreg", ] +[[package]] +name = "retry-block" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "095a7c37fd304fa4fef479da6293cd8dcd8ddb7eb26a03c28cc33f949362a379" +dependencies = [ + "async-trait", + "futures-util", + "rand 0.8.5", + "serde", + "tokio", + "tokio-stream", +] + [[package]] name = "ring" version = "0.16.20" diff --git a/dim/Cargo.toml b/dim/Cargo.toml index a62f2db56..04140dd0f 100644 --- a/dim/Cargo.toml +++ b/dim/Cargo.toml @@ -33,7 +33,7 @@ rand = { version = "0.7.3", features = ["small_rng"] } chrono = { version = "0.4.19", features = ["serde"] } rust-embed = "^5.9.0" torrent-name-parser = "0.6.3" -reqwest = { version = "0.11.0", features = ["json", "rustls-tls"], default-features = false } +reqwest = { version = "0.11.0", features = ["json", "rustls-tls", "brotli"], default-features = false } notify = "4.0.17" cfg-if = "1.0.0" once_cell = "1.8.0" @@ -72,6 +72,8 @@ itertools = "0.10.3" # FIXME: Remove when we get rid of xtra_proc new_xtra = { package = "xtra", git = "https://github.com/Restioson/xtra", features = ["with-tokio-1"] } url = "2.2.2" +retry-block = "1.0.0" +hyper = "0.14.20" [build-dependencies] fs_extra = "1.1.0" diff --git a/dim/src/errors.rs b/dim/src/errors.rs index a5f4c5503..84ea17d81 100644 --- a/dim/src/errors.rs +++ b/dim/src/errors.rs @@ -103,8 +103,8 @@ impl warp::reject::Reject for DimError {} impl warp::Reply for DimError { fn into_response(self) -> warp::reply::Response { - //| Self::ScannerError(_) - //| Self::TmdbIdSearchError(_) => StatusCode::NOT_FOUND, + //| Self::ScannerError(_) + //| Self::TmdbIdSearchError(_) => StatusCode::NOT_FOUND, let status = match self { Self::LibraryNotFound | Self::NoneError diff --git a/dim/src/external/filename.rs b/dim/src/external/filename.rs index 97dbcfd0e..94ea1d6fe 100644 --- a/dim/src/external/filename.rs +++ b/dim/src/external/filename.rs @@ -38,12 +38,44 @@ impl FilenameMetadata for Anitomy { year: metadata .get(ElementCategory::AnimeYear) .and_then(|x| x.parse().ok()), + // If season isnt specified we assume season 1 here. season: metadata .get(ElementCategory::AnimeSeason) - .and_then(|x| x.parse().ok()), + .and_then(|x| x.parse().ok()) + .or(Some(1)), episode: metadata .get(ElementCategory::EpisodeNumber) .and_then(|x| x.parse().ok()), }) } } + +/// A special filename metadata extractor that combines torrent_name_parser and anitomy which in +/// some cases is necessary. TNP is really good at extracting show titles but not season and +/// episode numbers. Anitomy excels at this. Here we combine the title extracted by TPN and the +/// season and episode number extracted by Anitomy. +pub struct CombinedExtractor; + +impl FilenameMetadata for CombinedExtractor { + fn from_str(s: &str) -> Option { + let metadata_tnp = TorrentMetadata::from(s).ok()?; + let metadata_anitomy = match Anitomy::new().parse(s) { + Ok(v) | Err(v) => v, + }; + + Some(Metadata { + name: metadata_tnp.title().to_owned(), + year: metadata_tnp.year().map(|x| x as i64), + // If season isnt specified we assume season 1 here as some releases only have a + // episode number and no season number. 
+ season: metadata_anitomy + .get(ElementCategory::AnimeSeason) + .and_then(|x| x.parse().ok()) + .or(Some(1)), + episode: metadata_anitomy + .get(ElementCategory::EpisodeNumber) + .and_then(|x| x.parse().ok()), + }) + + } +} diff --git a/dim/src/external/mock.rs b/dim/src/external/mock.rs index 2ac1c9f84..18a2798ac 100644 --- a/dim/src/external/mock.rs +++ b/dim/src/external/mock.rs @@ -1,6 +1,7 @@ use super::ExternalActor; use super::ExternalMedia; use super::ExternalQuery; +use super::IntoQueryShow; use super::Result; #[derive(Debug, Clone, Copy)] @@ -23,3 +24,5 @@ impl ExternalQuery for MockProvider { unimplemented!() } } + +impl IntoQueryShow for MockProvider {} diff --git a/dim/src/external/mod.rs b/dim/src/external/mod.rs index 28841d78a..3d230dc81 100644 --- a/dim/src/external/mod.rs +++ b/dim/src/external/mod.rs @@ -129,7 +129,7 @@ impl std::fmt::Display for MediaSearchType { /// Trait that must be implemented by external metadata agents which allows the scanners to query /// for data. #[async_trait] -pub trait ExternalQuery: Debug + Send + Sync { +pub trait ExternalQuery: IntoQueryShow + Debug + Send + Sync { /// Search by title and year. This must return a Vec of `ExternalMedia` sorted by the search /// score. async fn search(&self, title: &str, year: Option) -> Result>; @@ -138,11 +138,18 @@ pub trait ExternalQuery: Debug + Send + Sync { async fn search_by_id(&self, external_id: &str) -> Result; /// Get all actors for a media by external id. Actors must be ordered in order of importance. async fn cast(&self, external_id: &str) -> Result>; +} + +pub trait IntoQueryShow { /// Upcast `self` into `ExternalQueryShow`. It is important that providers that can query for /// tv shows, implements this to return `Some(self)`. fn as_query_show<'a>(&'a self) -> Option<&'a dyn ExternalQueryShow> { None } + + fn into_query_show(self: Arc) -> Option> { + None + } } /// Trait must be implemented by all external metadata agents which support querying for tv shows. diff --git a/dim/src/external/tmdb/metadata_provider.rs b/dim/src/external/tmdb/metadata_provider.rs index 4ff2f845c..c3f3376ee 100644 --- a/dim/src/external/tmdb/metadata_provider.rs +++ b/dim/src/external/tmdb/metadata_provider.rs @@ -10,6 +10,7 @@ use std::time::Instant; use async_trait::async_trait; use tokio::sync::broadcast; +use tracing::instrument; use crate::external::{Result as QueryResult, *}; use core::result::Result; @@ -28,7 +29,7 @@ use super::*; /// How long items should be cached for. Defaults to 12 hours. const CACHED_ITEM_TTL: Duration = Duration::from_secs(60 * 60 * 12); /// How many requests we can send per second. -const REQ_QUOTA: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(200) }; +const REQ_QUOTA: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(128) }; type Governor = RateLimiter; @@ -62,6 +63,10 @@ impl TMDBMetadataProvider { pub fn new(api_key: &str) -> Self { let http_client = reqwest::ClientBuilder::new() .user_agent(APP_USER_AGENT) + .brotli(true) + .tcp_keepalive(Some(Duration::from_millis(16_000))) + .tcp_nodelay(true) + .http1_only() .build() .expect("building this client should never fail."); @@ -273,14 +278,22 @@ impl TMDBMetadataProvider { .. 
}) = media { + let genre_vec = genres.insert(vec![]); + for genre_id in ids.iter().cloned() { if let Some(genre) = genre_id_cache.get(&genre_id) { - genres.push(genre.name.clone()); + genre_vec.push(Genre { + id: genre_id, + name: genre.name.clone(), + }); } else if let Some(genre) = genre_list.genres.iter().find(|x| x.id == genre_id) { genre_id_cache.insert(genre_id, genre.clone()); - genres.push(genre.name.clone()); + genre_vec.push(Genre { + id: genre_id, + name: genre.name.clone(), + }); } } } @@ -321,31 +334,14 @@ impl TMDBMetadataProvider { ) .await?; - match media_type { - MediaSearchType::Movie => { - let movie_details = - serde_json::from_str::(&response_body).map_err(|err| { - crate::external::Error::DeserializationError { - body: response_body, - error: format!("{err}"), - } - })?; - - Ok(movie_details.into()) + let details = serde_json::from_str::(&response_body).map_err(|err| { + crate::external::Error::DeserializationError { + body: response_body, + error: format!("{err}"), } + })?; - MediaSearchType::Tv => { - let tv_details = - serde_json::from_str::(&response_body).map_err(|err| { - crate::external::Error::DeserializationError { - body: response_body, - error: format!("{err}"), - } - })?; - - Ok(tv_details.into()) - } - } + Ok(details.into()) } async fn cast( @@ -497,21 +493,48 @@ impl ExternalQuery for MetadataProviderOf where K: sealed::AssocMediaTypeConst + Send + Sync + 'static, { + #[instrument] async fn search(&self, title: &str, year: Option) -> QueryResult> { self.provider.search(title, year, K::MEDIA_TYPE).await } + #[instrument] async fn search_by_id(&self, external_id: &str) -> QueryResult { self.provider.search_by_id(external_id, K::MEDIA_TYPE).await } + #[instrument] async fn cast(&self, external_id: &str) -> QueryResult> { self.provider.cast(external_id, K::MEDIA_TYPE).await } } +impl IntoQueryShow for MetadataProviderOf +where + K: sealed::AssocMediaTypeConst + Send + Sync + 'static, +{ + default fn as_query_show<'a>(&'a self) -> Option<&'a dyn ExternalQueryShow> { + None + } + + default fn into_query_show(self: Arc) -> Option> { + None + } +} + +impl IntoQueryShow for MetadataProviderOf { + fn as_query_show(&self) -> Option<&dyn ExternalQueryShow> { + Some(self) + } + + fn into_query_show(self: Arc) -> Option> { + Some(self) + } +} + #[async_trait] impl ExternalQueryShow for MetadataProviderOf { + #[instrument] async fn seasons_for_id(&self, external_id: &str) -> QueryResult> { let mut seasons = self.provider.seasons_by_id(external_id).await?; seasons.sort_by(|a, b| a.season_number.cmp(&b.season_number)); @@ -519,6 +542,7 @@ impl ExternalQueryShow for MetadataProviderOf { Ok(seasons) } + #[instrument] async fn episodes_for_season( &self, external_id: &str, diff --git a/dim/src/external/tmdb/mod.rs b/dim/src/external/tmdb/mod.rs index 23831d953..788a4aef5 100644 --- a/dim/src/external/tmdb/mod.rs +++ b/dim/src/external/tmdb/mod.rs @@ -13,10 +13,7 @@ mod metadata_provider; mod raw_client; pub use metadata_provider::{MetadataProviderOf, Movies, TMDBMetadataProvider, TvShows}; -use raw_client::{ - Cast, Genre, GenreList, MovieDetails, SearchResponse, TMDBMediaObject, TvDetails, TvEpisodes, - TvSeasons, -}; +use raw_client::{Cast, Genre, GenreList, SearchResponse, TMDBMediaObject, TvEpisodes, TvSeasons}; #[derive(Debug, displaydoc::Display, Clone, thiserror::Error)] pub(self) enum TMDBClientRequestError { @@ -102,7 +99,7 @@ mod tests { posters: vec!["https://image.tmdb.org/t/p/w600_and_h900_bestv2/yvQGoc9GGTfOyPty5ASShT9tPBD.jpg".into()], backdrops: 
vec!["https://image.tmdb.org/t/p/original/wdHK7RZNIGfskbGCIusSKN3vto6.jpg".into()], genres: vec!["Comedy".into()], - rating: Some(8.3), + rating: Some(8.0), duration: None } } @@ -112,13 +109,15 @@ mod tests { let provider = TMDBMetadataProvider::new("38c372f5bc572c8aadde7a802638534e"); let provider_shows: MetadataProviderOf = provider.tv_shows(); - let metadata = provider_shows + let mut metadata = provider_shows .search("letterkenny", None) .await .expect("search results should exist"); let letterkenny = make_letterkenny(); + metadata[0].rating.as_mut().map(|x| *x = 8.0); + assert_eq!(metadata, vec![letterkenny]); } @@ -127,13 +126,15 @@ mod tests { let provider = TMDBMetadataProvider::new("38c372f5bc572c8aadde7a802638534e"); let provider_shows: MetadataProviderOf = provider.tv_shows(); - let media = provider_shows + let mut media = provider_shows .search_by_id("65798") .await .expect("search results should exist"); let letterkenny = make_letterkenny(); + media.rating.as_mut().map(|x| *x = 8.0); + assert_eq!(letterkenny, media); } @@ -147,16 +148,13 @@ mod tests { .await .expect("cast should exist"); - let expected = ExternalActor { - external_id: "30614".into(), - name: "Ryan Gosling".into(), - profile_path: Some( - "https://image.tmdb.org/t/p/original/lyUyVARQKhGxaxy0FbPJCQRpiaW.jpg".into(), - ), - character: "K".into(), - }; - - assert_eq!(cast[0], expected); + assert_eq!(cast[0].external_id, "30614".to_string()); + assert_eq!(cast[0].name, "Ryan Gosling".to_string()); + assert_eq!( + cast[0].profile_path, + Some("https://image.tmdb.org/t/p/original/lyUyVARQKhGxaxy0FbPJCQRpiaW.jpg".to_string()) + ); + assert!(matches!(cast[0].character.as_str(), "K" | "\'K\'")); } #[tokio::test] @@ -224,4 +222,21 @@ mod tests { assert_eq!(res.release_date.unwrap().year(), 2019); } + + #[tokio::test] + async fn johhny_test_seasons() { + let provider = TMDBMetadataProvider::new("38c372f5bc572c8aadde7a802638534e"); + let provider_shows: MetadataProviderOf = provider.tv_shows(); + + provider_shows + .seasons_for_id("1769") + .await + .expect("Failed to get seasons."); + } + + #[tokio::test] + async fn deserialize_letterkenny() { + let body = r#"{"id": 1234,"name": "letter kenny"}"#; + serde_json::from_str::(&body).unwrap(); + } } diff --git a/dim/src/external/tmdb/raw_client.rs b/dim/src/external/tmdb/raw_client.rs index 27e0a5743..3818fe7ff 100644 --- a/dim/src/external/tmdb/raw_client.rs +++ b/dim/src/external/tmdb/raw_client.rs @@ -1,3 +1,9 @@ +use futures::future::BoxFuture; +use futures::FutureExt; +use retry_block::async_retry; +use retry_block::delay::Fixed; +use retry_block::OperationResult; +use std::error::Error; use std::future::Future; use std::time::Duration; @@ -16,22 +22,19 @@ pub struct SearchResponse { pub results: Vec>, } -#[derive(Deserialize, Clone, Debug)] +#[derive(Deserialize, Clone, Debug, Default)] pub struct TMDBMediaObject { pub id: u64, - #[serde(rename(serialize = "title"))] - #[serde(alias = "title", alias = "name")] + #[serde(alias = "name")] pub title: String, - #[serde(rename(serialize = "release_date"))] - #[serde(alias = "first_air_date", alias = "release_date")] + #[serde(alias = "first_air_date")] pub release_date: Option, pub overview: Option, pub vote_average: Option, pub poster_path: Option, pub backdrop_path: Option, pub genre_ids: Option>, - #[serde(skip_deserializing)] - pub genres: Vec, + pub genres: Option>, pub runtime: Option, } @@ -57,7 +60,12 @@ impl From for ExternalMedia { .into_iter() .map(|x| format!("https://image.tmdb.org/t/p/original{x}")) 
.collect(), - genres: media.genres, + genres: media + .genres + .unwrap_or_default() + .into_iter() + .map(|genre| genre.name) + .collect(), rating: media.vote_average, duration: media.runtime.map(|n| Duration::from_secs(n)), } @@ -75,46 +83,6 @@ pub struct Genre { pub name: String, } -#[derive(Deserialize, Debug)] -pub struct MovieDetails { - #[serde(flatten)] - pub media_object: TMDBMediaObject, -} - -impl From for ExternalMedia { - fn from(details: MovieDetails) -> Self { - let MovieDetails { media_object } = details; - - media_object.into() - } -} - -#[derive(Deserialize, Debug)] -pub struct TvDetails { - #[serde(flatten)] - pub media_object: TMDBMediaObject, - pub genres: Option>, -} - -impl From for ExternalMedia { - fn from(details: TvDetails) -> Self { - let TvDetails { - media_object, - genres, - } = details; - - let mut media: ExternalMedia = media_object.into(); - - media.genres = genres - .unwrap_or_default() - .into_iter() - .map(|genre| genre.name) - .collect(); - - media - } -} - #[derive(Deserialize, Debug)] pub struct CastActor { pub id: u64, @@ -174,7 +142,7 @@ impl From for Vec { #[derive(Deserialize, Debug)] pub struct TvSeason { pub id: u64, - pub air_date: String, + pub air_date: Option, pub episode_count: u64, pub name: String, pub overview: Option, @@ -269,7 +237,7 @@ impl TMDBClient { &self, args: A, path: String, - ) -> impl Future> + ) -> BoxFuture> where A: IntoIterator, T: ToString, @@ -280,34 +248,52 @@ impl TMDBClient { .map(|(k, v)| (k.to_string(), v.to_string())) .collect(); - let request = self.provider.http_client.get(url).query(&args); + let provider = self.provider.http_client.clone(); async move { - let response = request - .send() - .await - .map_err(TMDBClientRequestError::reqwest)?; - - let status = response.status(); - - let body = response - .bytes() - .await - .map_err(TMDBClientRequestError::reqwest)?; - - let body = std::str::from_utf8(&body) - .map_err(|_| TMDBClientRequestError::InvalidUTF8Body) - .map(|st| st.to_string()); - - if status != reqwest::StatusCode::OK { - return Err(TMDBClientRequestError::NonOkResponse { - body: body.unwrap_or_default(), - status, - }); - } - - body + let result = async_retry!(Fixed::new(Duration::from_millis(50)).take(24), { + let request = provider.get(url.clone()).query(&args); + let response = match request + .send() + .await + .map_err(TMDBClientRequestError::reqwest) + { + Ok(x) => x, + Err(err) => return Err(err).into(), + }; + + let status = response.status(); + + let body = match response + .bytes() + .await + .map_err(TMDBClientRequestError::reqwest) + { + Ok(x) => x, + Err(err) => return Err(err).into(), + }; + + let body = std::str::from_utf8(&body) + .map_err(|_| TMDBClientRequestError::InvalidUTF8Body) + .map(|st| st.to_string()); + + if status != reqwest::StatusCode::OK { + return Err(TMDBClientRequestError::NonOkResponse { + body: body.unwrap_or_default(), + status, + }) + .into(); + } + + match body { + Ok(x) => OperationResult::Ok(x), + Err(err) => OperationResult::Err(err), + } + }); + + result } + .boxed() } pub async fn genre_list( diff --git a/dim/src/lib.rs b/dim/src/lib.rs index f46cfdb62..f4494585a 100644 --- a/dim/src/lib.rs +++ b/dim/src/lib.rs @@ -19,6 +19,8 @@ //! To test run `make test` in the root, or `cargo test` in the root of each module including the //! root dir. 
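// `min_specialization` backs the `default fn` impls of `IntoQueryShow` added in
// external/tmdb/metadata_provider.rs; `let_else` backs the `let ... else` bindings
// used by the new matcher in scanner/tv_show.rs.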
+#![feature(min_specialization, let_else)] + use std::fs::create_dir_all; use tracing_subscriber::fmt; use tracing_subscriber::layer::SubscriberExt; diff --git a/dim/src/scanner/error.rs b/dim/src/scanner/error.rs index 4c75db368..6ed4b5f54 100644 --- a/dim/src/scanner/error.rs +++ b/dim/src/scanner/error.rs @@ -4,5 +4,5 @@ use thiserror::Error; #[derive(Clone, Debug, Error, Display)] pub enum Error { /// Abc - Abc + Abc, } diff --git a/dim/src/scanner/mediafile.rs b/dim/src/scanner/mediafile.rs index b8217ab3c..dc9d9a4a5 100644 --- a/dim/src/scanner/mediafile.rs +++ b/dim/src/scanner/mediafile.rs @@ -12,8 +12,8 @@ use database::DatabaseError; use database::DbConnection; use displaydoc::Display; -use std::sync::Arc; use std::path::PathBuf; +use std::sync::Arc; use tokio::sync::Semaphore; use tokio::sync::SemaphorePermit; diff --git a/dim/src/scanner/mod.rs b/dim/src/scanner/mod.rs index 5228d803c..219f1cdd8 100644 --- a/dim/src/scanner/mod.rs +++ b/dim/src/scanner/mod.rs @@ -12,6 +12,7 @@ use self::mediafile::Error as CreatorError; use self::mediafile::MediafileCreator; use super::external::filename::FilenameMetadata; use super::external::filename::Metadata; +use super::external::filename::CombinedExtractor; use super::external::ExternalQuery; use crate::core::EventTx; @@ -96,6 +97,7 @@ pub fn parse_filenames( let metas = IntoIterator::into_iter([ TorrentMetadata::from_str(&filename), Anitomy::from_str(&filename), + CombinedExtractor::from_str(&filename), ]) .filter_map(|x| x) .collect::>(); @@ -131,10 +133,16 @@ pub async fn insert_mediafiles( dirs: Vec + Send + 'static>, ) -> Result, Error> { let now = Instant::now(); - let subfiles = tokio::task::spawn_blocking(|| get_subfiles(dirs.into_iter())).await.unwrap(); + let subfiles = tokio::task::spawn_blocking(|| get_subfiles(dirs.into_iter())) + .await + .unwrap(); let elapsed = now.elapsed(); - info!(elapsed_ms = elapsed.as_millis(), files = subfiles.len(), "Walked all target directories."); + info!( + elapsed_ms = elapsed.as_millis(), + files = subfiles.len(), + "Walked all target directories." + ); let parsed = parse_filenames(subfiles.iter()); @@ -167,7 +175,7 @@ pub async fn insert_mediafiles( let mut mediafiles = vec![]; - for chunk in insertables.chunks(128) { + for chunk in insertables.chunks(256) { mediafiles.append(&mut instance.insert_batch(chunk.iter()).await?); } @@ -199,6 +207,7 @@ pub async fn start_custom( let matcher = match media_type { MediaType::Movie => Arc::new(movie::MovieMatcher) as Arc, + MediaType::Tv => Arc::new(tv_show::TvMatcher) as Arc, _ => unimplemented!(), }; diff --git a/dim/src/scanner/movie.rs b/dim/src/scanner/movie.rs index e4fc5d782..6b16e49b1 100644 --- a/dim/src/scanner/movie.rs +++ b/dim/src/scanner/movie.rs @@ -55,7 +55,7 @@ pub enum Error { GetOrInsertMedia(database::DatabaseError), } -fn asset_from_url(url: &str) -> Option { +pub fn asset_from_url(url: &str) -> Option { let url = Url::parse(url).ok()?; let filename = uuid::Uuid::new_v4().to_hyphenated().to_string(); let local_path = format_path(Some(format!("{filename}.jpg"))); @@ -128,7 +128,10 @@ impl MovieMatcher { backdrop: backdrop_ids.first().map(|x| x.id), }; - let media_id = media.lazy_insert(tx).await.map_err(Error::GetOrInsertMedia)?; + let media_id = media + .lazy_insert(tx) + .await + .map_err(Error::GetOrInsertMedia)?; // Link all backdrops and posters to our media. 
for poster in poster_ids { @@ -181,24 +184,24 @@ impl MovieMatcher { } .update(tx, file.id) .await - .inspect_err(|error| { - error!(?error, "Failed to update mediafile to point to new parent.") - }) + .inspect_err(|error| error!(?error, "Failed to update mediafile to point to new parent.")) .map_err(Error::UpdateMediafile)?; // Sometimes we rematch against a media object that already exists but we are the last // child for the parent. When this happens we want to cleanup. match file.media_id { Some(old_id) => { - let count = Movie::count_children(tx, old_id).await.inspect_err( - |error| error!(?error, %old_id, "Failed to grab children count."), - ) + let count = Movie::count_children(tx, old_id) + .await + .inspect_err(|error| error!(?error, %old_id, "Failed to grab children count.")) .map_err(Error::ChildrenCount)?; if count == 0 { - Media::delete(tx, old_id).await.inspect_err( - |error| error!(?error, %old_id, "Failed to cleanup child-less parent."), - ) + Media::delete(tx, old_id) + .await + .inspect_err( + |error| error!(?error, %old_id, "Failed to cleanup child-less parent."), + ) .map_err(Error::ChildCleanup)?; } } diff --git a/dim/src/scanner/tv_show.rs b/dim/src/scanner/tv_show.rs index f85c6d530..f1ae6850e 100644 --- a/dim/src/scanner/tv_show.rs +++ b/dim/src/scanner/tv_show.rs @@ -1,12 +1,23 @@ #![allow(unstable_name_collisions)] +use super::movie::asset_from_url; +use super::MediaMatcher; +use super::Metadata; +use super::WorkUnit; use crate::external::ExternalEpisode; use crate::external::ExternalMedia; +use crate::external::ExternalQuery; +use crate::external::ExternalQueryShow; use crate::external::ExternalSeason; use crate::inspect::ResultExt; +use async_trait::async_trait; + use database::episode::Episode; use database::episode::InsertableEpisode; +use database::genre::Genre; +use database::genre::InsertableGenre; +use database::genre::InsertableGenreMedia; use database::library::MediaType; use database::media::InsertableMedia; use database::media::Media; @@ -21,7 +32,11 @@ use database::Transaction; use chrono::prelude::Utc; use chrono::Datelike; +use std::sync::Arc; +use tracing::debug; use tracing::error; +use tracing::info; +use tracing::instrument; use displaydoc::Display; use thiserror::Error; @@ -70,6 +85,42 @@ impl TvMatcher { // TODO: insert poster and backdrops. 
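        // Posters and backdrops are now registered through `asset_from_url` and
        // inserted below, mirroring what `movie::MovieMatcher` does for movies.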
let (emedia, eseason, eepisode) = result; + let posters = emedia + .posters + .iter() + .filter_map(|x| asset_from_url(x)) + .collect::>(); + + let mut poster_ids = vec![]; + + for poster in posters { + let asset = poster + .insert(&mut *tx) + .await + .inspect_err(|error| error!(?error, "Failed to insert asset into db.")) + .map_err(Error::PosterInsert)?; + + poster_ids.push(asset); + } + + let backdrops = emedia + .backdrops + .iter() + .filter_map(|x| asset_from_url(x)) + .collect::>(); + + let mut backdrop_ids = vec![]; + + for backdrop in backdrops { + let asset = backdrop + .insert(&mut *tx) + .await + .inspect_err(|error| error!(?error, "Failed to insert asset into db.")) + .map_err(Error::BackdropInsert)?; + + backdrop_ids.push(asset); + } + let media = InsertableMedia { media_type: MediaType::Tv, library_id: file.library_id, @@ -78,8 +129,8 @@ impl TvMatcher { rating: emedia.rating, year: emedia.release_date.map(|x| x.year() as _), added: Utc::now().to_string(), - poster: None, - backdrop: None, + poster: poster_ids.first().map(|x| x.id), + backdrop: backdrop_ids.first().map(|x| x.id), }; let parent_id = media @@ -88,7 +139,28 @@ impl TvMatcher { .inspect_err(|error| error!(?error, ?file, "Failed to lazy insert tv show")) .map_err(Error::GetOrInsertMedia)?; - // TODO: Decouple then re-attach genres for current tv show. + // NOTE: We want to decouple this media from all genres and essentially rebuild the list. + // Its a lot simpler than doing a diff-update but it might be more expensive. + Genre::decouple_all(tx, parent_id) + .await + .inspect_err(|error| error!(?error, "Failed to decouple genres from media.")) + .map_err(Error::GenreDecouple)?; + + for name in emedia.genres { + let genre = InsertableGenre { name } + .insert(tx) + .await + .inspect_err(|error| error!(?error, "Failed to create or get genre.")) + .map_err(Error::GetOrInsertGenre)?; + + // TODO: Recouple genres always otherwise rematching would get buggy genre lists + InsertableGenreMedia::insert_pair(genre, parent_id, tx) + .await + .inspect_err( + |error| error!(?error, %parent_id, "Failed to attach genre to media object."), + ) + .map_err(Error::CoupleGenre)?; + } let seasonid = self.match_to_season(tx, parent_id, eseason).await?; let episodeid = self @@ -100,9 +172,12 @@ impl TvMatcher { // want to erase their existance. 
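        // The cleanup below cascades bottom-up: the previously matched episode, then
        // its season, then the show itself are each deleted only once they no longer
        // have any remaining children.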
match file.media_id { Some(x) if x != episodeid => { - let season_id = Episode::get_seasonid(tx, x).await.inspect_err( - |error| error!(?error, id = %x, "Failed to get seasonid for episode"), - ).map_err(Error::GetSeasonId)?; + let season_id = Episode::get_seasonid(tx, x) + .await + .inspect_err( + |error| error!(?error, id = %x, "Failed to get seasonid for episode"), + ) + .map_err(Error::GetSeasonId)?; let tvshow_id = Season::get_tvshowid(tx, season_id).await.inspect_err( |error| error!(?error, id = %x, "Failed to get tvshowid for season/episode."), @@ -113,19 +188,28 @@ impl TvMatcher { ).map_err(Error::ChildrenCount)?; if count == 0 { - Media::delete(tx, x).await.inspect_err( - |error| error!(?error, id = %x, "Failed to delete child-less episode"), - ).map_err(Error::ChildCleanup)?; + Media::delete(tx, x) + .await + .inspect_err( + |error| error!(?error, id = %x, "Failed to delete child-less episode"), + ) + .map_err(Error::ChildCleanup)?; } - let count = Season::count_children(tx, season_id).await.inspect_err( - |error| error!(?error, id = %x, "Failed to get children count for season"), - ).map_err(Error::ChildrenCount)?; + let count = Season::count_children(tx, season_id) + .await + .inspect_err( + |error| error!(?error, id = %x, "Failed to get children count for season"), + ) + .map_err(Error::ChildrenCount)?; if count == 0 { - Season::delete_by_id(tx, season_id).await.inspect_err( - |error| error!(?error, id = %x, "Failed to delete child-less season"), - ).map_err(Error::ChildCleanup)?; + Season::delete_by_id(tx, season_id) + .await + .inspect_err( + |error| error!(?error, id = %x, "Failed to delete child-less season"), + ) + .map_err(Error::ChildCleanup)?; } let count = TVShow::count_children(tx, tvshow_id).await.inspect_err( @@ -133,9 +217,12 @@ impl TvMatcher { ).map_err(Error::ChildrenCount)?; if count == 0 { - Media::delete(tx, tvshow_id).await.inspect_err( - |error| error!(?error, id = %x, "Failed to delete child-less tv show"), - ).map_err(Error::ChildCleanup)?; + Media::delete(tx, tvshow_id) + .await + .inspect_err( + |error| error!(?error, id = %x, "Failed to delete child-less tv show"), + ) + .map_err(Error::ChildCleanup)?; } } _ => {} @@ -152,11 +239,28 @@ impl TvMatcher { parent_id: i64, result: ExternalSeason, ) -> Result> { - // TODO: Fetch poster. 
+ let posters = result + .posters + .iter() + .filter_map(|x| asset_from_url(x)) + .collect::>(); + + let mut poster_ids = vec![]; + + for poster in posters { + let asset = poster + .insert(&mut *tx) + .await + .inspect_err(|error| error!(?error, "Failed to insert asset into db.")) + .map_err(Error::PosterInsert)?; + + poster_ids.push(asset); + } + let season = InsertableSeason { season_number: result.season_number as _, added: Utc::now().to_string(), - poster: None, + poster: poster_ids.first().map(|x| x.id), }; let season_id = season @@ -175,13 +279,31 @@ impl TvMatcher { seasonid: i64, result: ExternalEpisode, ) -> Result> { - // NOTE: Add backdrops + let stills = result + .stills + .iter() + .filter_map(|x| asset_from_url(x)) + .collect::>(); + + let mut still_ids = vec![]; + + for still in stills { + let asset = still + .insert(&mut *tx) + .await + .inspect_err(|error| error!(?error, "Failed to insert asset into db.")) + .map_err(Error::PosterInsert)?; + + still_ids.push(asset); + } + let media = InsertableMedia { library_id: file.library_id, name: result.title_or_episode(), added: Utc::now().to_string(), media_type: MediaType::Episode, description: result.description.clone(), + backdrop: still_ids.first().map(|x| x.id), ..Default::default() }; @@ -219,6 +341,98 @@ impl TvMatcher { Ok(episode_id) } + + #[instrument(skip(provider, metadata))] + async fn lookup_metadata( + provider: Arc, + file: MediaFile, + metadata: Vec, + ) -> Option<(MediaFile, (ExternalMedia, ExternalSeason, ExternalEpisode))> { + for meta in metadata { + match provider + .search(meta.name.as_ref(), meta.year.map(|x| x as _)) + .await + { + Ok(provided) => { + let first = if let Some(x) = provided.first() { + x.clone() + } else { + continue; + }; + + let Ok(seasons) = provider.seasons_for_id(&first.external_id).await else { + info!(?meta, "Failed to find season match with the current metadata set."); + continue; + }; + + // FIXME: If a file doesnt have season metadata, we want to default to + // marking this file as an extra and put it in season 0 + let Some(season) = seasons + .into_iter() + .find(|x| x.season_number as i64 == meta.season.unwrap_or(0)) else { + info!(?meta, "Provider didnt return our desired season with current metadata."); + continue; + }; + + let Ok(episodes) = provider + .episodes_for_season(&first.external_id, meta.season.unwrap_or(0) as _) + .await else { + // FIXME: We might want to propagate this error. + info!(?meta, "Failed to fetch episodes with current metadata set."); + continue; + }; + + let Some(episode) = episodes + .into_iter() + .find(|x| x.episode_number as i64 == meta.episode.unwrap_or(0)) else { + info!( + ?meta, + "Provider didnt return our desired episode with current metadata." + ); + continue; + }; + + return Some((file, (first, season, episode))); + } + Err(e) => error!(?meta, error = ?e, "Failed to find a movie match."), + } + } + + None + } +} + +#[async_trait] +impl MediaMatcher for TvMatcher { + async fn batch_match( + &self, + tx: &mut Transaction<'_>, + provider: Arc, + work: Vec, + ) { + let provider_show: Arc = provider + .into_query_show() + .expect("Scanner needs a show provider"); + + let metadata_futs = work + .into_iter() + .map(|WorkUnit(file, metadata)| { + let provider_show = Arc::clone(&provider_show); + tokio::spawn(Self::lookup_metadata(provider_show, file, metadata)) + }) + .collect::>(); + + let metadata = futures::future::join_all(metadata_futs).await; + + // FIXME: Propagate errors. 
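        // A failed `match_to_result` is only logged through `inspect_err`; files whose
        // lookup returned `None` (or whose task panicked) are skipped silently.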
+ for meta in metadata.into_iter() { + if let Ok(Some((file, provided))) = meta { + self.match_to_result(tx, file, provided) + .await + .inspect_err(|error| error!(?error, "failed to match to result")); + } + } + } } #[cfg(test)]
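// Rough sketch of how the new TV path is intended to be driven, for reference only:
// this block is not part of the patch, `match_tv_files`, `api_key` and `work` are
// hypothetical placeholders, and module paths are approximate. It assumes `work` was
// already built by the scanner from `parse_filenames` output paired with inserted
// `MediaFile` rows.
use std::sync::Arc;

use database::Transaction;

use crate::external::tmdb::{MetadataProviderOf, TMDBMetadataProvider, TvShows};
use crate::scanner::tv_show::TvMatcher;
use crate::scanner::{MediaMatcher, WorkUnit};

async fn match_tv_files(tx: &mut Transaction<'_>, api_key: &str, work: Vec<WorkUnit>) {
    let provider = TMDBMetadataProvider::new(api_key);

    // Only the TvShows-flavoured provider returns `Some` from `into_query_show`,
    // which `batch_match` relies on to reach `seasons_for_id`/`episodes_for_season`.
    let shows: Arc<MetadataProviderOf<TvShows>> = Arc::new(provider.tv_shows());

    // Spawns `lookup_metadata` per file (search -> seasons -> episodes) and writes
    // each hit with `match_to_result` inside the supplied transaction.
    TvMatcher.batch_match(tx, shows, work).await;
}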