Skip to content

Commit

Permalink
scanner: refactor most of the movies scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
vgarleanu committed Jun 26, 2022
1 parent 54b3a18 commit dc2af4a
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 21 deletions.
49 changes: 49 additions & 0 deletions dim/src/external/filename.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use anitomy::Anitomy;
use anitomy::ElementCategory;
use torrent_name_parser::Metadata as TorrentMetadata;

/// Parser-agnostic metadata extracted from a media file name.
///
/// Every `FilenameMetadata` implementation in this file produces this type.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct Metadata {
/// Title reported by the filename parser.
pub name: String,
/// Release year, when the parser found one.
pub year: Option<i64>,
/// Season number, when the parser found one.
pub season: Option<i64>,
/// Episode number, when the parser found one.
pub episode: Option<i64>,
}

/// Implemented for filename parsers that can convert a raw file name into [`Metadata`].
pub trait FilenameMetadata {
/// Attempts to extract [`Metadata`] from `s`, returning `None` when the parser cannot
/// produce a usable result.
fn from_str(s: &str) -> Option<Metadata>;
}

impl FilenameMetadata for TorrentMetadata {
    /// Runs the `torrent_name_parser` parser over `s` and converts its output into
    /// [`Metadata`]. Yields `None` when the parser rejects the name.
    fn from_str(s: &str) -> Option<Metadata> {
        let parsed = match TorrentMetadata::from(s) {
            Ok(parsed) => parsed,
            Err(_) => return None,
        };

        // The numeric fields are widened to the `i64` representation used by `Metadata`.
        let name = parsed.title().to_owned();
        let year = parsed.year().map(|value| value as i64);
        let season = parsed.season().map(|value| value as i64);
        let episode = parsed.episode().map(|value| value as i64);

        Some(Metadata {
            name,
            year,
            season,
            episode,
        })
    }
}

impl FilenameMetadata for Anitomy {
    /// Runs the `anitomy` parser over `s` and converts the elements it found into
    /// [`Metadata`]. Yields `None` when no anime title element is present.
    fn from_str(s: &str) -> Option<Metadata> {
        // Both the `Ok` and the `Err` variant carry the parsed elements, so the outcome
        // flag is discarded and whatever elements were produced are used.
        let elements = match Anitomy::new().parse(s) {
            Ok(found) => found,
            Err(found) => found,
        };

        // A title is mandatory; every other field is optional and silently dropped when
        // it is missing or not a valid integer.
        let name = elements.get(ElementCategory::AnimeTitle)?.to_string();
        let year = elements
            .get(ElementCategory::AnimeYear)
            .and_then(|raw| raw.parse::<i64>().ok());
        let season = elements
            .get(ElementCategory::AnimeSeason)
            .and_then(|raw| raw.parse::<i64>().ok());
        let episode = elements
            .get(ElementCategory::EpisodeNumber)
            .and_then(|raw| raw.parse::<i64>().ok());

        Some(Metadata {
            name,
            year,
            season,
            episode,
        })
    }
}
13 changes: 13 additions & 0 deletions dim/src/inspect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/// Extension trait that lets callers peek at the error of a `Result` without consuming it.
pub trait ResultExt<T, E> {
    /// Calls `f` with a reference to the contained error if `self` is `Err`, then returns
    /// `self` unchanged. `f` is not called for `Ok` values.
    fn inspect_err(self, f: impl FnOnce(&E)) -> Result<T, E>;
}

impl<T, E> ResultExt<T, E> for Result<T, E> {
    fn inspect_err(self, f: impl FnOnce(&E)) -> Result<T, E> {
        match self {
            Ok(value) => Ok(value),
            Err(error) => {
                // Let the observer see the error before handing it back untouched.
                f(&error);
                Err(error)
            }
        }
    }
}
2 changes: 2 additions & 0 deletions dim/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub mod errors;
pub mod external;
/// Contains the code for fetching assets like posters and stills.
pub mod fetcher;
/// Extension trait providing an `inspect_err` API for the `Result` type.
pub mod inspect;
/// Contains our custom logger for rocket
pub mod logger;
/// Contains all of the routes exposed by the webapi.
Expand Down
11 changes: 5 additions & 6 deletions dim/src/scanner/mediafile.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Module contains all the code that creates and inserts basic mediafiles into the database.

use crate::external::filename::Metadata;
use crate::streaming::ffprobe::FFProbeCtx;
use crate::streaming::FFPROBE_BIN;

Expand All @@ -13,8 +14,6 @@ use displaydoc::Display;

use std::path::PathBuf;

use torrent_name_parser::Metadata;

use tokio::sync::Semaphore;
use tokio::sync::SemaphorePermit;

Expand Down Expand Up @@ -150,10 +149,10 @@ impl MediafileCreator {
media_id: None,
target_file,

raw_name: metadata.title().to_owned(),
raw_year: metadata.year().map(|x| x as i64),
season: metadata.season().map(|x| x as i64),
episode: metadata.episode().map(|x| x as i64),
raw_name: metadata.name,
raw_year: metadata.year,
season: metadata.season,
episode: metadata.episode,

quality: video_metadata.get_height().map(|x| x.to_string()),
codec: video_metadata.get_video_codec(),
Expand Down
46 changes: 37 additions & 9 deletions dim/src/scanner/mod.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
//! Module contains all the code for the new generation media scanner.

mod mediafile;
mod movie;
#[cfg(test)]
mod tests;

use anitomy::Anitomy;
use async_trait::async_trait;

use database::mediafile::MediaFile;

use super::external::filename::FilenameMetadata;
use super::external::filename::Metadata;
use super::external::ExternalQuery;

use std::ffi::OsStr;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

use walkdir::WalkDir;

use torrent_name_parser::Metadata;
use torrent_name_parser::Metadata as TorrentMetadata;
use tracing::warn;
use walkdir::WalkDir;

/// File extensions the scanner lists as supported. Entries are written without a leading dot.
// NOTE(review): how these are compared against file names (case sensitivity etc.) is not
// visible in this hunk — confirm at the use site.
pub(super) static SUPPORTED_EXTS: &[&str] = &["mp4", "mkv", "avi", "webm"];

Expand Down Expand Up @@ -46,7 +56,9 @@ pub fn get_subfiles(paths: impl Iterator<Item = impl AsRef<Path>>) -> Vec<PathBu
files
}

pub fn parse_filenames(files: impl Iterator<Item = impl AsRef<Path>>) -> Vec<(PathBuf, Metadata)> {
pub fn parse_filenames(
files: impl Iterator<Item = impl AsRef<Path>>,
) -> Vec<(PathBuf, Vec<Metadata>)> {
let mut metadata = Vec::new();

for file in files {
Expand All @@ -58,13 +70,29 @@ pub fn parse_filenames(files: impl Iterator<Item = impl AsRef<Path>>) -> Vec<(Pa
}
};

match Metadata::from(&filename) {
Ok(meta) => metadata.push((file.as_ref().into(), meta)),
Err(error) => {
warn!(file = ?file.as_ref(), ?error, "Failed to parse the filename and extract metadata.")
}
let metas = IntoIterator::into_iter([
TorrentMetadata::from_str(&filename),
Anitomy::from_str(&filename),
])
.filter_map(|x| x)
.collect::<Vec<_>>();

if metas.is_empty() {
warn!(file = ?file.as_ref(), "Failed to parse the filename and extract metadata.");
continue;
}

metadata.push((file.as_ref().into(), metas));
}

metadata
}

/// A single unit of work handed to a [`MediaMatcher`]: a mediafile together with the candidate
/// [`Metadata`] entries that matchers try in order.
// NOTE(review): presumably the candidates are the per-parser results from `parse_filenames` —
// confirm against the code that constructs `WorkUnit`s.
pub struct WorkUnit(pub MediaFile, pub Vec<Metadata>);

/// Trait that must be implemented by a media matcher. Matchers are responsible for fetching their
/// own external metadata; the metadata provider to query is passed in with every
/// [`MediaMatcher::batch_match`] call.
#[async_trait]
pub trait MediaMatcher {
/// Processes every [`WorkUnit`] in `work`, using `provider` to look up external metadata.
async fn batch_match(self: Arc<Self>, provider: Arc<dyn ExternalQuery>, work: Vec<WorkUnit>);
}
114 changes: 114 additions & 0 deletions dim/src/scanner/movie.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
use crate::external::ExternalMedia;
use crate::external::ExternalQuery;
use crate::inspect::ResultExt;

use super::MediaMatcher;
use super::WorkUnit;

use async_trait::async_trait;
use chrono::prelude::Utc;
use chrono::Datelike;

use database::library::MediaType;
use database::media::InsertableMedia;
use database::media::Media;
use database::mediafile::UpdateMediaFile;
use database::mediafile::MediaFile;
use database::Transaction;

use std::sync::Arc;
use tracing::error;

/// Matcher that links mediafiles to media objects of type `MediaType::Movie`.
pub struct MovieMatcher;

impl MovieMatcher {
/// Matches a mediafile to a media object built from `provided`. The caller should ensure that
/// the supplied mediafile is not coupled to a media object; if it is coupled, we assume that
/// the metadata of that media object may be replaced.
///
/// The method builds an `InsertableMedia` from `provided`, resolves the id of the media the
/// file should belong to, and finally updates the mediafile `file.id` to point at that id.
///
/// # Errors
///
/// Returns the error of the first database operation that fails (insert, lookup by name, or
/// mediafile update). Each failure is also logged through `error!` before being returned.
async fn match_to_result(
&self,
tx: &mut Transaction<'_>,
file: MediaFile,
provided: ExternalMedia,
) -> Result<(), Box<dyn std::error::Error>> {
// TODO: Push posters and backdrops to download queue.

// Poster and backdrop are left empty for now (see the TODO above).
let media = InsertableMedia {
media_type: MediaType::Movie,
library_id: file.library_id,
name: provided.title,
description: provided.description,
rating: provided.rating,
year: provided.release_date.map(|x| x.year() as _),
added: Utc::now().to_string(),
poster: None,
backdrop: None,
};

// NOTE: If the mediafile is coupled to a media we assume that we want to reuse the media
// object, but replace its metadata in-place. This is useful when rematching a media.
let media_id = if file.media_id.is_some() {
media.insert(tx, file.media_id).await.inspect_err(|error| {
error!(
?error,
?file.media_id,
"Failed to assign mediafile to media."
)
})?
} else {
// A media object with the same name may already exist; if so, bind this file to it
// instead of inserting a duplicate. Otherwise insert a new media object.
match Media::get_id_by_name(tx, &media.name)
.await
.inspect_err(|error| error!(?error, %media.name, "Failed to get a media by name"))?
{
Some(id) => id,
None => media
.insert(tx, None)
.await
.inspect_err(|error| error!(?error, "Failed to insert media object."))?,
}
};

// NOTE: Previous scanner had a `InsertableMovie::insert`. Honestly no clue if we need
// that, or if that is a remnant from the openflix days.

// Update the mediafile to point to its new parent media_id. We also set raw_name and
// raw_year to the values of the parent so that when we refresh metadata, files that were
// matched manually (due to bogus filenames) don't get unmatched or matched wrongly.
UpdateMediaFile {
media_id: Some(media_id),
raw_name: Some(media.name),
raw_year: media.year,
..Default::default()
}
.update(tx, file.id)
.await
.inspect_err(|error| {
error!(?error, "Failed to update mediafile to point to new parent.")
})?;

Ok(())
}
}

#[async_trait]
impl MediaMatcher for MovieMatcher {
async fn batch_match(self: Arc<Self>, provider: Arc<dyn ExternalQuery>, work: Vec<WorkUnit>) {
let metadata_futs = work
.into_iter()
.map(|WorkUnit(file, metadata)| async {
for meta in metadata {
match provider.search(meta.name.as_ref(), meta.year.map(|x| x as _)).await {
Ok(provided) => return Some((file, provided)),
Err(e) => error!(?meta, "Failed to find a movie match."),
}
}

None
})
.collect::<Vec<_>>();

let metadata = futures::future::join_all(metadata_futs).await;
}
}
14 changes: 8 additions & 6 deletions dim/src/scanner/tests/mediafile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use super::super::parse_filenames;

use database::library::InsertableLibrary;
use database::library::MediaType;
use database::mediafile::MediaFile;
use database::mediafile::InsertableMediaFile;
use database::mediafile::MediaFile;

use itertools::Itertools;

Expand Down Expand Up @@ -58,7 +58,7 @@ async fn test_construct_mediafile() {
let mut insertable_futures =
parsed
.into_iter()
.map(|(path, meta)| instance.construct_mediafile(path, meta).boxed())
.map(|(path, meta)| instance.construct_mediafile(path, meta[0].clone()).boxed())
.chunks(5)
.into_iter()
.map(|chunk| chunk.collect())
Expand Down Expand Up @@ -93,7 +93,6 @@ async fn test_construct_mediafile() {
// We should have inserted all the files as they dont exist in the database.
assert_eq!(mediafiles.len(), files.len());


// All the files in `insertables` should already exist in the database, thus this should return
// `0`.
for chunk in insertables.chunks(128) {
Expand All @@ -107,7 +106,9 @@ async fn test_construct_mediafile() {

// At this point we should have 512 files in the database.
let mut tx = conn.read().begin().await.unwrap();
let files_in_db = MediaFile::get_by_lib_null_media(&mut tx, library).await.expect("Failed to get mediafiles.");
let files_in_db = MediaFile::get_by_lib_null_media(&mut tx, library)
.await
.expect("Failed to get mediafiles.");
assert_eq!(files_in_db.len(), files.len());
}

Expand All @@ -132,7 +133,7 @@ async fn test_multiple_instances() {
let mut insertable_futures =
parsed
.into_iter()
.map(|(path, meta)| instance.construct_mediafile(path, meta).boxed())
.map(|(path, meta)| instance.construct_mediafile(path, meta[0].clone()).boxed())
.chunks(5)
.into_iter()
.map(|chunk| chunk.collect())
Expand Down Expand Up @@ -169,7 +170,8 @@ async fn test_multiple_instances() {
let addr = addr.clone();
insert_futures.push(async move {
let chunk_len = chunk.len();
let result = addr.send(InsertBatch(chunk.into_iter().cloned().collect()))
let result = addr
.send(InsertBatch(chunk.into_iter().cloned().collect()))
.await
.expect("Addr got dropped")
.expect("Failed to insert batch");
Expand Down

0 comments on commit dc2af4a

Please sign in to comment.