Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Prepare for central MovieScrapeJob class #1539

Merged
merged 2 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ third_party/packaging_win

# Deployment
create-dmg/
tmp/

# OS specific
.directory
Expand Down
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
# Changelog

## 2.10.2 - tbd

### Notes

- …

### Fixed

- …

### Changed

- …

### Added

- …

### Removed

- …

## 2.10.0 - Benzar (2023-02-05)

### Notes
Expand Down
14 changes: 14 additions & 0 deletions MediaElch.pro
Original file line number Diff line number Diff line change
Expand Up @@ -258,25 +258,32 @@ SOURCES += src/main.cpp \
src/scrapers/imdb/ImdbReferencePage.cpp \
src/scrapers/movie/adultdvdempire/AdultDvdEmpire.cpp \
src/scrapers/movie/adultdvdempire/AdultDvdEmpireApi.cpp \
src/scrapers/movie/adultdvdempire/AdultDvdEmpireScrapeJob.cpp \
src/scrapers/movie/adultdvdempire/AdultDvdEmpireSearchJob.cpp \
src/scrapers/movie/aebn/AEBN.cpp \
src/scrapers/movie/aebn/AebnApi.cpp \
src/scrapers/movie/aebn/AebnScrapeJob.cpp \
src/scrapers/movie/aebn/AebnSearchJob.cpp \
src/scrapers/movie/custom/CustomMovieScraper.cpp \
src/scrapers/movie/hotmovies/HotMovies.cpp \
src/scrapers/movie/hotmovies/HotMoviesApi.cpp \
src/scrapers/movie/hotmovies/HotMoviesScrapeJob.cpp \
src/scrapers/movie/hotmovies/HotMoviesSearchJob.cpp \
src/scrapers/movie/imdb/ImdbMovie.cpp \
src/scrapers/movie/imdb/ImdbMovieScraper.cpp \
src/scrapers/movie/imdb/ImdbMovieScrapeJob.cpp \
src/scrapers/movie/imdb/ImdbMovieSearchJob.cpp \
src/scrapers/movie/MovieIdentifier.cpp \
src/scrapers/movie/MovieScraper.cpp \
src/scrapers/movie/MovieSearchJob.cpp \
src/scrapers/movie/MovieScrapeJob.cpp \
src/scrapers/movie/tmdb/TmdbMovie.cpp \
src/scrapers/movie/tmdb/TmdbMovieSearchJob.cpp \
src/scrapers/movie/tmdb/TmdbMovieScrapeJob.cpp \
src/scrapers/movie/videobuster/VideoBuster.cpp \
src/scrapers/movie/videobuster/VideoBusterApi.cpp \
src/scrapers/movie/videobuster/VideoBusterSearchJob.cpp \
src/scrapers/movie/videobuster/VideoBusterScrapeJob.cpp \
src/scrapers/music/AllMusic.cpp \
src/scrapers/music/Discogs.cpp \
src/scrapers/music/MusicBrainz.cpp \
Expand Down Expand Up @@ -609,25 +616,32 @@ HEADERS += Version.h \
src/scrapers/imdb/ImdbReferencePage.h \
src/scrapers/movie/adultdvdempire/AdultDvdEmpire.h \
src/scrapers/movie/adultdvdempire/AdultDvdEmpireApi.h \
src/scrapers/movie/adultdvdempire/AdultDvdEmpireScrapeJob.h \
src/scrapers/movie/adultdvdempire/AdultDvdEmpireSearchJob.h \
src/scrapers/movie/aebn/AEBN.h \
src/scrapers/movie/aebn/AebnApi.h \
src/scrapers/movie/aebn/AebnSearchJob.h \
src/scrapers/movie/aebn/AebnScrapeJob.h \
src/scrapers/movie/custom/CustomMovieScraper.h \
src/scrapers/movie/hotmovies/HotMovies.h \
src/scrapers/movie/hotmovies/HotMoviesApi.h \
src/scrapers/movie/hotmovies/HotMoviesSearchJob.h \
src/scrapers/movie/hotmovies/HotMoviesScrapeJob.h \
src/scrapers/movie/imdb/ImdbMovie.h \
src/scrapers/movie/imdb/ImdbMovieScraper.h \
src/scrapers/movie/imdb/ImdbMovieSearchJob.h \
src/scrapers/movie/imdb/ImdbMovieScrapeJob.h \
src/scrapers/movie/MovieIdentifier.h \
src/scrapers/movie/MovieScraper.h \
src/scrapers/movie/MovieSearchJob.h \
src/scrapers/movie/MovieScrapeJob.h \
src/scrapers/movie/tmdb/TmdbMovie.h \
src/scrapers/movie/tmdb/TmdbMovieSearchJob.h \
src/scrapers/movie/tmdb/TmdbMovieScrapeJob.h \
src/scrapers/movie/videobuster/VideoBuster.h \
src/scrapers/movie/videobuster/VideoBusterApi.h \
src/scrapers/movie/videobuster/VideoBusterSearchJob.h \
src/scrapers/movie/videobuster/VideoBusterScrapeJob.h \
src/scrapers/music/AllMusic.h \
src/scrapers/music/Discogs.h \
src/scrapers/music/MusicBrainz.h \
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ static void loadStylesheet(QApplication& app, const QString& theme, const QStrin
{
const QStringList availableStyles = QStyleFactory::keys();
QString filename;
qDebug() << "Using theme:" << theme;
qCDebug(generic) << "Using theme:" << theme;

#ifdef Q_OS_MAC
QString mainWindowTheme;
Expand Down
1 change: 0 additions & 1 deletion src/renamer/MovieRenamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ MovieRenamer::RenameError MovieRenamer::renameMovie(Movie& movie)
QStringList newSubFiles;
bool hasCurrentNewName = false;
for (const QString& subFile : subtitle->files()) {
qDebug() << subFile;
QFileInfo subFi(fi.canonicalPath() + "/" + subFile);
QString newSubFileName = subFileName + "." + subFi.suffix();
if (subFile != newSubFileName) {
Expand Down
1 change: 1 addition & 0 deletions src/scrapers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ add_library(
concert/tmdb/TmdbConcertSearchJob.cpp
movie/MovieIdentifier.cpp
movie/MovieScraper.cpp
movie/MovieScrapeJob.cpp
movie/MovieSearchJob.cpp
music/AllMusic.cpp
music/Discogs.cpp
Expand Down
32 changes: 32 additions & 0 deletions src/scrapers/movie/MovieScrapeJob.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include "scrapers/movie/MovieScrapeJob.h"

#include "data/movie/Movie.h"

namespace mediaelch {
namespace scraper {

/// \brief Creates a movie scrape job for the given configuration.
/// \param config Scrape configuration; it is moved into the job.
/// \param parent Forwarded to the worker::Job base class constructor.
MovieScrapeJob::MovieScrapeJob(Config config, QObject* parent) :
    worker::Job(parent),
    m_movie{new Movie({}, this)},
    m_config{std::move(config)}
{
    // Re-emit the generic Job::finished signal as loadFinished(), which
    // carries a typed MovieScrapeJob* so that receivers do not need to
    // static_cast the job pointer themselves.
    const auto forwardFinished = [this]() { emit loadFinished(this, QPrivateSignal{}); };
    connect(this, &Job::finished, this, forwardFinished);

    // TODO: Change to true / remove once all usages of MovieScrapeJob are updated.
    setAutoDelete(false);
}

/// \brief Returns the scraper error stored in this job.
/// \details This is the value last passed to setScraperError(), or a
///          default-constructed ScraperError if it was never called.
const ScraperError& MovieScrapeJob::scraperError() const
{
    const ScraperError& storedError = m_scraperError;
    return storedError;
}

/// \brief Stores the scraper error and mirrors it into the job's error fields.
/// \param error Error to store; it is moved into the job.
void MovieScrapeJob::setScraperError(ScraperError error)
{
    m_scraperError = std::move(error);

    // Read from the stored member, because "error" has been moved from.
    const ScraperError& stored = m_scraperError;
    setError(static_cast<int>(stored.error));
    setErrorString(stored.message);
    setErrorText(stored.technical);
}

} // namespace scraper
} // namespace mediaelch
66 changes: 66 additions & 0 deletions src/scrapers/movie/MovieScrapeJob.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#pragma once

#include "data/Locale.h"
#include "scrapers/ScraperError.h"
#include "scrapers/ScraperInfos.h"
#include "scrapers/movie/MovieIdentifier.h"
#include "utils/Meta.h"
#include "workers/Job.h"

#include <QObject>

class Movie;

namespace mediaelch {
namespace scraper {

/// \brief Base class for jobs that load the details of a single movie.
/// \details The job holds a Movie object (see movie()) and the configuration
///          it was created with (see config()). When the underlying
///          worker::Job emits finished(), loadFinished() is emitted with a
///          typed pointer to this job.
/// \todo Currently not used properly; only used as a base for future changes.
class MovieScrapeJob : public worker::Job
{
    Q_OBJECT

public:
    /// \brief Configuration object for a movie scrape job.
    struct Config
    {
        /// \brief A string that can be consumed by the movie scraper.
        /// \details It is used to uniquely identify the movie. May be an IMDb ID in
        ///          string representation or a URL.
        MovieIdentifier identifier;
        /// \brief Language key for the scraper, e.g. "en-US", "de-DE", ...
        Locale locale = Locale::English;
        /// \brief Movie details to be loaded using the scraper.
        QSet<MovieScraperInfo> details;
    };

public:
    /// \brief Creates a scrape job for the given configuration.
    /// \details Marked explicit so that a Config is never implicitly
    ///          converted into a job object.
    /// \param config Scrape configuration; stored in the job, see config().
    /// \param parent Forwarded to the worker::Job base class.
    explicit MovieScrapeJob(Config config, QObject* parent = nullptr);
    ~MovieScrapeJob() override = default;

public:
    /// \brief The movie object held by this job.
    ELCH_NODISCARD Movie& movie() { return *m_movie; }
    ELCH_NODISCARD const Movie& movie() const { return *m_movie; }

    /// \brief The configuration this job was created with.
    ELCH_NODISCARD const Config& config() const { return m_config; }
    /// \brief The scraper error last set through setScraperError().
    ELCH_NODISCARD const ScraperError& scraperError() const;

signals:
    /// \brief   Signal emitted when the scrape job has finished.
    /// \details A simple wrapper around finished() to avoid static_casts
    ///          from Job* to MovieScrapeJob*.
    ///          Use hasError() and movie() to know whether the request was successful.
    void loadFinished(mediaelch::scraper::MovieScrapeJob* scrapeJob, QPrivateSignal);

protected:
    /// \brief Stores the given scraper error and mirrors it into the job's
    ///        error code, error string and error text.
    void setScraperError(ScraperError error);

protected:
    /// \brief Movie object created in the constructor; exposed via movie().
    Movie* m_movie = nullptr;

private:
    const Config m_config;
    ScraperError m_scraperError;
};

} // namespace scraper
} // namespace mediaelch
156 changes: 2 additions & 154 deletions src/scrapers/movie/adultdvdempire/AdultDvdEmpire.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
namespace mediaelch {
namespace scraper {

AdultDvdEmpire::AdultDvdEmpire(QObject* parent) : MovieScraper(parent)
AdultDvdEmpire::AdultDvdEmpire(QObject* parent) : MovieScraper(parent), m_scrapeJob(m_api, {}, nullptr)
{
m_meta.identifier = ID;
m_meta.name = "Adult DVD Empire";
Expand Down Expand Up @@ -92,159 +92,7 @@ void AdultDvdEmpire::loadData(QHash<MovieScraper*, mediaelch::scraper::MovieIden

void AdultDvdEmpire::parseAndAssignInfos(QString html, Movie* movie, QSet<MovieScraperInfo> infos)
{
using namespace std::chrono;

QTextDocument doc;
QRegularExpression rx;
rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption);
QRegularExpressionMatch match;

rx.setPattern("<h1>(.*)</h1>");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Title) && match.hasMatch()) {
doc.setHtml(match.captured(1).trimmed());
movie->setName(doc.toPlainText());
}

rx.setPattern("<small>Length: </small> ([0-9]*) hrs. ([0-9]*) mins.[\\s\\n]*</li>");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Runtime) && match.hasMatch()) {
minutes runtime = hours(match.captured(1).toInt()) + minutes(match.captured(2).toInt());
movie->setRuntime(runtime);
}

if (infos.contains(MovieScraperInfo::Released)) {
rx.setPattern("<li><small>Production Year:</small> ([0-9]{4})[\\s\\n]*</li>");
match = rx.match(html);
if (match.hasMatch()) {
movie->setReleased(QDate::fromString(match.captured(1), "yyyy"));
} else {
rx.setPattern(R"re(<li><small>Released:</small>\s+([A-Za-z]+) (\d{2} \d{4})[\s\n\r]*</li>)re");
match = rx.match(html);
if (match.hasMatch()) {
const QString dateStr = match.captured(2);
// Note: We can't use MMM because Qt < 6 is locale aware.
QDate date = QDate::fromString(dateStr, "dd yyyy");
const int month = helper::monthNameToInt(match.captured(1));
date.setDate(date.year(), month, date.day());
movie->setReleased(date);
}
}
}

rx.setPattern("<li><small>Studio: </small><a href=\"[^\"]*\"[\\s\\n]*Category=\"Item Page\"[\\s\\n]*Label=\"Studio "
"- Details\">(.*)[\\s\\n]*</a>");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Studios) && match.hasMatch()) {
doc.setHtml(match.captured(1));
movie->addStudio(doc.toPlainText().trimmed());
}

if (infos.contains(MovieScraperInfo::Actors)) {
// clear actors
movie->setActors({});

QTextDocument text;

// The Regex is "a bit" more complex because ADE has two HTML styles:
// One with images and one without. The second Regex line has an OR for this.
rx.setPattern(
R"re(<a href="(?:\/[a-zA-Z-]+)?\/\d+\/[^"]+"\r?\n\s+style="[^"]+"\r?\n\s+Category="Item Page" Label="Performer">)re"
R"re((?:(?:<div class="[^"]+"><u>([^<]+)</u>(?:<div[^>]+>)*<img src="([^"]+)")|(?:(?:\r?\n\t+)+(.+)</a>)))re");
rx.optimize();
QRegularExpressionMatchIterator matches = rx.globalMatch(html);
while (matches.hasNext()) {
QRegularExpressionMatch actorMatch = matches.next();
Actor a;
if (actorMatch.captured(1).isEmpty()) {
text.setHtml(actorMatch.captured(3).trimmed());
a.name = replaceEntities(text.toPlainText());
} else {
text.setHtml(actorMatch.captured(1).trimmed());
a.name = replaceEntities(text.toPlainText());
a.thumb = actorMatch.captured(2);
}
if (!a.name.isEmpty()) {
movie->addActor(a);
}
}
}

rx.setPattern(R"(<a href="/\d+/[^"]+"\r\n\s+Category="Item Page" Label="Director">([^<]+)</a>)");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Director) && match.hasMatch()) {
movie->setDirector(match.captured(1).trimmed());
}

// get the list of categories first (to avoid parsing categories of other movies)
rx.setPattern(R"(<strong>Categories:</strong>&nbsp;(.*)</div>)");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Genres) && match.hasMatch()) {
QString categoryHtml = match.captured(1);
rx.setPattern(R"(<a href="[^"]*"[\r\s\n]*Category="Item Page" Label="Category">([^<]*)</a>)");

QRegularExpressionMatchIterator matches = rx.globalMatch(categoryHtml);
while (matches.hasNext()) {
movie->addGenre(matches.next().captured(1).trimmed());
}
}

rx.setPattern(R"(<h4 class="m-b-0 text-dark synopsis">(<p( class="markdown-h[12]")?>.*)</p></h4>)");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Overview) && match.hasMatch()) {
// add some newlines to simulate the paragraphs (scene descriptions)
QString content{match.captured(1).trimmed()};
content.remove("<p class=\"markdown-h1\">");
content.remove("<p>");
content.replace("<p class=\"markdown-h2\">", "<br>");
content.replace("</p>", "<br>");
doc.setHtml(content);
movie->setOverview(doc.toPlainText());
if (Settings::instance()->usePlotForOutline()) {
movie->setOutline(doc.toPlainText());
}
}

rx.setPattern("href=\"([^\"]*)\"[\\s\\n]*id=\"front-cover\"");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Poster) && match.hasMatch()) {
Poster p;
p.thumbUrl = match.captured(1);
p.originalUrl = match.captured(1);
movie->images().addPoster(p);
}

rx.setPattern(R"(<a href="[^"]*"[\s\r\n]*Category="Item Page" Label="Series">[\s\r\n]*([^<]*)<span)");
match = rx.match(html);
if (infos.contains(MovieScraperInfo::Set) && match.hasMatch()) {
doc.setHtml(match.captured(1));
QString setName = doc.toPlainText().trimmed();
if (setName.endsWith("Series", Qt::CaseInsensitive)) {
setName.chop(6);
}
setName = setName.trimmed();
if (setName.startsWith("\"")) {
setName.remove(0, 1);
}
if (setName.endsWith("\"")) {
setName.chop(1);
}
MovieSet set;
set.name = setName.trimmed();
movie->setSet(set);
}

if (infos.contains(MovieScraperInfo::Backdrop)) {
rx.setPattern(R"re(<a rel="(scene)?screenshots"[\s\n]*href="([^"]*)")re");
QRegularExpressionMatchIterator matches = rx.globalMatch(html);
while (matches.hasNext()) {
QRegularExpressionMatch backDropMatch = matches.next();
Poster p;
p.thumbUrl = backDropMatch.captured(2);
p.originalUrl = backDropMatch.captured(2);
movie->images().addBackdrop(p);
}
}
m_scrapeJob.parseAndAssignInfos(html, movie, infos);
}

QString AdultDvdEmpire::replaceEntities(QString str) const
Expand Down
Loading