Permalink
Browse files

Update latest IMDB scripts

  • Loading branch information...
1 parent dcb30ab commit bb2e029f5af44d6fdbe4acaa36190ce47d834608 @dedahr dedahr committed Feb 25, 2013
View
4 mediaportal/MediaPortal.Base/VDBParserStrings.xml
@@ -29,7 +29,7 @@
<![CDATA[http://www.imdb.com/title/]]>
</string>
<string><!--1-->
- <![CDATA[id="img_primary">.*?src='/rg/title-overview/primary/images.*?href="(?<defaultPic>.*?)"]]>
+ <![CDATA[(?<defaultPic>/media/rm[^/]*/tt\d{7})]]>
</string>
<string><!--2-->
<![CDATA[http://www.imdb.com]]>
@@ -65,7 +65,7 @@
<![CDATA[Writer[s]?:\s*</h4>[^<]*(?<writers_block>.*?)</div>]]>
</string>
<string><!--4-->
- <![CDATA[/writer-\d/.*?/name/(?<imdbWriterId>nm\d{7})/"[\s]+>(?<writer>.*?)</a>\s+<span>(?<wrole>[(].*?[)])</span>|/writer-\d/.*?/name/(?<imdbWriterId>nm\d{7})/"[\s]+>(?<writer>.*?)</a>]]>
+ <![CDATA[href="/name/(?<imdbWriterId>nm\d{7})/[^>]*>(?<writer>[^<]+)</a>[^(]*(?<wrole>[^)]*\))|href="/name/(?<imdbWriterId>nm\d{7})/[^>]*>(?<writer>[^<]+)</a>(?<wrole>)]]>
</string>
<string><!--5-->
<![CDATA[<table class="cast">.*?</table>|<table class="cast_list">.*?</table>]]>
View
118 mediaportal/MediaPortal.Base/scripts/MovieInfo/IMDB.csscript
@@ -211,7 +211,7 @@ internal class Grabber : IIMDBScriptGrabber
string strAbsUrl;
string strBody = GetPage(url.URL, "utf-8", out strAbsUrl);
-
+
if (string.IsNullOrEmpty(strBody))
{
return false;
@@ -223,7 +223,7 @@ internal class Grabber : IIMDBScriptGrabber
// IMDB Number
int iPos = strAbsUrl.IndexOf("/title/");
-
+
if (iPos > 0)
{
iPos += "/title/".Length;
@@ -239,19 +239,42 @@ internal class Grabber : IIMDBScriptGrabber
// Get covers
movieDetails.ThumbURL = SearchCover(movieDetails.IMDBNumber, strBody);
+ // Title
+ string engTitleBodyUrl = url.URL + @"releaseinfo?ref_=tt_dt_dt#akas";
+ string engNamePage = GetPage(engTitleBodyUrl, "utf-8", out strAbsUrl);
+ regexPattern = @"<title>(?<movieTitle>.*?)[(].*?(?<movieYear>\d{4})";
+ string titleIMDB = Regex.Match(engNamePage, regexPattern).Groups["movieTitle"].Value.Replace("IMDb -", string.Empty);
+
+ if (!string.IsNullOrEmpty(titleIMDB))
+ {
+ titleIMDB = HttpUtility.HtmlDecode(titleIMDB);
+ if (titleIMDB != null) movieDetails.Title = titleIMDB.Trim();
+ // Year
+ int year = 0;
+ int.TryParse(Regex.Match(engNamePage, regexPattern).Groups["movieYear"].Value, out year);
+ movieDetails.Year = year;
+ }
+ else
+ {
+ titleIMDB = Regex.Match(strBody, regexPattern).Groups["movieTitle"].Value.Replace("IMDb -", string.Empty);
+ titleIMDB = HttpUtility.HtmlDecode(titleIMDB);
+ if (titleIMDB != null) movieDetails.Title = titleIMDB.Trim();
+ // Year
+ int year = 0;
+ int.TryParse(Regex.Match(strBody, regexPattern).Groups["movieYear"].Value, out year);
+ movieDetails.Year = year;
+ }
+
+ engNamePage = null;
+
// TMDB Title
bool useTMDB = false; // set-> false <-for original IMDB
- regexPattern = @"<title>(?<movieTitle>.*?)[(].*?(?<movieYear>\d{4})";
- string titleIMDB = Regex.Match(strBody, regexPattern).Groups["movieTitle"].Value.Replace("IMDb -", string.Empty);
- movieDetails.Title = HttpUtility.HtmlDecode(titleIMDB).Trim();
-
if (useTMDB)
{
string tmdbUrl = "http://api.themoviedb.org/2.1/Movie.imdbLookup/-/xml/2ed40b5d82aa804a2b1fcedb5ca8d97a/" +
movieDetails.IMDBNumber;
- string strAbsTmdbUrl = string.Empty;
- string strTmdbXML = GetPage(tmdbUrl, "utf-8", out strAbsTmdbUrl);
+ string strTmdbXML = GetPage(tmdbUrl, "utf-8", out strAbsUrl);
string titleEng = Regex.Match(strTmdbXML, "<name>(?<titleEng>.*?)</name>").Groups["titleEng"].Value.
Replace("&amp;", "&").
Replace("&lt;", "<").
@@ -264,25 +287,20 @@ internal class Grabber : IIMDBScriptGrabber
}
}
- // Year
- int year = 0;
- int.TryParse(Regex.Match(strBody, regexPattern).Groups["movieYear"].Value, out year);
- movieDetails.Year = year;
-
// Director
regexBlockPattern = @"<h4[^>]*>[^D]*Director[s]?:[^<]*</h4>[^<]*(?<directors_block>.*?)</div>";
- regexPattern = @"href=""/name/nm\d{7}/[^>]*>(?<movieDirectors>[^<]+)</a>";
+ regexPattern = @"href=""/name/nm\d{7}/[^>]*>(?<movieDirectors>.*?)</a>";
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Groups["directors_block"].Value;
movieDetails.Director =
- HttpUtility.HtmlDecode(Regex.Match(block, regexPattern, RegexOptions.Singleline).Groups["movieDirectors"].Value);
+ Utils.stripHTMLtags(HttpUtility.HtmlDecode(Regex.Match(block, regexPattern, RegexOptions.Singleline).Groups["movieDirectors"].Value));
// Writers
regexBlockPattern = "<h4[^>]*>[^W]*Writer[s]?:[^<]*</h4>[^<]*(?<writers_block>.*?)</div>";
regexPattern =
- @"href=""/name/nm\d{7}/[^>]*>(?<writer>[^<]+)</a>[^(]*(?<role>[^)]*\))|href=""/name/nm\d{7}/[^>]*>(?<writer>[^<]+)</a>(?<role>)";
+ @"href=""/name/nm\d{7}/[^>]*>(?<writer>.*?)</a>[^(]*(?<role>[^)]*\))|href=""/name/nm\d{7}/[^>]*>(?<writer>.*?)</a>(?<role>)";
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Groups["writers_block"].Value;
MatchCollection mcWriters = Regex.Matches(block, regexPattern);
-
+
if (mcWriters.Count != 0)
{
string writers = string.Empty;
@@ -291,9 +309,11 @@ internal class Grabber : IIMDBScriptGrabber
{
string strWriter = string.Empty;
strWriter = HttpUtility.HtmlDecode(mwriter.Groups["writer"].Value);
+ strWriter = Utils.stripHTMLtags(strWriter);
string strWriterRole = string.Empty;
strWriterRole = HttpUtility.HtmlDecode(mwriter.Groups["role"].Value);
-
+ strWriterRole = Utils.stripHTMLtags(strWriterRole);
+
if (strWriterRole != string.Empty)
{
writers = writers + strWriter + " " + strWriterRole + " / ";
@@ -308,7 +328,7 @@ internal class Grabber : IIMDBScriptGrabber
if (!string.IsNullOrEmpty(writers))
{
writers = HttpUtility.HtmlDecode(writers.Remove(writers.LastIndexOf(" / ")));
-
+
if (writers != null)
{
movieDetails.WritingCredits = writers.Replace("...", "").Trim();
@@ -350,7 +370,7 @@ internal class Grabber : IIMDBScriptGrabber
strRole = m.Groups["role"].Value;
strRole = Utils.stripHTMLtags(strRole).Trim().Replace("\n", "");
string decode = HttpUtility.HtmlDecode(strRole);
-
+
if (decode != null)
{
strRole = decode.Replace(",", ";").Replace(" ", "").Replace("(", " (").Replace(" /", "/");
@@ -362,7 +382,7 @@ internal class Grabber : IIMDBScriptGrabber
{
movieDetails.Cast += " as " + strRole;
}
-
+
movieDetails.Cast += "\n";
}
}
@@ -376,12 +396,12 @@ internal class Grabber : IIMDBScriptGrabber
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Value;
MatchCollection mcGenre = Regex.Matches(block, regexPattern);
string genre = string.Empty;
-
+
foreach (Match m in mcGenre)
{
genre = genre + m.Groups["movieGenres"].Value.Trim() + " / ";
}
-
+
try
{
if (!string.IsNullOrEmpty(genre))
@@ -396,7 +416,7 @@ internal class Grabber : IIMDBScriptGrabber
// MPARating
//
// Get rating from main movie page if exists
- regexBlockPattern = @"<div\sclass=""infobar"">.*?<span\stitle=""[^""]*"".*?itemprop=""contentRating""></span>";
+ regexBlockPattern = @"<div\sclass=""infobar"">.*?<span\stitle=""[^""]*"".*?itemprop=""contentRating"">";
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Value;
regexPattern = @"class=""us_(?<rating>[^\s]*)";
string mCertUsa = Regex.Match(block, regexPattern, RegexOptions.Singleline).Groups["rating"].Value.Replace("_",
@@ -421,7 +441,7 @@ internal class Grabber : IIMDBScriptGrabber
//Try to find rating according user country
char[] splitter = { '|' };
string[] landRating = tempRating.Split(splitter);
-
+
if (landRating.Length > 0)
{
for (int i = 0; i < landRating.Length; ++i)
@@ -430,7 +450,9 @@ internal class Grabber : IIMDBScriptGrabber
//Country check - IMDB is not so ISO oriented
if (mpaRating.Contains("UK:"))
+ {
mpaRating = mpaRating.Replace("UK:", "United Kingdom:");
+ }
//
if (mpaRating.Contains(land) | mpaRating.Contains(landcode))
@@ -451,7 +473,7 @@ internal class Grabber : IIMDBScriptGrabber
for (int i = 0; i < landRating.Length; ++i)
{
mpaRating = landRating[i];
-
+
if (mpaRating.Contains("USA:") &&
!mpaRating.ToLower().Contains("unrated") &&
!mpaRating.ToLower().Contains("approved") &&
@@ -467,7 +489,7 @@ internal class Grabber : IIMDBScriptGrabber
}
}
}
-
+
// No user country but IMDB usa rating from main page is there
if (movieDetails.MPARating == string.Empty)
{
@@ -484,7 +506,7 @@ internal class Grabber : IIMDBScriptGrabber
// Runtime
regexPattern = "<h5>Runtime:</h5>.*?(?<movieRuntime>\\d+)\\smin\\s+|(?<movieRuntime>\\d+)\\smin";
int runtime;
-
+
if (int.TryParse(Regex.Match(strBody, regexPattern).Groups["movieRuntime"].Value, out runtime))
{
movieDetails.RunTime = runtime;
@@ -518,11 +540,11 @@ internal class Grabber : IIMDBScriptGrabber
// Plot short
regexPattern = @"itemprop=""description"">(?<moviePlotShort>[^<]+)<";
string shortPlot = Regex.Match(strBody, regexPattern, RegexOptions.Singleline).Groups["moviePlotShort"].Value;
- string s = HttpUtility.HtmlDecode(Utils.stripHTMLtags(shortPlot));
-
- if (s != null)
+ shortPlot = HttpUtility.HtmlDecode(Utils.stripHTMLtags(shortPlot));
+
+ if (shortPlot != null)
{
- movieDetails.PlotOutline = s.Trim();
+ movieDetails.PlotOutline = shortPlot.Trim();
}
// Plot long
@@ -531,17 +553,17 @@ internal class Grabber : IIMDBScriptGrabber
{
string absoluteUri;
string strPlotHtml = GetPage(strPlotUrl, "utf-8", out absoluteUri);
-
+
if (0 != strPlotHtml.Length)
{
int iPlotStart = strPlotHtml.IndexOf("<p class=\"plotpar\">");
-
+
if (iPlotStart >= 0)
{
iPlotStart += "<p class=\"plotpar\">".Length;
int iPlotEnd = strPlotHtml.IndexOf("<i>", iPlotStart); // ends with <i> for person who wrote it or
-
- if (iPlotEnd < 0)
+
+ if (iPlotEnd < 0)
{
iPlotEnd = strPlotHtml.IndexOf("</p>", iPlotStart); // </p> for end of paragraph
}
@@ -578,24 +600,24 @@ internal class Grabber : IIMDBScriptGrabber
private string SearchCover(string imdbID, string strBody)
{
- string absoluteUri = string.Empty;
- if (imdbID == null) return string.Empty;
- if (imdbID == string.Empty | !imdbID.StartsWith("tt")) return string.Empty;
+ if (string.IsNullOrEmpty(imdbID) || !imdbID.StartsWith("tt")) return string.Empty;
- string cover = "";
+ string cover = string.Empty;
// Get Main Movie page and find default poster link
- Match posterPageLink = Regex.Match(strBody,
- @"/media/rm[^/]*/"+ imdbID + @"\?ref_=tt_ov_i",
- RegexOptions.Singleline);
+ string regexBlockPattern = @"id=""img_primary"">.*?itemprop=""image""";
+ string block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Value;
+ Match posterPageLink = Regex.Match(block, @"src=""(?<image>.*?_V1_)", RegexOptions.Singleline);
// Now parse default cover picture html page to get default cover
- strBody = GetPage("http://www.imdb.com" + posterPageLink.Value, "utf-8", out absoluteUri);
- Match jpgDefault = Regex.Match(strBody, @"link\srel=""image_src""\shref=""(?<jpg>.*?jpg)""");
-
- if (jpgDefault.Success)
+ if (posterPageLink.Success)
{
- cover = HttpUtility.HtmlDecode(jpgDefault.Groups["jpg"].Value);
+ string posterUrl = HttpUtility.HtmlDecode(posterPageLink.Groups["image"].Value);
+
+ if (!string.IsNullOrEmpty(posterUrl))
+ {
+ cover = posterUrl + "SX400.jpg";
+ }
}
return cover;
}
View
127 mediaportal/MediaPortal.Base/scripts/MovieInfo/IMDB_MP13x.csscript
@@ -211,7 +211,7 @@ internal class Grabber : IIMDBScriptGrabber
string strAbsUrl;
string strBody = GetPage(url.URL, "utf-8", out strAbsUrl);
-
+
if (string.IsNullOrEmpty(strBody))
{
return false;
@@ -223,7 +223,7 @@ internal class Grabber : IIMDBScriptGrabber
// IMDB Number
int iPos = strAbsUrl.IndexOf("/title/");
-
+
if (iPos > 0)
{
iPos += "/title/".Length;
@@ -239,19 +239,42 @@ internal class Grabber : IIMDBScriptGrabber
// Get covers
movieDetails.ThumbURL = SearchCover(movieDetails.IMDBNumber, strBody);
+ // Title
+ string engTitleBodyUrl = url.URL + @"releaseinfo?ref_=tt_dt_dt#akas";
+ string engNamePage = GetPage(engTitleBodyUrl, "utf-8", out strAbsUrl);
+ regexPattern = @"<title>(?<movieTitle>.*?)[(].*?(?<movieYear>\d{4})";
+ string titleIMDB = Regex.Match(engNamePage, regexPattern).Groups["movieTitle"].Value.Replace("IMDb -", string.Empty);
+
+ if (!string.IsNullOrEmpty(titleIMDB))
+ {
+ titleIMDB = HttpUtility.HtmlDecode(titleIMDB);
+ if (titleIMDB != null) movieDetails.Title = titleIMDB.Trim();
+ // Year
+ int year = 0;
+ int.TryParse(Regex.Match(engNamePage, regexPattern).Groups["movieYear"].Value, out year);
+ movieDetails.Year = year;
+ }
+ else
+ {
+ titleIMDB = Regex.Match(strBody, regexPattern).Groups["movieTitle"].Value.Replace("IMDb -", string.Empty);
+ titleIMDB = HttpUtility.HtmlDecode(titleIMDB);
+ if (titleIMDB != null) movieDetails.Title = titleIMDB.Trim();
+ // Year
+ int year = 0;
+ int.TryParse(Regex.Match(strBody, regexPattern).Groups["movieYear"].Value, out year);
+ movieDetails.Year = year;
+ }
+
+ engNamePage = null;
+
// TMDB Title
bool useTMDB = false; // set-> false <-for original IMDB
- regexPattern = @"<title>(?<movieTitle>.*?)[(].*?(?<movieYear>\d{4})";
- string titleIMDB = Regex.Match(strBody, regexPattern).Groups["movieTitle"].Value.Replace("IMDb -", string.Empty);
- movieDetails.Title = HttpUtility.HtmlDecode(titleIMDB).Trim();
-
if (useTMDB)
{
string tmdbUrl = "http://api.themoviedb.org/2.1/Movie.imdbLookup/-/xml/2ed40b5d82aa804a2b1fcedb5ca8d97a/" +
movieDetails.IMDBNumber;
- string strAbsTmdbUrl = string.Empty;
- string strTmdbXML = GetPage(tmdbUrl, "utf-8", out strAbsTmdbUrl);
+ string strTmdbXML = GetPage(tmdbUrl, "utf-8", out strAbsUrl);
string titleEng = Regex.Match(strTmdbXML, "<name>(?<titleEng>.*?)</name>").Groups["titleEng"].Value.
Replace("&amp;", "&").
Replace("&lt;", "<").
@@ -264,25 +287,20 @@ internal class Grabber : IIMDBScriptGrabber
}
}
- // Year
- int year = 0;
- int.TryParse(Regex.Match(strBody, regexPattern).Groups["movieYear"].Value, out year);
- movieDetails.Year = year;
-
// Director
regexBlockPattern = @"<h4[^>]*>[^D]*Director[s]?:[^<]*</h4>[^<]*(?<directors_block>.*?)</div>";
- regexPattern = @"href=""/name/nm\d{7}/[^>]*>(?<movieDirectors>[^<]+)</a>";
+ regexPattern = @"href=""/name/nm\d{7}/[^>]*>(?<movieDirectors>.*?)</a>";
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Groups["directors_block"].Value;
movieDetails.Director =
- HttpUtility.HtmlDecode(Regex.Match(block, regexPattern, RegexOptions.Singleline).Groups["movieDirectors"].Value);
+ Utils.stripHTMLtags(HttpUtility.HtmlDecode(Regex.Match(block, regexPattern, RegexOptions.Singleline).Groups["movieDirectors"].Value));
// Writers
regexBlockPattern = "<h4[^>]*>[^W]*Writer[s]?:[^<]*</h4>[^<]*(?<writers_block>.*?)</div>";
regexPattern =
- @"href=""/name/nm\d{7}/[^>]*>(?<writer>[^<]+)</a>[^(]*(?<role>[^)]*\))|href=""/name/nm\d{7}/[^>]*>(?<writer>[^<]+)</a>(?<role>)";
+ @"href=""/name/nm\d{7}/[^>]*>(?<writer>.*?)</a>[^(]*(?<role>[^)]*\))|href=""/name/nm\d{7}/[^>]*>(?<writer>.*?)</a>(?<role>)";
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Groups["writers_block"].Value;
MatchCollection mcWriters = Regex.Matches(block, regexPattern);
-
+
if (mcWriters.Count != 0)
{
string writers = string.Empty;
@@ -291,9 +309,11 @@ internal class Grabber : IIMDBScriptGrabber
{
string strWriter = string.Empty;
strWriter = HttpUtility.HtmlDecode(mwriter.Groups["writer"].Value);
+ strWriter = Utils.stripHTMLtags(strWriter);
string strWriterRole = string.Empty;
strWriterRole = HttpUtility.HtmlDecode(mwriter.Groups["role"].Value);
-
+ strWriterRole = Utils.stripHTMLtags(strWriterRole);
+
if (strWriterRole != string.Empty)
{
writers = writers + strWriter + " " + strWriterRole + " / ";
@@ -308,7 +328,7 @@ internal class Grabber : IIMDBScriptGrabber
if (!string.IsNullOrEmpty(writers))
{
writers = HttpUtility.HtmlDecode(writers.Remove(writers.LastIndexOf(" / ")));
-
+
if (writers != null)
{
movieDetails.WritingCredits = writers.Replace("...", "").Trim();
@@ -350,7 +370,7 @@ internal class Grabber : IIMDBScriptGrabber
strRole = m.Groups["role"].Value;
strRole = Utils.stripHTMLtags(strRole).Trim().Replace("\n", "");
string decode = HttpUtility.HtmlDecode(strRole);
-
+
if (decode != null)
{
strRole = decode.Replace(",", ";").Replace(" ", "").Replace("(", " (").Replace(" /", "/");
@@ -362,7 +382,7 @@ internal class Grabber : IIMDBScriptGrabber
{
movieDetails.Cast += " as " + strRole;
}
-
+
movieDetails.Cast += "\n";
}
}
@@ -376,12 +396,12 @@ internal class Grabber : IIMDBScriptGrabber
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Value;
MatchCollection mcGenre = Regex.Matches(block, regexPattern);
string genre = string.Empty;
-
+
foreach (Match m in mcGenre)
{
genre = genre + m.Groups["movieGenres"].Value.Trim() + " / ";
}
-
+
try
{
if (!string.IsNullOrEmpty(genre))
@@ -395,16 +415,16 @@ internal class Grabber : IIMDBScriptGrabber
// Studios
regexBlockPattern = @"<h4\sclass=""inline"">Production\sCo:</h4>.*?href=""companycredits";
- regexPattern = @"href=""/company[^""]+""\s+.*?>(?<movieStudios>[^<]+)</a>";
+ regexPattern = @"href=""/company[^""]+""\s+.*?>(?<movieStudios>.*?)</a>";
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Value;
MatchCollection mcStudios = Regex.Matches(block, regexPattern, RegexOptions.Singleline);
string studio = string.Empty;
-
+
foreach (Match ms in mcStudios)
{
- studio = studio + ms.Groups["movieStudios"] + " / ";
+ studio = studio + Utils.stripHTMLtags(ms.Groups["movieStudios"].Value) + " / ";
}
-
+
try
{
if (!string.IsNullOrEmpty(studio))
@@ -429,7 +449,7 @@ internal class Grabber : IIMDBScriptGrabber
// MPARating
//
// Get rating from main movie page if exists
- regexBlockPattern = @"<div\sclass=""infobar"">.*?<span\stitle=""[^""]*"".*?itemprop=""contentRating""></span>";
+ regexBlockPattern = @"<div\sclass=""infobar"">.*?<span\stitle=""[^""]*"".*?itemprop=""contentRating"">";
block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Value;
regexPattern = @"class=""us_(?<rating>[^\s]*)";
string mCertUsa = Regex.Match(block, regexPattern, RegexOptions.Singleline).Groups["rating"].Value.Replace("_",
@@ -454,7 +474,7 @@ internal class Grabber : IIMDBScriptGrabber
//Try to find rating according user country
char[] splitter = { '|' };
string[] landRating = tempRating.Split(splitter);
-
+
if (landRating.Length > 0)
{
for (int i = 0; i < landRating.Length; ++i)
@@ -463,7 +483,9 @@ internal class Grabber : IIMDBScriptGrabber
//Country check - IMDB is not so ISO oriented
if (mpaRating.Contains("UK:"))
+ {
mpaRating = mpaRating.Replace("UK:", "United Kingdom:");
+ }
//
if (mpaRating.Contains(land) | mpaRating.Contains(landcode))
@@ -484,7 +506,7 @@ internal class Grabber : IIMDBScriptGrabber
for (int i = 0; i < landRating.Length; ++i)
{
mpaRating = landRating[i];
-
+
if (mpaRating.Contains("USA:") &&
!mpaRating.ToLower().Contains("unrated") &&
!mpaRating.ToLower().Contains("approved") &&
@@ -500,7 +522,7 @@ internal class Grabber : IIMDBScriptGrabber
}
}
}
-
+
// No user country but IMDB usa rating from main page is there
if (movieDetails.MPARating == string.Empty)
{
@@ -517,7 +539,7 @@ internal class Grabber : IIMDBScriptGrabber
// Runtime
regexPattern = "<h5>Runtime:</h5>.*?(?<movieRuntime>\\d+)\\smin\\s+|(?<movieRuntime>\\d+)\\smin";
int runtime;
-
+
if (int.TryParse(Regex.Match(strBody, regexPattern).Groups["movieRuntime"].Value, out runtime))
{
movieDetails.RunTime = runtime;
@@ -551,11 +573,11 @@ internal class Grabber : IIMDBScriptGrabber
// Plot short
regexPattern = @"itemprop=""description"">(?<moviePlotShort>[^<]+)<";
string shortPlot = Regex.Match(strBody, regexPattern, RegexOptions.Singleline).Groups["moviePlotShort"].Value;
- string s = HttpUtility.HtmlDecode(Utils.stripHTMLtags(shortPlot));
-
- if (s != null)
+ shortPlot = HttpUtility.HtmlDecode(Utils.stripHTMLtags(shortPlot));
+
+ if (shortPlot != null)
{
- movieDetails.PlotOutline = s.Trim();
+ movieDetails.PlotOutline = shortPlot.Trim();
}
// Plot long
@@ -564,17 +586,17 @@ internal class Grabber : IIMDBScriptGrabber
{
string absoluteUri;
string strPlotHtml = GetPage(strPlotUrl, "utf-8", out absoluteUri);
-
+
if (0 != strPlotHtml.Length)
{
int iPlotStart = strPlotHtml.IndexOf("<p class=\"plotpar\">");
-
+
if (iPlotStart >= 0)
{
iPlotStart += "<p class=\"plotpar\">".Length;
int iPlotEnd = strPlotHtml.IndexOf("<i>", iPlotStart); // ends with <i> for person who wrote it or
-
- if (iPlotEnd < 0)
+
+ if (iPlotEnd < 0)
{
iPlotEnd = strPlotHtml.IndexOf("</p>", iPlotStart); // </p> for end of paragraph
}
@@ -607,6 +629,7 @@ internal class Grabber : IIMDBScriptGrabber
strBody = GetPage(reviewUrl, "utf-8", out strAbsUrl);
regexPattern = "<a\\shref=\"/user/.*?<p>(?<review>.*?)</p>";
Match strReview = Regex.Match(HttpUtility.HtmlDecode(strBody), regexPattern, RegexOptions.Singleline);
+
if (strReview.Success)
{
string review = Utils.stripHTMLtags(strReview.Groups["review"].Value.Replace("\n", " ").Trim());
@@ -622,24 +645,24 @@ internal class Grabber : IIMDBScriptGrabber
private string SearchCover(string imdbID, string strBody)
{
- string absoluteUri = string.Empty;
- if (imdbID == null) return string.Empty;
- if (imdbID == string.Empty | !imdbID.StartsWith("tt")) return string.Empty;
+ if (string.IsNullOrEmpty(imdbID) || !imdbID.StartsWith("tt")) return string.Empty;
- string cover = "";
+ string cover = string.Empty;
// Get Main Movie page and find default poster link
- Match posterPageLink = Regex.Match(strBody,
- @"/media/rm[^/]*/"+ imdbID + @"\?ref_=tt_ov_i",
- RegexOptions.Singleline);
+ string regexBlockPattern = @"id=""img_primary"">.*?itemprop=""image""";
+ string block = Regex.Match(strBody, regexBlockPattern, RegexOptions.Singleline).Value;
+ Match posterPageLink = Regex.Match(block, @"src=""(?<image>.*?_V1_)", RegexOptions.Singleline);
// Now parse default cover picture html page to get default cover
- strBody = GetPage("http://www.imdb.com" + posterPageLink.Value, "utf-8", out absoluteUri);
- Match jpgDefault = Regex.Match(strBody, @"link\srel=""image_src""\shref=""(?<jpg>.*?jpg)""");
-
- if (jpgDefault.Success)
+ if (posterPageLink.Success)
{
- cover = HttpUtility.HtmlDecode(jpgDefault.Groups["jpg"].Value);
+ string posterUrl = HttpUtility.HtmlDecode(posterPageLink.Groups["image"].Value);
+
+ if (!string.IsNullOrEmpty(posterUrl))
+ {
+ cover = posterUrl + "SX400.jpg";
+ }
}
return cover;
}
View
4 mediaportal/MediaPortal.Base/scripts/VDBParserStrings.xml
@@ -29,7 +29,7 @@
<![CDATA[http://www.imdb.com/title/]]>
</string>
<string><!--1-->
- <![CDATA[id="img_primary">.*?src='/rg/title-overview/primary/images.*?href="(?<defaultPic>.*?)"]]>
+ <![CDATA[(?<defaultPic>/media/rm[^/]*/tt\d{7})]]>
</string>
<string><!--2-->
<![CDATA[http://www.imdb.com]]>
@@ -65,7 +65,7 @@
<![CDATA[Writer[s]?:\s*</h4>[^<]*(?<writers_block>.*?)</div>]]>
</string>
<string><!--4-->
- <![CDATA[/writer-\d/.*?/name/(?<imdbWriterId>nm\d{7})/"[\s]+>(?<writer>.*?)</a>\s+<span>(?<wrole>[(].*?[)])</span>|/writer-\d/.*?/name/(?<imdbWriterId>nm\d{7})/"[\s]+>(?<writer>.*?)</a>]]>
+ <![CDATA[href="/name/(?<imdbWriterId>nm\d{7})/[^>]*>(?<writer>[^<]+)</a>[^(]*(?<wrole>[^)]*\))|href="/name/(?<imdbWriterId>nm\d{7})/[^>]*>(?<writer>[^<]+)</a>(?<wrole>)]]>
</string>
<string><!--5-->
<![CDATA[<table class="cast">.*?</table>|<table class="cast_list">.*?</table>]]>

0 comments on commit bb2e029

Please sign in to comment.