Merge pull request #43 from AlexCSDev/0_9_4_1

Fixed #40, #42
AlexCSDev · Jan 24, 2021 · ee4bcef
2 parents 33790e1 + ed45668
commit ee4bcef
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 15 deletions.
diff --git a/PatreonDownloader.Engine/DefaultPlugin.cs b/PatreonDownloader.Engine/DefaultPlugin.cs
@@ -191,6 +191,9 @@ public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
             {
                 foreach (var imgNode in imgNodeCollection)
                 {
+                    if (imgNode.Attributes.Count == 0 || !imgNode.Attributes.Contains("src"))
+                        continue;
+
                     string url = imgNode.Attributes["src"].Value;
 
                     if (IsAllowedUrl(url))
@@ -207,19 +210,15 @@ public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
             {
                 foreach (var linkNode in linkNodeCollection)
                 {
-                    if (linkNode.Attributes["href"] != null)
-                    {
-                        var url = linkNode.Attributes["href"].Value;
+                    if (linkNode.Attributes.Count == 0 || !linkNode.Attributes.Contains("href"))
+                        continue;
 
-                        if (IsAllowedUrl(url))
-                        {
-                            retList.Add(url);
-                            _logger.Debug($"Parsed by default plugin (direct): {url}");
-                        }
-                    }
-                    else
+                    var url = linkNode.Attributes["href"].Value;
+
+                    if (IsAllowedUrl(url))
                     {
-                        _logger.Warn($"link with invalid href found, ignoring...");
+                        retList.Add(url);
+                        _logger.Debug($"Parsed by default plugin (direct): {url}");
                     }
                 }
             }

diff --git a/PatreonDownloader.MegaDownloader/Plugin.cs b/PatreonDownloader.MegaDownloader/Plugin.cs
@@ -92,13 +92,14 @@ public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
             List<string> retList = new List<string>();
             HtmlDocument doc = new HtmlDocument();
             doc.LoadHtml(htmlContents);
-            string plainText = string.Join(" ", doc.DocumentNode.Descendants()
+            string parseText = string.Join(" ", doc.DocumentNode.Descendants()
                 .Where(n => !n.HasChildNodes && !string.IsNullOrWhiteSpace(n.InnerText))
-                .Select(n => n.InnerText));
+                .Select(n => n.InnerText)); //first get a copy of text without all html tags
+            parseText += doc.DocumentNode.InnerHtml; //now append a copy of this text with all html tags intact (otherwise we lose all <a href=... links)
 
-            MatchCollection matchesNewFormat = _newFormatRegex.Matches(plainText);
+            MatchCollection matchesNewFormat = _newFormatRegex.Matches(parseText);
 
-            MatchCollection matchesOldFormat = _oldFormatRegex.Matches(plainText);
+            MatchCollection matchesOldFormat = _oldFormatRegex.Matches(parseText);
 
             _logger.Debug($"Found NEW:{matchesNewFormat.Count}|OLD:{matchesOldFormat.Count} possible mega links in description");