Skip to content

Commit

Permalink
- Support --force-js for Copyparty
Browse files Browse the repository at this point in the history
  • Loading branch information
KoalaBear84 committed Jan 27, 2023
1 parent 64c29d1 commit 02ede0b
Show file tree
Hide file tree
Showing 3 changed files with 229 additions and 43 deletions.
47 changes: 4 additions & 43 deletions src/OpenDirectoryDownloader/DirectoryParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using OpenDirectoryDownloader.Shared;
using OpenDirectoryDownloader.Shared.Models;
using OpenDirectoryDownloader.Site.BlitzfilesTech;
using OpenDirectoryDownloader.Site.Copyparty;
using OpenDirectoryDownloader.Site.Dropbox;
using OpenDirectoryDownloader.Site.GDIndex;
using OpenDirectoryDownloader.Site.GDIndex.Bhadoo;
Expand Down Expand Up @@ -269,7 +270,7 @@ await foreach (string source in sources)
// copyparty
if (htmlDocument.QuerySelector("#op_bup #u2err") is not null)
{
return ParseCopypartyListing(baseUrl, parsedWebDirectory, htmlDocument);
return await ParseCopypartyListingAsync(baseUrl, httpClient, parsedWebDirectory, htmlDocument, html);
}

IHtmlCollection<IElement> pres = htmlDocument.QuerySelectorAll("pre");
Expand Down Expand Up @@ -443,49 +444,9 @@ await foreach (string source in sources)
return parsedWebDirectory;
}

private static WebDirectory ParseCopypartyListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlDocument htmlDocument)
private static async Task<WebDirectory> ParseCopypartyListingAsync(string baseUrl, HttpClient httpClient, WebDirectory parsedWebDirectory, IHtmlDocument htmlDocument, string html)
{
IElement table = htmlDocument.QuerySelector("table#files");

parsedWebDirectory.ParsedSuccessfully = true;

IHtmlCollection<IElement> entries = table.QuerySelectorAll("tbody tr");

foreach (IElement entry in entries)
{
IHtmlAnchorElement link = entry.QuerySelector("td:nth-child(2) a") as IHtmlAnchorElement;
IHtmlTableCellElement fileSize = entry.QuerySelector("td:nth-child(3)") as IHtmlTableCellElement;

bool isDirectory = link.TextContent.EndsWith("/");

if (link is not null)
{
ProcessUrl(baseUrl, link, out _, out _, out string fullUrl);

if (isDirectory)
{
string directoryName = link.TextContent.TrimEnd('/');

parsedWebDirectory.Subdirectories.Add(new WebDirectory(parsedWebDirectory)
{
Parser = "ParseCopypartyListing",
Url = fullUrl,
Name = directoryName
});
}
else
{
parsedWebDirectory.Files.Add(new WebFile
{
Url = fullUrl,
FileName = Path.GetFileName(WebUtility.UrlDecode(fullUrl.Split('?')[0])),
FileSize = FileSizeHelper.ParseFileSize(fileSize.TextContent)
});
}
}
}

return parsedWebDirectory;
return await Copyparty.ParseIndex(baseUrl, httpClient, parsedWebDirectory, htmlDocument, html);
}

private static WebDirectory ParseDirLIST(string baseUrl, WebDirectory parsedWebDirectory, IHtmlDocument htmlDocument, IHtmlCollection<IElement> tables)
Expand Down
154 changes: 154 additions & 0 deletions src/OpenDirectoryDownloader/Site/Copyparty/CopypartyParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using OpenDirectoryDownloader.Helpers;
using OpenDirectoryDownloader.Shared.Models;
using System.Net;
using System.Text.RegularExpressions;

namespace OpenDirectoryDownloader.Site.Copyparty;

/// <summary>
/// Parser for directory listings served by Copyparty.
/// The server-rendered <c>table#files</c> is used when it contains rows; otherwise the
/// <c>ls0</c> JavaScript listing embedded in the page source is parsed instead.
/// </summary>
public static class Copyparty
{
    private const string Parser = "Copyparty";

    // The listing is embedded as a single-line "ls0 = {...};" assignment.
    // With RegexOptions.Multiline, '$' only matches right before '\n', so an optional '\r'
    // is accepted to keep pages with CRLF line endings matching.
    private static readonly Regex JsListingRegex = new("ls0\\s?=\\s?(?<Listing>.*);\\r?$", RegexOptions.Multiline);

    /// <summary>
    /// Parses a Copyparty index page into <paramref name="webDirectory"/>.
    /// </summary>
    /// <param name="baseUrl">Base URL used to resolve the links found in the listing.</param>
    /// <param name="httpClient">HTTP client; passed through but currently not used by the parser.</param>
    /// <param name="webDirectory">Directory that receives the parsed subdirectories and files.</param>
    /// <param name="htmlDocument">Parsed HTML document of the index page.</param>
    /// <param name="html">Raw HTML source of the index page, used for the JavaScript listing fallback.</param>
    /// <returns>The directory returned by the scan.</returns>
    /// <exception cref="Exception">Any exception escaping the scan is logged, registered in the session and rethrown.</exception>
    public static async Task<WebDirectory> ParseIndex(string baseUrl, HttpClient httpClient, WebDirectory webDirectory, IHtmlDocument htmlDocument, string html)
    {
        try
        {
            webDirectory = await ScanAsync(baseUrl, httpClient, webDirectory, htmlDocument, html);
        }
        catch (Exception ex)
        {
            Program.Logger.Error(ex, "Error parsing {parser} for '{url}'", Parser, webDirectory.Url);
            RegisterError(webDirectory);

            throw;
        }

        return webDirectory;
    }

    /// <summary>
    /// Reads the HTML table listing, falling back to the embedded JavaScript listing when the
    /// table is missing or has no rows. Failures while processing are logged and registered in the
    /// session, but deliberately not rethrown: the directory is returned flagged with an error.
    /// </summary>
    private static async Task<WebDirectory> ScanAsync(string baseUrl, HttpClient httpClient, WebDirectory webDirectory, IHtmlDocument htmlDocument, string html)
    {
        Program.Logger.Debug("Processing listings for '{url}'", webDirectory.Uri);

        webDirectory.Parser = Parser;

        try
        {
            // QuerySelector returns null when nothing matches; a missing table is handled the
            // same way as an empty one instead of failing with a NullReferenceException.
            IElement table = htmlDocument.QuerySelector("table#files");
            IHtmlCollection<IElement> entries = table?.QuerySelectorAll("tbody tr");

            if (entries is null || !entries.Any())
            {
                return ParseCopypartyJavaScriptListing(baseUrl, webDirectory, htmlDocument, html);
            }

            foreach (IElement entry in entries)
            {
                // Rows without a link in the second cell carry no entry; skip them before
                // touching the link (the text was previously read ahead of the null check).
                if (entry.QuerySelector("td:nth-child(2) a") is not IHtmlAnchorElement link)
                {
                    continue;
                }

                IHtmlTableCellElement fileSize = entry.QuerySelector("td:nth-child(3)") as IHtmlTableCellElement;

                Library.ProcessUrl(baseUrl, link, out _, out _, out string fullUrl);

                // Directory links are recognized by a trailing slash in the link text.
                if (link.TextContent.EndsWith('/'))
                {
                    webDirectory.Subdirectories.Add(new WebDirectory(webDirectory)
                    {
                        Parser = Parser,
                        Url = fullUrl,
                        Name = link.TextContent.TrimEnd('/')
                    });
                }
                else
                {
                    webDirectory.Files.Add(new WebFile
                    {
                        Url = fullUrl,
                        // The file name is taken from the URL path with the query string removed.
                        FileName = Path.GetFileName(WebUtility.UrlDecode(fullUrl.Split('?')[0])),
                        FileSize = FileSizeHelper.ParseFileSize(fileSize.TextContent)
                    });
                }
            }

            webDirectory.ParsedSuccessfully = true;
        }
        catch (Exception ex)
        {
            Program.Logger.Error(ex, "Error processing {parser} for '{url}'", Parser, webDirectory.Url);
            RegisterError(webDirectory);

            // Intentionally swallowed: the error is recorded on the directory and in the session.
        }

        return webDirectory;
    }

    /// <summary>
    /// Parses the <c>ls0</c> JSON listing embedded in the page source.
    /// When no listing is found (or it deserializes to null) the directory is returned unchanged,
    /// without being marked as parsed successfully.
    /// </summary>
    private static WebDirectory ParseCopypartyJavaScriptListing(string baseUrl, WebDirectory parsedWebDirectory, IHtmlDocument htmlDocument, string html)
    {
        Match jsListingRegexMatch = JsListingRegex.Match(html);

        if (!jsListingRegexMatch.Success)
        {
            return parsedWebDirectory;
        }

        CopypartyListing copypartyListing = CopypartyListing.FromJson(jsListingRegexMatch.Groups["Listing"].Value);

        if (copypartyListing is null)
        {
            return parsedWebDirectory;
        }

        Uri baseUri = new(baseUrl);

        // "dirs" / "files" may be absent from the JSON, which leaves the arrays null.
        foreach (Dir dir in copypartyListing.Dirs ?? Array.Empty<Dir>())
        {
            parsedWebDirectory.Subdirectories.Add(new WebDirectory(parsedWebDirectory)
            {
                Parser = Parser,
                Url = new Uri(baseUri, dir.Href).ToString(),
                Name = dir.Name.TrimEnd('/')
            });
        }

        foreach (Dir file in copypartyListing.Files ?? Array.Empty<Dir>())
        {
            parsedWebDirectory.Files.Add(new WebFile
            {
                Url = new Uri(baseUri, file.Href).ToString(),
                FileName = file.Name,
                FileSize = file.Sz
            });
        }

        parsedWebDirectory.ParsedSuccessfully = true;

        return parsedWebDirectory;
    }

    /// <summary>
    /// Flags the directory as failed and records the failure in the current session:
    /// increments the error counter and adds the URL to the error list once.
    /// </summary>
    private static void RegisterError(WebDirectory webDirectory)
    {
        webDirectory.Error = true;

        OpenDirectoryIndexer.Session.Errors++;

        if (!OpenDirectoryIndexer.Session.UrlsWithErrors.Contains(webDirectory.Url))
        {
            OpenDirectoryIndexer.Session.UrlsWithErrors.Add(webDirectory.Url);
        }
    }
}
71 changes: 71 additions & 0 deletions src/OpenDirectoryDownloader/Site/Copyparty/CopypartyResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
using System.Globalization;

namespace OpenDirectoryDownloader.Site.Copyparty;

/// <summary>
/// Root object of the Copyparty listing JSON, mapping its "dirs", "files" and "taglist" arrays.
/// The arrays default to empty so that a listing which omits a key can be enumerated
/// without a null check. An explicit JSON <c>null</c> still yields null.
/// </summary>
public partial class CopypartyListing
{
    /// <summary>Entries of the "dirs" array.</summary>
    [JsonProperty("dirs")]
    public Dir[] Dirs { get; set; } = Array.Empty<Dir>();

    /// <summary>Entries of the "files" array; they share the <see cref="Dir"/> shape.</summary>
    [JsonProperty("files")]
    public Dir[] Files { get; set; } = Array.Empty<Dir>();

    /// <summary>Raw values of the "taglist" array; kept untyped.</summary>
    [JsonProperty("taglist")]
    public object[] Taglist { get; set; } = Array.Empty<object>();
}

/// <summary>
/// One entry of a Copyparty listing. The same shape is used for the elements of both the
/// "dirs" and the "files" arrays of <see cref="CopypartyListing"/>.
/// </summary>
public partial class Dir
{
/// <summary>Value of the "dt" key. NOTE(review): presumably the entry's date/time - confirm against Copyparty's output.</summary>
[JsonProperty("dt")]
public DateTimeOffset Dt { get; set; }

/// <summary>Value of the "ext" key. NOTE(review): presumably the file extension - confirm.</summary>
[JsonProperty("ext")]
public string Ext { get; set; }

/// <summary>Value of the "href" key; the parser resolves it against the base URL to build the entry's URL.</summary>
[JsonProperty("href")]
public string Href { get; set; }

/// <summary>Value of the "lead" key. NOTE(review): meaning not visible in this file - confirm.</summary>
[JsonProperty("lead")]
public string Lead { get; set; }

/// <summary>Value of the "name" key; the parser uses it as the directory name or file name.</summary>
[JsonProperty("name")]
public string Name { get; set; }

/// <summary>Value of the "sz" key; the parser uses it as the file size.</summary>
[JsonProperty("sz")]
public long Sz { get; set; }

/// <summary>Value of the "tags" key.</summary>
[JsonProperty("tags")]
public Tags Tags { get; set; }

/// <summary>Value of the "ts" key. NOTE(review): presumably a numeric timestamp - confirm unit and epoch.</summary>
[JsonProperty("ts")]
public long Ts { get; set; }
}

/// <summary>
/// Type of the "tags" object of a listing entry. No members are mapped, so any properties
/// present in the JSON are not captured here.
/// </summary>
public partial class Tags
{
}

public partial class CopypartyListing
{
    /// <summary>
    /// Deserializes a Copyparty listing from its JSON text using the shared converter settings.
    /// </summary>
    /// <param name="json">JSON text of the listing.</param>
    /// <returns>The deserialized listing.</returns>
    public static CopypartyListing FromJson(string json)
    {
        return JsonConvert.DeserializeObject<CopypartyListing>(json, Converter.Settings);
    }
}

/// <summary>
/// Serialization helpers for the Copyparty listing model.
/// </summary>
public static class Serialize
{
    /// <summary>
    /// Serializes the listing to JSON text using the shared converter settings.
    /// </summary>
    /// <param name="self">Listing to serialize.</param>
    /// <returns>The JSON text.</returns>
    public static string ToJson(this CopypartyListing self)
    {
        return JsonConvert.SerializeObject(self, Converter.Settings);
    }
}

/// <summary>
/// Holds the Json.NET settings shared by serialization and deserialization of the listing model.
/// </summary>
internal static class Converter
{
    /// <summary>
    /// Settings with metadata property handling set to ignore, date parsing switched off, and an
    /// ISO date/time converter (assume-universal style) registered.
    /// </summary>
    public static readonly JsonSerializerSettings Settings = CreateSettings();

    // Builds the settings once; invoked by the static field initializer above.
    private static JsonSerializerSettings CreateSettings()
    {
        JsonSerializerSettings settings = new()
        {
            MetadataPropertyHandling = MetadataPropertyHandling.Ignore,
            DateParseHandling = DateParseHandling.None
        };

        IsoDateTimeConverter isoDateTimeConverter = new()
        {
            DateTimeStyles = DateTimeStyles.AssumeUniversal
        };

        settings.Converters.Add(isoDateTimeConverter);

        return settings;
    }
}

0 comments on commit 02ede0b

Please sign in to comment.