Skip to content

Commit

Permalink
+ interfaces ICrawler, ICrawlFacade, IPostParser & IPostSaver
Browse files Browse the repository at this point in the history
- generic type param `TBaseRevision` @ CrawlFacade.cs
@ c#/crawler
  • Loading branch information
n0099 committed May 10, 2024
1 parent 584499b commit c2bf8f8
Show file tree
Hide file tree
Showing 12 changed files with 75 additions and 23 deletions.
4 changes: 2 additions & 2 deletions c#/crawler/src/EntryPoint.cs
Expand Up @@ -36,8 +36,8 @@ protected override void ConfigureContainer(HostBuilderContext context, Container
{
builder.RegisterImplementsOfBaseTypes(typeof(EntryPoint).Assembly,
[
typeof(BaseCrawler<,>), typeof(CrawlFacade<,,,>),
typeof(PostParser<,>), typeof(BaseSaver<>)
typeof(ICrawler<,>), typeof(ICrawlFacade<>),
typeof(IPostParser<,>), typeof(BaseSaver<>)
]);
builder.RegisterType<CrawlerDbContext>();
builder.RegisterType<ClientRequester>();
Expand Down
3 changes: 1 addition & 2 deletions c#/crawler/src/Tieba/Crawl/Crawler/BaseCrawler.cs
Expand Up @@ -3,8 +3,6 @@ namespace tbm.Crawler.Tieba.Crawl.Crawler;
public abstract partial class BaseCrawler<TResponse, TPostProtoBuf>
{
public abstract Exception FillExceptionData(Exception e);

// ReSharper disable once UnusedParameter.Global
public abstract IReadOnlyCollection<TPostProtoBuf> GetValidPosts(TResponse response, CrawlRequestFlag flag);
public abstract TbClient.Page? GetResponsePage(TResponse response);
protected abstract IReadOnlyCollection<TPostProtoBuf> GetResponsePostList(TResponse response);
Expand All @@ -15,6 +13,7 @@ public record Response(TResponse Result, CrawlRequestFlag Flag = CrawlRequestFla
protected record Request(Task<TResponse> Response, CrawlRequestFlag Flag = CrawlRequestFlag.None);
}
public abstract partial class BaseCrawler<TResponse, TPostProtoBuf>
: ICrawler<TResponse, TPostProtoBuf>
where TResponse : class, IMessage<TResponse>
where TPostProtoBuf : class, IMessage<TPostProtoBuf>
{
Expand Down
10 changes: 10 additions & 0 deletions c#/crawler/src/Tieba/Crawl/Crawler/ICrawler.cs
@@ -0,0 +1,10 @@
namespace tbm.Crawler.Tieba.Crawl.Crawler;

/// <summary>
/// Abstraction over a crawler that consumes a <typeparamref name="TResponse"/>
/// and exposes the <typeparamref name="TPostProtoBuf"/> posts found in it.
/// </summary>
/// <typeparam name="TResponse">
/// Response message type; only ever accepted as an argument, hence contravariant.
/// </typeparam>
/// <typeparam name="TPostProtoBuf">
/// Post message type; only ever returned, hence covariant.
/// </typeparam>
public interface ICrawler<in TResponse, out TPostProtoBuf>
    where TResponse : class, IMessage<TResponse>
    where TPostProtoBuf : class, IMessage<TPostProtoBuf>
{
    /// <summary>Accepts an exception and hands an exception back.</summary>
    /// <remarks>
    /// NOTE(review): presumably enriches <paramref name="e"/> with crawler-specific
    /// context before returning it — confirm against the implementers.
    /// </remarks>
    /// <param name="e">The exception to process.</param>
    /// <returns>The resulting exception.</returns>
    Exception FillExceptionData(Exception e);

    /// <summary>Gets the posts of <paramref name="response"/> for the given request flag.</summary>
    /// <remarks>
    /// NOTE(review): what makes a post "valid" is decided by the implementer and is
    /// not visible from this contract — verify before relying on it.
    /// </remarks>
    /// <param name="response">The response to read posts from.</param>
    /// <param name="flag">The flag of the request that produced <paramref name="response"/>.</param>
    /// <returns>A read-only collection of posts.</returns>
    IReadOnlyCollection<TPostProtoBuf> GetValidPosts(TResponse response, CrawlRequestFlag flag);

    /// <summary>Gets the page information carried by <paramref name="response"/>, if any.</summary>
    /// <param name="response">The response to inspect.</param>
    /// <returns>The page information, or <see langword="null"/> when there is none.</returns>
    TbClient.Page? GetResponsePage(TResponse response);
}
24 changes: 11 additions & 13 deletions c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs
@@ -1,31 +1,28 @@
namespace tbm.Crawler.Tieba.Crawl.Facade;

#pragma warning disable S3881 // "IDisposable" should be implemented correctly
public abstract class CrawlFacade<TPost, TBaseRevision, TResponse, TPostProtoBuf>(
public abstract class CrawlFacade<TPost, TResponse, TPostProtoBuf>(
#pragma warning restore S3881 // "IDisposable" should be implemented correctly
BaseCrawler<TResponse, TPostProtoBuf> crawler,
Fid fid,
CrawlerLocks.LockId lockId,
CrawlerLocks locks,
PostParser<TPost, TPostProtoBuf> postParser,
Func<ConcurrentDictionary<PostId, TPost>, PostSaver<TPost, TBaseRevision>> postSaverFactory,
IPostParser<TPost, TPostProtoBuf> postParser,
Func<ConcurrentDictionary<PostId, TPost>, IPostSaver<TPost>> postSaverFactory,
Func<ConcurrentDictionary<Uid, User>, UserParser> userParserFactory,
Func<ConcurrentDictionary<Uid, User>, UserSaver> userSaverFactory)
: IDisposable
: ICrawlFacade<TPost>
where TPost : BasePost
where TBaseRevision : BaseRevisionWithSplitting
where TResponse : class, IMessage<TResponse>
where TPostProtoBuf : class, IMessage<TPostProtoBuf>
{
private readonly HashSet<Page> _lockingPages = [];
private readonly ConcurrentDictionary<Uid, User> _users = new();
private UserParser? _userParser;
private ExceptionHandler _exceptionHandler = _ => { };

public delegate void ExceptionHandler(Exception ex);
private ICrawlFacade<TPost>.ExceptionHandler _exceptionHandler = _ => { };

// ReSharper disable UnusedAutoPropertyAccessor.Global
public required ILogger<CrawlFacade<TPost, TBaseRevision, TResponse, TPostProtoBuf>>
public required ILogger<CrawlFacade<TPost, TResponse, TPostProtoBuf>>
Logger { private get; init; }
public required CrawlerDbContext.New DbContextFactory { private get; init; }
public required ClientRequesterTcs RequesterTcs { private get; init; }
Expand Down Expand Up @@ -77,8 +74,10 @@ public virtual void Dispose()
}
}

public async Task<CrawlFacade<TPost, TBaseRevision, TResponse, TPostProtoBuf>>
CrawlPageRange(Page startPage, Page endPage = Page.MaxValue, CancellationToken stoppingToken = default)
public async Task<ICrawlFacade<TPost>> CrawlPageRange(
Page startPage,
Page endPage = Page.MaxValue,
CancellationToken stoppingToken = default)
{ // cancel when startPage is already locked
if (_lockingPages.Count != 0) ThrowHelper.ThrowInvalidOperationException(
"CrawlPageRange() can only be called once, a instance of CrawlFacade shouldn't be reuse for other crawls.");
Expand Down Expand Up @@ -120,8 +119,7 @@ public virtual void Dispose()
return SaveCrawled(stoppingToken);
}

public CrawlFacade<TPost, TBaseRevision, TResponse, TPostProtoBuf>
AddExceptionHandler(ExceptionHandler handler)
public ICrawlFacade<TPost> AddExceptionHandler(ICrawlFacade<TPost>.ExceptionHandler handler)
{
_exceptionHandler += handler;
return this;
Expand Down
21 changes: 21 additions & 0 deletions c#/crawler/src/Tieba/Crawl/Facade/ICrawlFacade.cs
@@ -0,0 +1,21 @@
namespace tbm.Crawler.Tieba.Crawl.Facade;

/// <summary>
/// Disposable facade over crawling a range of pages and saving the
/// <typeparamref name="TPost"/> posts that were crawled.
/// </summary>
/// <typeparam name="TPost">The post entity type handled by the facade.</typeparam>
public interface ICrawlFacade<TPost> : IDisposable
    where TPost : BasePost
{
    /// <summary>Signature of the callbacks accepted by <see cref="AddExceptionHandler"/>.</summary>
    /// <param name="ex">The exception being reported.</param>
    delegate void ExceptionHandler(Exception ex);

    /// <summary>Registers a callback that receives exceptions.</summary>
    /// <param name="handler">The callback to register.</param>
    /// <returns>A facade, so that calls can be chained.</returns>
    ICrawlFacade<TPost> AddExceptionHandler(ExceptionHandler handler);

    /// <summary>Crawls the pages from <paramref name="startPage"/> up to <paramref name="endPage"/>.</summary>
    /// <param name="startPage">First page of the range.</param>
    /// <param name="endPage">Last page of the range; defaults to <c>Page.MaxValue</c>.</param>
    /// <param name="stoppingToken">Token used to cancel the operation.</param>
    /// <returns>A task yielding a facade, so that calls can be chained.</returns>
    Task<ICrawlFacade<TPost>> CrawlPageRange(
        Page startPage,
        Page endPage = Page.MaxValue,
        CancellationToken stoppingToken = default);

    /// <summary>Retries the given pages and then saves the outcome.</summary>
    /// <param name="pages">The pages to retry.</param>
    /// <param name="failureCountSelector">Maps a page to its failure count.</param>
    /// <param name="stoppingToken">Token used to cancel the operation.</param>
    /// <returns>A task yielding the resulting change set, which may be <see langword="null"/>.</returns>
    Task<SaverChangeSet<TPost>?> RetryThenSave(
        IReadOnlyList<Page> pages,
        Func<Page, FailureCount> failureCountSelector,
        CancellationToken stoppingToken = default);

    /// <summary>Saves what has been crawled so far.</summary>
    /// <param name="stoppingToken">Token used to cancel the operation.</param>
    /// <returns>The resulting change set, which may be <see langword="null"/>.</returns>
    SaverChangeSet<TPost>? SaveCrawled(CancellationToken stoppingToken = default);
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs
Expand Up @@ -11,7 +11,7 @@ public class ReplyCrawlFacade(
UserSaver.New userSaverFactory,
CrawlerDbContext.New dbContextFactory,
SonicPusher sonicPusher)
: CrawlFacade<ReplyPost, BaseReplyRevision, ReplyResponse, Reply>(
: CrawlFacade<ReplyPost, ReplyResponse, Reply>(
crawlerFactory(fid, tid), fid, new(fid, tid), locks[CrawlerLocks.Type.Reply],
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs
Expand Up @@ -11,7 +11,7 @@ public class SubReplyCrawlFacade(
UserParser.New userParserFactory,
UserSaver.New userSaverFactory,
SonicPusher sonicPusher)
: CrawlFacade<SubReplyPost, BaseSubReplyRevision, SubReplyResponse, SubReply>(
: CrawlFacade<SubReplyPost, SubReplyResponse, SubReply>(
crawlerFactory(tid, pid), fid, new(fid, tid, pid), locks[CrawlerLocks.Type.SubReply],
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs
Expand Up @@ -9,7 +9,7 @@ public class ThreadCrawlFacade(
ThreadSaver.New postSaverFactory,
UserParser.New userParserFactory,
UserSaver.New userSaverFactory)
: CrawlFacade<ThreadPost, BaseThreadRevision, ThreadResponse, Thread>(
: CrawlFacade<ThreadPost, ThreadResponse, Thread>(
crawlerFactory(forumName), fid, new(fid), locks[CrawlerLocks.Type.Thread],
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
Expand Down
12 changes: 12 additions & 0 deletions c#/crawler/src/Tieba/Crawl/Parser/Post/IPostParser.cs
@@ -0,0 +1,12 @@
namespace tbm.Crawler.Tieba.Crawl.Parser.Post;

/// <summary>
/// Converts <typeparamref name="TPostProtoBuf"/> messages into
/// <typeparamref name="TPost"/> entities keyed by post id.
/// </summary>
/// <typeparam name="TPost">The post entity type that is produced.</typeparam>
/// <typeparam name="TPostProtoBuf">
/// The post message type that is consumed; only ever accepted as input, hence contravariant.
/// </typeparam>
public interface IPostParser<TPost, in TPostProtoBuf>
    where TPost : BasePost
    where TPostProtoBuf : class, IMessage<TPostProtoBuf>
{
    /// <summary>Parses <paramref name="inPosts"/> into posts and users.</summary>
    /// <param name="requestFlag">The flag of the request the posts came from.</param>
    /// <param name="inPosts">The post messages to parse.</param>
    /// <param name="outPosts">Receives the parsed posts, keyed by post id.</param>
    /// <param name="outUsers">Receives the users produced by the parse.</param>
    void Parse(
        CrawlRequestFlag requestFlag,
        IReadOnlyCollection<TPostProtoBuf> inPosts,
        out IReadOnlyDictionary<PostId, TPost> outPosts,
        out IReadOnlyCollection<TbClient.User> outUsers);
}
7 changes: 5 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Parser/Post/PostParser.cs
@@ -1,12 +1,15 @@
namespace tbm.Crawler.Tieba.Crawl.Parser.Post;

public abstract class PostParser<TPost, TPostProtoBuf>
: IPostParser<TPost, TPostProtoBuf>
where TPost : BasePost
where TPostProtoBuf : class, IMessage<TPostProtoBuf>
{
public void Parse(
CrawlRequestFlag requestFlag, IReadOnlyCollection<TPostProtoBuf> inPosts,
out IReadOnlyDictionary<PostId, TPost> outPosts, out IReadOnlyCollection<TbClient.User> outUsers)
CrawlRequestFlag requestFlag,
IReadOnlyCollection<TPostProtoBuf> inPosts,
out IReadOnlyDictionary<PostId, TPost> outPosts,
out IReadOnlyCollection<TbClient.User> outUsers)
{
if (ShouldSkipParse(requestFlag))
{
Expand Down
9 changes: 9 additions & 0 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/IPostSaver.cs
@@ -0,0 +1,9 @@
namespace tbm.Crawler.Tieba.Crawl.Saver.Post;

/// <summary>
/// Persists <typeparamref name="TPost"/> posts through a <c>CrawlerDbContext</c>.
/// </summary>
/// <typeparam name="TPost">The post entity type that is saved.</typeparam>
public interface IPostSaver<TPost>
    where TPost : BasePost
{
    /// <summary>Gets the post type this saver is handling.</summary>
    PostType CurrentPostType { get; }

    /// <summary>Gets the field-change-ignorance delegates that apply to users.</summary>
    IFieldChangeIgnorance.FieldChangeIgnoranceDelegates UserFieldChangeIgnorance { get; }

    /// <summary>Runs the post-save event.</summary>
    /// <remarks>
    /// NOTE(review): presumably meant to be invoked once saving has completed —
    /// confirm the expected call order with the caller.
    /// </remarks>
    void OnPostSaveEvent();

    /// <summary>Saves the posts using <paramref name="db"/>.</summary>
    /// <param name="db">The database context to save through.</param>
    /// <returns>The change set produced by the save.</returns>
    SaverChangeSet<TPost> Save(CrawlerDbContext db);
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/PostSaver.cs
Expand Up @@ -7,7 +7,7 @@ public abstract class PostSaver<TPost, TBaseRevision>(
ConcurrentDictionary<PostId, TPost> posts,
AuthorRevisionSaver.New authorRevisionSaverFactory,
PostType currentPostType)
: BaseSaver<TBaseRevision>(logger)
: BaseSaver<TBaseRevision>(logger), IPostSaver<TPost>
where TPost : BasePost
where TBaseRevision : BaseRevisionWithSplitting
{
Expand Down

0 comments on commit c2bf8f8

Please sign in to comment.