{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":647186540,"defaultBranch":"main","name":"Yulan-GARDEN","ownerLogin":"Emanual20","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-05-30T08:41:24.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/45003836?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1685436269.318099","currentOid":""},"activityList":{"items":[{"before":"def91e8358024f6a7a8d694ee1cdd2b7c4df5d56","after":"683fd30120010953014ec018e3048759e2486d16","ref":"refs/heads/main","pushedAt":"2024-05-13T09:06:22.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"PhealenWang","name":"Phealen","path":"/PhealenWang","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/77534523?s=80&v=4"},"commit":{"message":"fix ppl bug in sampler","shortMessageHtmlLink":"fix ppl bug in sampler"}},{"before":"1a8f5d2869d9fbf2f6214df7df4d4a8aeb8bfb52","after":"def91e8358024f6a7a8d694ee1cdd2b7c4df5d56","ref":"refs/heads/main","pushedAt":"2024-04-18T09:24:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"fix fil_ppl bug","shortMessageHtmlLink":"fix fil_ppl bug"}},{"before":"2155eac2159307552ec8b4bffb48e4c2843464af","after":"1a8f5d2869d9fbf2f6214df7df4d4a8aeb8bfb52","ref":"refs/heads/main","pushedAt":"2024-04-18T04:18:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"1. fix language label logics bug; 2. fix identification of PII regex bug; 3. update settings/README.md for more detail of provided configuration files; 4. update initialization logics of fil_passage_ppl","shortMessageHtmlLink":"1. fix language label logics bug; 2. fix identification of PII regex …"}},{"before":"808a9abfce5b7a1f6e63f3adf837b5e89905d451","after":"2155eac2159307552ec8b4bffb48e4c2843464af","ref":"refs/heads/main","pushedAt":"2024-03-26T06:50:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"Renamed as Yulan-GARDEN","shortMessageHtmlLink":"Renamed as Yulan-GARDEN"}},{"before":"2f2d2dc635298a66e6d66567d51f46db48e9cee2","after":"808a9abfce5b7a1f6e63f3adf837b5e89905d451","ref":"refs/heads/main","pushedAt":"2024-03-25T10:00:07.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add a module which supports deduplication by ngram for eeach lines","shortMessageHtmlLink":"add a module which supports deduplication by ngram for eeach lines"}},{"before":"523d8d74e4ce7fec3c8a51b5ba6af8eaa8945880","after":"2f2d2dc635298a66e6d66567d51f46db48e9cee2","ref":"refs/heads/main","pushedAt":"2024-03-25T01:34:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"update cleaner and config, add privacy info removal.","shortMessageHtmlLink":"update cleaner and config, add privacy info removal."}},{"before":"7f5ab249d57d4ccbe1cead72df5867a09786a310","after":"523d8d74e4ce7fec3c8a51b5ba6af8eaa8945880","ref":"refs/heads/main","pushedAt":"2024-03-08T02:22:38.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add requirements","shortMessageHtmlLink":"add requirements"}},{"before":"be5b8ec88a9c103dd83831597e4b43a4bc8f8c5f","after":"7f5ab249d57d4ccbe1cead72df5867a09786a310","ref":"refs/heads/main","pushedAt":"2024-02-28T06:33:14.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"Update arxiv link and formatted citation","shortMessageHtmlLink":"Update arxiv link and formatted citation"}},{"before":"7b043e8c3aaa4cdfd197914bfafa4c052c0d9c80","after":"be5b8ec88a9c103dd83831597e4b43a4bc8f8c5f","ref":"refs/heads/main","pushedAt":"2024-02-09T18:00:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"remove OpenAI keys","shortMessageHtmlLink":"remove OpenAI keys"}},{"before":"2339ecc357bd4afacbd75d5c2e3f76e043f979c0","after":"7b043e8c3aaa4cdfd197914bfafa4c052c0d9c80","ref":"refs/heads/main","pushedAt":"2024-02-09T11:31:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"update README.md","shortMessageHtmlLink":"update README.md"}},{"before":"f909e1f295f9d462f8c0af4f3813395a43fae643","after":"2339ecc357bd4afacbd75d5c2e3f76e043f979c0","ref":"refs/heads/main","pushedAt":"2024-02-09T11:17:04.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"code of chatGPT labelling experiments","shortMessageHtmlLink":"code of chatGPT labelling experiments"}},{"before":"6066f515fa41fb1d5ec578775433e3d48b6ce7b9","after":"f909e1f295f9d462f8c0af4f3813395a43fae643","ref":"refs/heads/main","pushedAt":"2024-02-09T09:49:50.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"update path parameter in each modules","shortMessageHtmlLink":"update path parameter in each modules"}},{"before":"2edfe5d0ad4bc5b9498a4893e1890b5c67d6671a","after":"6066f515fa41fb1d5ec578775433e3d48b6ce7b9","ref":"refs/heads/main","pushedAt":"2024-02-09T07:16:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add templates, retriever and text-dedup","shortMessageHtmlLink":"add templates, retriever and text-dedup"}},{"before":"7ec4a23a8525fc5ac32163b5f45ba8a559f86062","after":"2edfe5d0ad4bc5b9498a4893e1890b5c67d6671a","ref":"refs/heads/main","pushedAt":"2024-02-04T14:18:41.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add ppl into filter","shortMessageHtmlLink":"add ppl into filter"}},{"before":"791ae89fed72c05f61ea39d0ba046b72e98b377c","after":"7ec4a23a8525fc5ac32163b5f45ba8a559f86062","ref":"refs/heads/main","pushedAt":"2024-01-12T02:10:38.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"fix parallel bug by adding 'utils/utils/load_modules.py, all the processing modules become global. fix some other small bugs.'","shortMessageHtmlLink":"fix parallel bug by adding 'utils/utils/load_modules.py, all the proc…"}},{"before":"38572b72f56c3912b40a11394edd67cf62a4f437","after":"791ae89fed72c05f61ea39d0ba046b72e98b377c","ref":"refs/heads/main","pushedAt":"2024-01-05T11:19:48.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"unify the log as global_logger in utils/utils/logger","shortMessageHtmlLink":"unify the log as global_logger in utils/utils/logger"}},{"before":"7b83550b7161da7c9ebea692d5550a41a3f52896","after":"38572b72f56c3912b40a11394edd67cf62a4f437","ref":"refs/heads/main","pushedAt":"2024-01-05T08:36:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"fix small bugs. make sure the figure of raw data and refined data distribution on the processing page","shortMessageHtmlLink":"fix small bugs. make sure the figure of raw data and refined data dis…"}},{"before":"31250ba4146da6ce4e4c9b456a8859c06c5204d1","after":"7b83550b7161da7c9ebea692d5550a41a3f52896","ref":"refs/heads/main","pushedAt":"2024-01-05T08:06:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add graphs on web ui (app.py, debugger.py, templates/*). the process is splited in 5 steps for only sample and debug before the whole refined process","shortMessageHtmlLink":"add graphs on web ui (app.py, debugger.py, templates/*). the process …"}},{"before":"b067e85c7cce8f2c7ed83750aaffad46252cced3","after":"31250ba4146da6ce4e4c9b456a8859c06c5204d1","ref":"refs/heads/main","pushedAt":"2024-01-05T07:47:51.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"without retriever, fix load model bug, global logger bug","shortMessageHtmlLink":"without retriever, fix load model bug, global logger bug"}},{"before":"322069bb828ca36a35d3b2c0d3ed4a6432362391","after":"b067e85c7cce8f2c7ed83750aaffad46252cced3","ref":"refs/heads/main","pushedAt":"2023-12-29T16:59:22.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"fix input_path check bug","shortMessageHtmlLink":"fix input_path check bug"}},{"before":"7c4ca13432cb0332599e4e78c00a031df26bdf3d","after":"322069bb828ca36a35d3b2c0d3ed4a6432362391","ref":"refs/heads/main","pushedAt":"2023-12-29T16:58:52.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add LangIdentifier, PPLEvaluator, and corresponding filter module; now all the evaluator subclass inherit from evaluator_base.py which is the base class of evaluators; [Wgit add utils/evaluator/GPTEvaluator.py utils/filter/__init__.py utils/utils/__init__.py utils/evaluator/LangIdentifier.py utils/evaluator/PerplexityEvaluator.py utils/evaluator/evaluator_base.py utils/filter/fil_passage_langs.py utils/filter/fil_passage_langscore.py utils/filter/fil_passage_ppl.py utils/utils/kenlm_model.pyARNINGgit add utils/evaluator/GPTEvaluator.py utils/filter/__init__.py utils/utils/__init__.py utils/evaluator/LangIdentifier.py utils/evaluator/PerplexityEvaluator.py utils/evaluator/evaluator_base.py utils/filter/fil_passage_langs.py utils/filter/fil_passage_langscore.py utils/filter/fil_passage_ppl.py utils/utils/kenlm_model.py] no checking correctness of aforementioned module in pipeline, just did unit test of modules alone","shortMessageHtmlLink":"add LangIdentifier, PPLEvaluator, and corresponding filter module; no…"}},{"before":"d8184d6a09376b7e0649b812944dfeff125be96b","after":"7c4ca13432cb0332599e4e78c00a031df26bdf3d","ref":"refs/heads/main","pushedAt":"2023-12-28T08:52:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add app.py, templates and retriever","shortMessageHtmlLink":"add app.py, templates and retriever"}},{"before":"48fcc43fdc6bf8af087112bbdb883fcd5239a0bf","after":"d8184d6a09376b7e0649b812944dfeff125be96b","ref":"refs/heads/main","pushedAt":"2023-12-27T08:15:21.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"complement of last commit, add deduplication module","shortMessageHtmlLink":"complement of last commit, add deduplication module"}},{"before":"9c7fa61a7d3c0bdd963918dc31663dd3624b8514","after":"48fcc43fdc6bf8af087112bbdb883fcd5239a0bf","ref":"refs/heads/main","pushedAt":"2023-12-27T08:11:49.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"unified logger, fix input_text_key bug(origin text should be 'text' or not following configuration file)","shortMessageHtmlLink":"unified logger, fix input_text_key bug(origin text should be 'text' o…"}},{"before":"7b1c3863d3912d95e30e08851c1a71fedeca4b9a","after":"9c7fa61a7d3c0bdd963918dc31663dd3624b8514","ref":"refs/heads/main","pushedAt":"2023-12-25T13:32:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"split original filter into different file, each file inherent from base class FilterBase()","shortMessageHtmlLink":"split original filter into different file, each file inherent from ba…"}},{"before":"9c20f638f8d41ad1f2be856d35b51e56fb90791e","after":"7b1c3863d3912d95e30e08851c1a71fedeca4b9a","ref":"refs/heads/main","pushedAt":"2023-12-22T13:16:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add predefined cleaner operator(email, idcard, ip, phone number, url); Matched pattern will be substitute with masked string(e.g. **MASKED**EMAIL** for email field)","shortMessageHtmlLink":"add predefined cleaner operator(email, idcard, ip, phone number, url)…"}},{"before":"f53e2249431b1f8f3589cc87c80b2615c58c6333","after":"9c20f638f8d41ad1f2be856d35b51e56fb90791e","ref":"refs/heads/main","pushedAt":"2023-12-22T08:37:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"split original cleaner into different file, each file inherent from base class CleanerBase()","shortMessageHtmlLink":"split original cleaner into different file, each file inherent from b…"}},{"before":"2b11061ada33c3feee85b43b4371cf97259fb2a6","after":"f53e2249431b1f8f3589cc87c80b2615c58c6333","ref":"refs/heads/main","pushedAt":"2023-12-21T13:32:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"1. check the version of configuration file before each run; 2. user-defined module, related to utils/utils/my_funcs.py and my_rules.py","shortMessageHtmlLink":"1. check the version of configuration file before each run; 2. user-d…"}},{"before":"06f69766e22103a576eb5a03c23d4695b8fe3b45","after":"2b11061ada33c3feee85b43b4371cf97259fb2a6","ref":"refs/heads/main","pushedAt":"2023-12-21T10:05:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"GPTEvaluator, fix bugs","shortMessageHtmlLink":"GPTEvaluator, fix bugs"}},{"before":"5fd6f14ca50ff91414c1c51819a1fbe0d51d729c","after":"06f69766e22103a576eb5a03c23d4695b8fe3b45","ref":"refs/heads/main","pushedAt":"2023-11-21T10:04:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Emanual20","name":"Yiding Sun","path":"/Emanual20","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/45003836?s=80&v=4"},"commit":{"message":"add some new parts of filter (fil_dirty_words, fil_meta, fil_my_rules) and cleaner (rm_str_seg, my_funcs) and update README.md. And some all changes.","shortMessageHtmlLink":"add some new parts of filter (fil_dirty_words, fil_meta, fil_my_rules…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAESDKa8wA","startCursor":null,"endCursor":null}},"title":"Activity · Emanual20/Yulan-GARDEN"}