From 8e1bb98858f2356845ea9cb77ca8839fc79cce65 Mon Sep 17 00:00:00 2001 From: saihtaungkham <31495612+saihtaungkham@users.noreply.github.com> Date: Thu, 7 May 2020 01:52:26 +0900 Subject: [PATCH 1/6] Fix feedback bug and add features. --- src/main/python/mlsearch/api_requester.py | 27 +++- src/main/python/mlsearch/config.py | 38 ++++-- src/main/scripts/mlsearch | 146 ++++++++++++++-------- 3 files changed, 143 insertions(+), 68 deletions(-) diff --git a/src/main/python/mlsearch/api_requester.py b/src/main/python/mlsearch/api_requester.py index 30082d2..ca44f5d 100644 --- a/src/main/python/mlsearch/api_requester.py +++ b/src/main/python/mlsearch/api_requester.py @@ -44,6 +44,15 @@ def __init__(self, source, query, init_idx, count, y_next_page_token=None): "y_next_page_token": None, } + @property + def youtube_query_order(self): + return self._config.YOUTUBE_ORDER + + @youtube_query_order.setter + def youtube_query_order(self, youtube_order): + if youtube_order: + self._config.YOUTUBE_ORDER = youtube_order + @property def github_acc_token(self): return self._config.GITHUB_ACC_TOKEN @@ -217,6 +226,12 @@ def _fetch_paperwithcode(self) -> [Protocol]: def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]: """Fetch the Youtube Repository""" results = [] + input_query = str(self.params["query"]).lower().strip() + user_query = input_query + + if not self._config.YOUTUBE_FIX_KEYWORD.strip() in user_query: + user_query = input_query + self._config.YOUTUBE_QUERY_FILTER + youtube = googleapiclient.discovery.build( self._config.YOUTUBE_SERVICE_NAME, self._config.YOUTUBE_API_VERSION, @@ -226,7 +241,7 @@ def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]: part=self._config.YOUTUBE_PART, maxResults=self.params["count"], order=self._config.YOUTUBE_ORDER, - q=self.params["query"], + q=user_query, safeSearch=self._config.YOUTUBE_SAFESEARCH, pageToken=y_next_page_token, ) @@ -234,6 +249,10 @@ def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]: if "items" in response and len(response["items"]) > 0: for item in response["items"]: + # Skip if the video id is null + if not item.get("id", dict({"videoId": None})).get("videoId", None): + continue + data = { "video_id": self._unescape( item.get("id", dict({"videoId": None})).get("videoId", None) @@ -282,6 +301,7 @@ def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]: ) > 0 ) + self.data["y_query_order"] = self._config.YOUTUBE_ORDER self.data["response_code"] = 200 def fetch_data(self) -> json: @@ -295,6 +315,11 @@ def fetch_data(self) -> json: self._fetch_github() if self.params.get("source", "") == "youtube": + if not self._config.YOUTUBE_ORDER in self._config.VALID_YOUTUBE_ORDER: + self.data["response_code"] = 400 + self.data["content"] = "Invalid Youtube Query Order." + return self.data + self._fetch_youtube(self.params.get("y_next_page_token", None)) # TODO: Implement the function for Coursera. However, this function diff --git a/src/main/python/mlsearch/config.py b/src/main/python/mlsearch/config.py index c53574f..1bd805d 100644 --- a/src/main/python/mlsearch/config.py +++ b/src/main/python/mlsearch/config.py @@ -1,24 +1,38 @@ import os + class Config(object): """Class for API Request configuration.""" # Paper with code configuration - PWC_USER_NAME = os.environ.get('PWC_USER_NAME') or '' - PWC_PASSWORD = os.environ.get('PWC_PASSWORD') or '' - PWC_URL = os.environ.get('PWC_URL') or "https://paperswithcode.com/api/v0/search/?q=" + PWC_USER_NAME = os.environ.get("PWC_USER_NAME") or "" + PWC_PASSWORD = os.environ.get("PWC_PASSWORD") or "" + PWC_URL = ( + os.environ.get("PWC_URL") or "https://paperswithcode.com/api/v0/search/?q=" + ) # Github configuration - GITHUB_ACC_TOKEN = os.environ.get('GITHUB_ACC_TOKEN') or None - GITHUB_URL = os.environ.get('GITHUB_URL') or "in:readme+in:description" + GITHUB_ACC_TOKEN = os.environ.get("GITHUB_ACC_TOKEN") or None + GITHUB_URL = os.environ.get("GITHUB_URL") or "in:readme+in:description" # AIP Source - VALID_API_SOURCE = ['paperwithcode', 'github', 'coursera', 'youtube'] + VALID_API_SOURCE = ["paperwithcode", "github", "coursera", "youtube"] # Youtube configuration - YOUTUBE_SERVICE_NAME = os.environ.get('YOUTUBE_SERVICE_NAME') or "youtube" - YOUTUBE_API_VERSION = os.environ.get('YOUTUBE_API_VERSION') or "v3" - YOUTUBE_DEVELOPER_KEY = os.environ.get('YOUTUBE_DEVELOPER_KEY') or None - YOUTUBE_ORDER = os.environ.get('YOUTUBE_ORDER') or "relevance" - YOUTUBE_SAFESEARCH = os.environ.get('YOUTUBE_SAFESEARCH') or "strict" - YOUTUBE_PART = os.environ.get('YOUTUBE_PART') or "snippet" \ No newline at end of file + YOUTUBE_SERVICE_NAME = os.environ.get("YOUTUBE_SERVICE_NAME") or "youtube" + YOUTUBE_API_VERSION = os.environ.get("YOUTUBE_API_VERSION") or "v3" + YOUTUBE_DEVELOPER_KEY = os.environ.get("YOUTUBE_DEVELOPER_KEY") or None + YOUTUBE_ORDER = os.environ.get("YOUTUBE_ORDER") or "relevance" + YOUTUBE_SAFESEARCH = os.environ.get("YOUTUBE_SAFESEARCH") or "strict" + YOUTUBE_PART = os.environ.get("YOUTUBE_PART") or "snippet" + YOUTUBE_FIX_KEYWORD = "machine learning" + YOUTUBE_QUERY_FILTER = " " + YOUTUBE_FIX_KEYWORD + " -news" + VALID_YOUTUBE_ORDER = [ + "date", + "rating", + "relevance", + "title", + # "videoCount", # This is for channel only + "viewCount", + ] + diff --git a/src/main/scripts/mlsearch b/src/main/scripts/mlsearch index 0d1b98e..878f055 100644 --- a/src/main/scripts/mlsearch +++ b/src/main/scripts/mlsearch @@ -6,67 +6,101 @@ import os import json # For debugging purpose -if 'mlsearch' not in sys.modules: - sys.path.append(os.path.join(os.getcwd(), 'src/main/python')) +if "mlsearch" not in sys.modules: + sys.path.append(os.path.join(os.getcwd(), "src/main/python")) from mlsearch.api_requester import APIRequest from mlsearch import helper as hp ap = argparse.ArgumentParser() -ap.add_argument('-q', '--query', required=True, help="Keyword for searching.") -ap.add_argument('-i', '--init_idx', required=True, help="Initial index for pagination.") -ap.add_argument('-c', '--count', required=True, help="Total number of results to be fetched.") -ap.add_argument('-s', '--source', required=True, help="Source API to be looking for.") -ap.add_argument('-ck', '--cookies', required=True, help="Cookies of current user.") -ap.add_argument('-tm', '--timestamp', required=True, help="Timestamp of requesting API.") -ap.add_argument('-pu', '--pwc_user', required=False, help="Paper with code repository user name.") -ap.add_argument('-pp', '--pwc_password', required=False, help="Paper with code repository password.") -ap.add_argument('-gt', '--github_acc_token', required=False, help="Github access token.") -ap.add_argument('-yk', '--youtube_dev_key', required=False, help="Youtube developer key.") -ap.add_argument('-yntp', '--y_next_page_token', required=False, help="Next page token for Youtube API.") +ap.add_argument("-q", "--query", required=True, help="Keyword for searching.") +ap.add_argument("-i", "--init_idx", required=True, help="Initial index for pagination.") +ap.add_argument( + "-c", "--count", required=True, help="Total number of results to be fetched." +) +ap.add_argument("-s", "--source", required=True, help="Source API to be looking for.") +ap.add_argument("-ck", "--cookies", required=True, help="Cookies of current user.") +ap.add_argument( + "-tm", "--timestamp", required=True, help="Timestamp of requesting API." +) +ap.add_argument( + "-pu", "--pwc_user", required=False, help="Paper with code repository user name." +) +ap.add_argument( + "-pp", "--pwc_password", required=False, help="Paper with code repository password." +) +ap.add_argument( + "-gt", "--github_acc_token", required=False, help="Github access token." +) +ap.add_argument( + "-yk", "--youtube_dev_key", required=False, help="Youtube developer key." +) +ap.add_argument( + "-yntp", + "--y_next_page_token", + required=False, + help="Next page token for Youtube API.", +) +ap.add_argument( + "-yo", "--youtube_query_order", required=False, help="Youtube Query Order." +) args = vars(ap.parse_args()) + def main(event): headers = { - 'Access-Control-Allow-Origin': '*', - 'X-Requested-With': '*', - 'Access-Control-Allow-Headers': 'Content-Type,X-Amz-Date,Authorization,X-Api-Key,x-requested-with', - 'Access-Control-Allow-Methods': 'OPTIONS,POST,GET' - } + "Access-Control-Allow-Origin": "*", + "X-Requested-With": "*", + "Access-Control-Allow-Headers": "Content-Type,X-Amz-Date,Authorization,X-Api-Key,x-requested-with", + "Access-Control-Allow-Methods": "OPTIONS,POST,GET", + } try: param_names = [ - 'query', 'init_idx', - 'count', 'source', - 'cookies', 'timestamp', - 'y_next_page_token'] - response_msg = hp.response('success', 200) + "query", + "init_idx", + "count", + "source", + "cookies", + "timestamp", + "y_next_page_token", + ] + response_msg = hp.response("success", 200) if hp.is_valid_parameters(event, param_names): params = hp.parse_parameters(event) if params.values(): api_request = APIRequest( - params['source'], - params['query'], - params['init_idx'], - params['count'], - params['y_next_page_token']) - if 'pwc_user'in event and 'pwc_password' in event: - api_request.pwc_auth_info = (event['pwc_user'], event['pwc_password']) - if 'github_acc_token' in event: - api_request.github_acc_token = event['github_acc_token'] - if 'youtube_developer_key' in event: - api_request.youtube_developer_key = event['youtube_developer_key'] + params["source"], + params["query"], + params["init_idx"], + params["count"], + params["y_next_page_token"], + ) + if "pwc_user" in event and "pwc_password" in event: + api_request.pwc_auth_info = ( + event["pwc_user"], + event["pwc_password"], + ) + if "github_acc_token" in event: + api_request.github_acc_token = event["github_acc_token"] + if "youtube_developer_key" in event: + api_request.youtube_developer_key = event["youtube_developer_key"] + if "youtube_query_order" in event: + api_request.youtube_query_order = event["youtube_query_order"] data = api_request.fetch_data() response_msg = hp.response( - message=data.get('content',''), - status_code=data.get('response_code'), + message=data.get("content", ""), + status_code=data.get("response_code"), headers=headers, optional_attributes={ - 'has_next_page': data.get('has_next_page', False), - 'y_next_page_token': data.get('y_next_page_token', None)}) + "has_next_page": data.get("has_next_page", False), + "y_next_page_token": data.get("y_next_page_token", None), + "y_query_order": data.get("y_query_order", None) + }, + ) return response_msg - response_msg = hp.response('Invalid parameters.', 400) + response_msg = hp.response("Invalid parameters.", 400) return response_msg except (ValueError, TypeError) as ex: @@ -77,26 +111,28 @@ def main(event): response_msg = hp.response(str(ex), 500) return response_msg + if __name__ == "__main__": event = { - 'query': args['query'], - 'init_idx': args['init_idx'], - 'count': args['count'], - 'source': args['source'], - 'cookies': args['cookies'], - 'timestamp': args['timestamp'], - 'y_next_page_token': args['y_next_page_token'] + "query": args["query"], + "init_idx": args["init_idx"], + "count": args["count"], + "source": args["source"], + "cookies": args["cookies"], + "timestamp": args["timestamp"], + "y_next_page_token": args["y_next_page_token"], } - if args['pwc_user']: - event['pwc_user'] = args['pwc_user'] - if args['pwc_password']: - event['pwc_password'] = args['pwc_password'] - if args['github_acc_token']: - event['github_acc_token'] = args['github_acc_token'] - if args['youtube_dev_key']: - event['youtube_developer_key'] = args['youtube_dev_key'] - + if args["pwc_user"]: + event["pwc_user"] = args["pwc_user"] + if args["pwc_password"]: + event["pwc_password"] = args["pwc_password"] + if args["github_acc_token"]: + event["github_acc_token"] = args["github_acc_token"] + if args["youtube_dev_key"]: + event["youtube_developer_key"] = args["youtube_dev_key"] + if args["youtube_query_order"]: + event["youtube_query_order"] = args["youtube_query_order"] result = main(event) pp = pprint.PrettyPrinter(indent=2) pp.pprint(result) From 46bf6f6e39f5dbfd62e8bec4e30949bcf24f3a4f Mon Sep 17 00:00:00 2001 From: saihtaungkham <31495612+saihtaungkham@users.noreply.github.com> Date: Thu, 7 May 2020 01:59:50 +0900 Subject: [PATCH 2/6] Fix ambiguous youtube query order keyword. --- src/main/scripts/mlsearch | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scripts/mlsearch b/src/main/scripts/mlsearch index 878f055..527b615 100644 --- a/src/main/scripts/mlsearch +++ b/src/main/scripts/mlsearch @@ -42,7 +42,7 @@ ap.add_argument( help="Next page token for Youtube API.", ) ap.add_argument( - "-yo", "--youtube_query_order", required=False, help="Youtube Query Order." + "-yo", "--y_query_order", required=False, help="Youtube Query Order." ) args = vars(ap.parse_args()) @@ -84,8 +84,8 @@ def main(event): api_request.github_acc_token = event["github_acc_token"] if "youtube_developer_key" in event: api_request.youtube_developer_key = event["youtube_developer_key"] - if "youtube_query_order" in event: - api_request.youtube_query_order = event["youtube_query_order"] + if "y_query_order" in event: + api_request.youtube_query_order = event["y_query_order"] data = api_request.fetch_data() response_msg = hp.response( message=data.get("content", ""), @@ -131,8 +131,8 @@ if __name__ == "__main__": event["github_acc_token"] = args["github_acc_token"] if args["youtube_dev_key"]: event["youtube_developer_key"] = args["youtube_dev_key"] - if args["youtube_query_order"]: - event["youtube_query_order"] = args["youtube_query_order"] + if args["y_query_order"]: + event["y_query_order"] = args["y_query_order"] result = main(event) pp = pprint.PrettyPrinter(indent=2) pp.pprint(result) From 9abf08fda1d98ca010b159548e6ff6b31a74c04e Mon Sep 17 00:00:00 2001 From: saihtaungkham <31495612+saihtaungkham@users.noreply.github.com> Date: Thu, 7 May 2020 02:19:02 +0900 Subject: [PATCH 3/6] Add usage to README file. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c425d65..da9ec19 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,8 @@ Optional Parameters: Youtube developer key. -ynpt NEXT_PAGE_TOKEN, --y_next_page_token NEXT_PAGE_TOKEN Next page token for Youtube API. + -yo Y_QUERY_ORDER, --y_query_order Y_QUERY_ORDER + Youtube Query Order. ```