Skip to content

Commit

Permalink
refactored: crawler's codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
ManiMozaffar committed May 30, 2023
1 parent bf318ec commit 436ec47
Show file tree
Hide file tree
Showing 28 changed files with 983 additions and 674 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ win32-setctime==1.1.0
wincertstore==0.2
alembic==1.10.2
aiohttp
fastapi-integration==0.1.1
fastapi-integration==0.1.2
160 changes: 0 additions & 160 deletions worker/connection.py

This file was deleted.

File renamed without changes.
40 changes: 40 additions & 0 deletions worker/core/abc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from abc import ABC, abstractmethod, abstractproperty
from typing import List, Callable


class AbstractEngine(ABC):
    """Interface that every task engine must implement.

    Concrete engines provide a ``tasks`` and a ``data`` property plus the
    ``setup`` / ``tear_down`` / ``execute`` coroutines and a
    ``get_task_kwargs`` helper.
    """

    # Declared (not assigned) here; concrete engines are expected to set them.
    base_url: str
    _tasks: list

    # ``abc.abstractproperty`` has been deprecated since Python 3.3; stacking
    # ``@property`` on ``@abstractmethod`` is the supported equivalent.
    @property
    @abstractmethod
    def tasks(self):
        """Abstract read-only property exposing the engine's tasks."""

    @property
    @abstractmethod
    def data(self):
        """Abstract read-only property exposing the engine's data."""

    @abstractmethod
    async def setup(self):
        """Abstract coroutine invoked to prepare the engine."""

    @abstractmethod
    async def tear_down(self):
        """Abstract coroutine invoked to release what ``setup`` prepared."""

    @abstractmethod
    async def execute(self):
        """Abstract coroutine that runs the engine."""

    # NOTE(review): the concrete engine in this commit overrides this with an
    # extra ``setup_data`` parameter -- confirm which signature is intended.
    @abstractmethod
    def get_task_kwargs(self, task):
        """Abstract helper returning the keyword arguments for ``task``."""


class AbstractBrowserCrawler(AbstractEngine):
    """Engine interface extended with XPath-based browser operations.

    Every XPath method takes the same leading arguments: ``browser``,
    ``xpath``, ``raise_error`` and ``timeout``. Their exact semantics are
    left to implementers and are not defined in this class.
    """

    @abstractmethod
    async def base_action(
        self,
        browser,
        xpath,
        raise_error,
        timeout,
        action,
    ):
        """Abstract coroutine taking the common arguments plus ``action``."""

    async def tear_down(self):
        """Do nothing.

        This override is concrete (it is not marked abstract), so subclasses
        inherit an empty teardown unless they define their own.
        """

    @abstractmethod
    async def setup(self):
        """Abstract coroutine; subclasses must supply the setup step."""

    @abstractmethod
    async def click_xpath(
        self,
        browser,
        xpath,
        raise_error,
        timeout,
    ):
        """Abstract coroutine; presumably clicks the element at ``xpath``."""

    @abstractmethod
    async def read_from_xpath(
        self,
        browser,
        xpath,
        raise_error,
        timeout,
    ):
        """Abstract coroutine; presumably reads content found at ``xpath``."""

    @abstractmethod
    async def get_all_elements(
        self,
        browser,
        xpath,
        raise_error,
        timeout,
    ):
        """Abstract coroutine; presumably returns all matches of ``xpath``."""


class AbstractBaseRepository(ABC):
    """Interface for repositories that accept a batch of task callables."""

    @abstractmethod
    async def gather_tasks(self, tasks: List[Callable]):
        """Abstract coroutine receiving a list of callables.

        What is done with ``tasks`` is defined by implementers (presumably
        they are run together -- confirm against the concrete repository).
        """
79 changes: 79 additions & 0 deletions worker/core/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import logging
from typing import Callable, List, Any
import inspect
import traceback

from core.abc import AbstractEngine


class BaseTaskEngine(AbstractEngine):
    """Engine that discovers marked task methods and runs them in order.

    An attribute counts as a task when it carries a ``_task_info`` mapping
    with a ``'level'`` entry. Tasks are run sorted by ``(level, name)``, and
    each one is wrapped in its own ``setup()`` / ``tear_down()`` pair.
    """

    base_url = "http://127.0.0.1:8000"

    def __init__(self):
        self.register_tasks()

    @property
    def tasks(self) -> List[Callable]:
        """Return the registered task callables, ordered by level then name."""
        return [
            getattr(self, method_name) for _, method_name in self._tasks
        ]

    def register_tasks(self):
        """Populate ``self._tasks`` with ``(level, attribute_name)`` pairs."""
        self._tasks = []
        owner = type(self)
        for attr in dir(self):
            # Properties are never tasks. Skipping them avoids evaluating
            # subclass properties (e.g. ``data``) while ``__init__`` is still
            # running and the state they read may not exist yet.
            if isinstance(getattr(owner, attr, None), property):
                continue
            func = getattr(self, attr)
            if hasattr(func, '_task_info'):
                level = func._task_info['level']
                self._tasks.append((level, attr))
        self._tasks.sort()

    async def setup(self) -> dict:
        """Override this method to configure setup for the crawler.

        Returns:
            Dependencies for all tasks. ``execute`` passes them as keyword
            arguments to each task and to ``tear_down``. The default
            provides none.
        """
        # Return a real mapping so ``execute`` can unpack it with ``**``.
        return {}

    async def tear_down(self, **setup_data) -> Any:
        """Override this method to tear down what ``setup`` prepared.

        ``execute`` calls this with the mapping returned by ``setup``
        unpacked as keyword arguments, so the default accepts and ignores
        them.
        """

    def get_task_kwargs(
        self, task: Callable, setup_data: dict
    ) -> dict:
        """Build the keyword arguments used to call ``task``.

        Args:
            task: Callable whose signature is inspected for defaults.
            setup_data: Mapping returned by ``setup``; ``None`` is treated
                as empty.

        Returns:
            The task's declared parameter defaults, overridden by every
            entry of ``setup_data``.
        """
        signature = inspect.signature(task)
        default_values = {
            param.name: param.default
            for param in signature.parameters.values()
            if param.default is not inspect.Parameter.empty
        }
        return {**default_values, **(setup_data or {})}

    async def execute(self) -> Any:
        """Run every registered task between its own setup and teardown.

        A failing ``setup`` is logged and stops the whole run. A failing
        task or teardown is logged and the loop moves on to the next task.
        """
        for _, task_name in self._tasks:
            task = getattr(self, task_name)
            try:
                setup_data = await self.setup()
            except Exception as error:
                logging.error(
                    f"\n error raised for task: {task.__name__}: {error}"
                    f"\n traceback: {traceback.format_exc()} \n"
                )
                break

            # An override that returns nothing means "no dependencies";
            # normalise so the ``**`` unpacking below cannot fail on None.
            if setup_data is None:
                setup_data = {}

            try:
                kwargs = self.get_task_kwargs(task, setup_data)
                await task(**kwargs)
            except Exception as error:
                logging.error(
                    f"\n error raised for task: {task.__name__}: {error}"
                    f"\n traceback: {traceback.format_exc()} \n"
                )
            finally:
                # Teardown always runs; its own failure is only logged.
                try:
                    await self.tear_down(**setup_data)
                except Exception as error:
                    logging.error(
                        f"\n error raised for teardown: {error} \n"
                        f"\n traceback: {traceback.format_exc()} \n"
                    )

0 comments on commit 436ec47

Please sign in to comment.