PythonFreeCourse · yammesicka · Aug 8, 2020 · Aug 8, 2020 · Aug 8, 2020 · Aug 8, 2020
diff --git a/.flake8 b/.flake8
@@ -3,4 +3,5 @@ per-file-ignores =
   lms/tests/*.py:S101
   lms/lmstests/sandbox/flake8/defines.py:E501
   lms/tests/test_exercise_unit_tests.py:Q001,S101
+  lms/tests/test_extractor.py:W293,S101
 ignore=I100,I201,W503
diff --git a/dev_requirements.txt b/dev_requirements.txt
@@ -1,2 +1,2 @@
-ipdb
-pytest-env
+ipdb==0.13.3
+pytest-env==0.6.2
diff --git a/devops/dev_bootstrap.sh b/devops/dev_bootstrap.sh
@@ -40,9 +40,9 @@ echo "Activating venv"
 source "${VENV_DIR}/bin/activate"
 
 echo "Installing prod requirements"
-$pip_exec install --user -r "${MAIN_FOLDER}/requirements.txt"
+$pip_exec install -r "${MAIN_FOLDER}/requirements.txt"
 echo "Installing dev requirements"
-$pip_exec install --user -r "${MAIN_FOLDER}/dev_requirements.txt"
+$pip_exec install -r "${MAIN_FOLDER}/dev_requirements.txt"
 
 echo "Creating local SQLite DB"
 $python_exec "${DB_BOOTSTRAP_FILE_PATH}"

diff --git a/lms/extractors/__init__.py b/lms/extractors/__init__.py
@@ -0,0 +1,19 @@
+"""Import every extractor module of this package.
+
+``Extractor.__iter__`` looks extractors up via ``__subclasses__()``, which
+only lists classes whose modules were already imported.
+"""
+from importlib import import_module
+from pathlib import Path
+
+# Sorted so the import order, and with it the order in which subclasses
+# are defined, does not depend on the file system.
+for module in sorted(Path(__file__).parent.glob('[!_]*.py')):
+    import_module(f'{__name__}.{module.stem}')
+
+
+del import_module, Path
+try:
+    del module
+except NameError:
+    pass  # No modules found in the directory
diff --git a/lms/extractors/base.py b/lms/extractors/base.py
@@ -0,0 +1,108 @@
+from abc import abstractmethod
+import re
+from re import IGNORECASE
+import string
+from typing import (
+    Any, ClassVar, Iterator, Pattern, Sequence, Tuple, Union, cast,
+)
+
+from loguru import logger
+
+Text = Union[str, bytes]
+CodeFile = Union[Sequence[Text], str, bytes]
+
+
+class Extractor:
+    """Base class and entry point for exercise extractors.
+
+    Iterating over an instance builds every direct subclass with the same
+    payload and yields ``(exercise_id, code)`` pairs from each subclass
+    whose ``can_extract()`` is true.
+    """
+
+    # Title that marks an upload, e.g. "Upload 12" (case-insensitive).
+    UPLOAD_TITLE: ClassVar[Pattern] = re.compile(r'Upload\s+(\d+)', IGNORECASE)
+
+    def __init__(self, to_extract: Any):
+        self.to_extract = to_extract
+
+    @staticmethod
+    def _convert_to_text(code: CodeFile) -> str:
+        """Return ``code`` as one ``str``.
+
+        Accepts ``str``, ``bytes`` or a list/tuple/set of either. Bytes
+        are decoded as UTF-8, replacing invalid sequences.
+        """
+        if isinstance(code, (list, tuple, set)):
+            chunks = list(code)  # a set cannot be indexed
+            if chunks and all(isinstance(chunk, bytes) for chunk in chunks):
+                # Join first so a character split between two chunks
+                # still decodes correctly.
+                return b''.join(chunks).decode(errors='replace')
+            return ''.join(
+                chunk.decode(errors='replace')
+                if isinstance(chunk, bytes) else chunk
+                for chunk in chunks
+            )
+
+        if isinstance(code, bytes):  # includes the empty b''
+            return code.decode(errors='replace')
+
+        assert isinstance(code, str)  # noqa: S101
+        return code
+
+    @classmethod
+    def _split_header(cls, code: CodeFile) -> Tuple[str, str]:
+        """Return ``(first_line, rest)`` of ``code``, both stripped.
+
+        Surrounding whitespace and ``#`` characters are removed from the
+        whole text before it is split.
+        """
+        code = cast(str, cls._convert_to_text(code))
+
+        clean_text = code.strip('#' + string.whitespace)
+        # partition() also handles a text without any newline (title
+        # only), where find() would return -1 and break the slicing.
+        header, _, body = clean_text.partition('\n')
+        first_line = header.strip()
+        code_lines = body.strip()
+
+        logger.debug(f'Upload title: {first_line}')
+        return first_line, code_lines
+
+    @classmethod
+    def _clean(cls, code: Union[Sequence, str]) -> Tuple[str, str]:
+        """Return ``(exercise_number, code_text)`` for ``code``.
+
+        Returns ``('', '')`` when the first line is not an upload title.
+        """
+        first_line, code_text = cls._split_header(code)
+        upload_title = cls.UPLOAD_TITLE.fullmatch(first_line)
+        if upload_title:
+            return upload_title.group(1), code_text
+
+        logger.debug(f'Unmatched title: {first_line}')
+        return '', ''
+
+    @abstractmethod
+    def can_extract(self) -> bool:
+        """Return whether this extractor can handle the payload."""
+        pass
+
+    @classmethod
+    @abstractmethod
+    def get_exercise(cls, to_extract: Any) -> Tuple[str, str]:
+        """Return ``(exercise_id, code)`` for a single item."""
+        pass
+
+    @abstractmethod
+    def get_exercises(self):
+        """Yield ``(exercise_id, code)`` pairs found in the payload."""
+        pass
+
+    def __iter__(self) -> Iterator[Tuple[str, str]]:
+        # NOTE: only direct subclasses of the instance's class are tried.
+        for cls in self.__class__.__subclasses__():
+            extractor = cls(to_extract=self.to_extract)
+            if extractor.can_extract():
+                yield from extractor.get_exercises()
diff --git a/lms/extractors/notebook.py b/lms/extractors/notebook.py
@@ -0,0 +1,61 @@
+from itertools import chain
+import json
+from operator import itemgetter
+from typing import Any, Dict, Iterator, List, Tuple
+
+from lms.extractors.base import Extractor
+
+
+NotebookJson = Dict[str, Any]
+Cell = Dict[str, Any]
+
+
+class Notebook(Extractor):
+    """Extract exercises from the code cells of a notebook JSON document."""
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        try:
+            cells = self._get_code_cells()
+            # Pull one cell so parsing errors surface here. The default
+            # stops a notebook without code cells from raising
+            # StopIteration out of the constructor.
+            first_cell = next(cells, None)
+        except (ValueError, KeyError, TypeError):
+            # ValueError: invalid JSON or undecodable bytes.
+            # KeyError: no 'cells' key. TypeError: not a JSON object.
+            self.cells = iter(())
+            self.is_json = False
+        else:
+            head = [] if first_cell is None else [first_cell]
+            self.cells = chain(head, cells)
+            self.is_json = True
+
+    def can_extract(self) -> bool:
+        """Return whether the payload was parsed as a notebook."""
+        return self.is_json
+
+    def _get_code_cells(self) -> Iterator[Cell]:
+        """Parse the payload and yield its non-empty code cells."""
+        notebook = json.loads(self.to_extract)
+        cells = notebook['cells']
+        yield from filter(self._is_code_cell, cells)
+
+    @staticmethod
+    def _is_code_cell(cell: Cell) -> bool:
+        """Return whether ``cell`` is a code cell with a source."""
+        return (
+            isinstance(cell, dict)  # skip malformed entries
+            and cell.get('cell_type', '') == 'code'
+            and bool(cell.get('source'))
+        )
+
+    @classmethod
+    def get_exercise(cls, to_extract: Cell) -> Tuple[str, str]:
+        """Return ``(exercise_id, code)`` for one notebook cell."""
+        code: List[str] = to_extract.get('source', [])
+        return cls._clean(code)
+
+    def get_exercises(self) -> Iterator[Tuple[str, str]]:
+        """Yield exercise ID and code from notebook."""
+        yield from filter(itemgetter(0), map(self.get_exercise, self.cells))
diff --git a/lms/extractors/pyfile.py b/lms/extractors/pyfile.py
@@ -0,0 +1,26 @@
+from typing import Iterator, Tuple
+
+from lms.extractors.base import Extractor
+
+
+class Pyfile(Extractor):
+    """Extractor that treats the whole payload as a single exercise."""
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def can_extract(self) -> bool:
+        """Always true; no check is made on the payload."""
+        return True
+
+    @classmethod
+    def get_exercise(cls, to_extract: str) -> Tuple[str, str]:
+        """Return what ``_clean`` produces for ``to_extract``."""
+        return cls._clean(to_extract)
+
+    def get_exercises(self) -> Iterator[Tuple[str, str]]:
+        """Yield the payload's exercise if it has a non-empty ID."""
+        exercise = self.get_exercise(self.to_extract)
+        if not exercise or not exercise[0]:
+            return
+        yield exercise
diff --git a/lms/lmsweb/__init__.py b/lms/lmsweb/__init__.py
@@ -25,8 +25,8 @@
 csrf = CSRFProtect(webapp)
 
 # Must import files after app's creation
-from lms.lmsdb import models  # NOQA: F401
-from lms.lmsweb import views  # NOQA: F401
+from lms.lmsdb import models  # NOQA: F401, E402
+from lms.lmsweb import views  # NOQA: F401, E402
 
 
 # gunicorn search for application

diff --git a/lms/lmsweb/tools/notebook_extractor.py b/lms/lmsweb/tools/notebook_extractor.py
diff --git a/lms/lmsweb/views.py b/lms/lmsweb/views.py
@@ -1,10 +1,9 @@
-import json
 import os
 from functools import wraps
 from typing import Optional
 from urllib.parse import urljoin, urlparse
 
-import arrow
+import arrow  # type: ignore
 from flask import (
     abort, jsonify, render_template, request, send_from_directory, url_for,
 )
@@ -22,11 +21,11 @@
     ALL_MODELS, Comment, CommentText, Exercise, RoleOptions, Solution, User,
     database,
 )
+import lms.extractors.base as extractor
 from lms.lmstests.public.flake8 import tasks as flake8_tasks
 from lms.lmstests.public.unittests import tasks as unittests_tasks
 from lms.lmstests.public.identical_tests import tasks as identical_tests_tasks
 from lms.lmsweb import config, routes, webapp
-from lms.lmsweb.tools.notebook_extractor import extract_exercises
 from lms.models import notifications, solutions
 
 login_manager = LoginManager()
@@ -312,12 +311,7 @@ def upload():
     if not file:
         return fail(422, 'No file was given')
 
-    json_file_data = file.read()
-    try:
-        file_content = json.loads(json_file_data)
-        exercises = list(extract_exercises(file_content))
-    except (ValueError, json.JSONDecodeError):
-        return fail(422, 'Invalid file format - must be ipynb')
+    exercises = list(extractor.Extractor(file.read()))
     if not exercises:
         msg = 'No exercises were found in the notebook'
         desc = 'did you use Upload <number of exercise> ? (example: Upload 1)'