diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 464080ff..7d2b2b48 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,5 +1,3 @@ -name: Pylint - on: [push] jobs: @@ -20,4 +18,13 @@ jobs: pip install pylint pip install -r requirements.txt - name: Analysing the code with pylint - run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py \ No newline at end of file + run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py + - name: Check Pylint score + run: | + pylint_score=$(pylint --disable=all --enable=metrics --output-format=text scrapegraphai/**/*.py scrapegraphai/*.py | grep 'Raw metrics' | awk '{print $4}') + if (( $(echo "$pylint_score < 8" | bc -l) )); then + echo "Pylint score is below 8. Blocking commit." + exit 1 + else + echo "Pylint score is acceptable." + fi diff --git a/pyproject.toml b/pyproject.toml index 21bca442..caccc0a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scrapegraphai" -version = "0.2.1" +version = "0.2.2" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ "Marco Vinciguerra ", diff --git a/scrapegraphai/nodes/__init__.py b/scrapegraphai/nodes/__init__.py index b5b03d73..7cd1a1c4 100644 --- a/scrapegraphai/nodes/__init__.py +++ b/scrapegraphai/nodes/__init__.py @@ -8,7 +8,7 @@ from .generate_answer_node import GenerateAnswerNode from .parse_node import ParseNode from .rag_node import RAGNode -from .text_to_speech_node import TextToSpeechNode -from .image_to_text_node import ImageToTextNode +from .text_to_speech_node_openai import TextToSpeechNode +from .image_to_text_node_openai import ImageToTextNode from .search_internet_node import SearchInternetNode from .generate_scraper_node import GenerateScraperNode diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node_openai.py similarity index 100% rename from scrapegraphai/nodes/image_to_text_node.py rename to scrapegraphai/nodes/image_to_text_node_openai.py diff --git a/scrapegraphai/nodes/text_to_speech_node.py b/scrapegraphai/nodes/text_to_speech_node_openai.py similarity index 100% rename from scrapegraphai/nodes/text_to_speech_node.py rename to scrapegraphai/nodes/text_to_speech_node_openai.py diff --git a/tests/scrape_plain_text_ollama_test.py b/tests/scrape_plain_text_ollama_test.py index 0b37b205..919d48c0 100644 --- a/tests/scrape_plain_text_ollama_test.py +++ b/tests/scrape_plain_text_ollama_test.py @@ -8,7 +8,9 @@ @pytest.fixture def sample_text(): - # Read the sample text file + """ + Example of text + """ file_name = "inputs/plain_html_example.txt" curr_dir = os.path.dirname(os.path.realpath(__file__)) file_path = os.path.join(curr_dir, file_name) @@ -21,6 +23,9 @@ def sample_text(): @pytest.fixture def graph_config(): + """ + Configuration of the graph + """ return { "llm": { "model": "ollama/mistral", @@ -36,16 +41,16 @@ def graph_config(): } -def test_scraping_pipeline(sample_text, graph_config): - # Create the SmartScraperGraph instance +def test_scraping_pipeline(sample_text: str, graph_config: dict): + """ + Start of the scraping pipeline + """ smart_scraper_graph = SmartScraperGraph( prompt="List me all the news with their description.", source=sample_text, config=graph_config ) - # Run the graph result = smart_scraper_graph.run() - # Check that the result is not empty assert result is not None diff --git a/tests/scrape_xml_ollama_test.py b/tests/scrape_xml_ollama_test.py index 343e4ecc..afa7527f 100644 --- a/tests/scrape_xml_ollama_test.py +++ b/tests/scrape_xml_ollama_test.py @@ -8,7 +8,9 @@ @pytest.fixture def sample_xml(): - # Leggi il file XML di esempio + """ + Example of text + """ file_name = "inputs/books.xml" curr_dir = os.path.dirname(os.path.realpath(__file__)) file_path = os.path.join(curr_dir, file_name) @@ -21,6 +23,9 @@ def sample_xml(): @pytest.fixture def graph_config(): + """ + Configuration of the graph + """ return { "llm": { "model": "ollama/mistral", @@ -36,16 +41,16 @@ def graph_config(): } -def test_scraping_pipeline(sample_xml, graph_config): - # Crea un'istanza di SmartScraperGraph +def test_scraping_pipeline(sample_xml: str, graph_config: dict): + """ + Start of the scraping pipeline + """ smart_scraper_graph = SmartScraperGraph( prompt="List me all the authors, title and genres of the books", source=sample_xml, config=graph_config ) - # Esegui il grafico result = smart_scraper_graph.run() - # Verifica che il risultato non sia vuoto assert result is not None diff --git a/tests/script_generator_test.py b/tests/script_generator_test.py index b0f1c343..6114bac4 100644 --- a/tests/script_generator_test.py +++ b/tests/script_generator_test.py @@ -1,9 +1,5 @@ """ -<<<<<<< Updated upstream Module for making the tests for ScriptGeneratorGraph -======= -Test for script generator ->>>>>>> Stashed changes """ import pytest from scrapegraphai.graphs import ScriptCreatorGraph @@ -12,6 +8,9 @@ @pytest.fixture def graph_config(): + """ + Configuration of the graph + """ return { "llm": { "model": "ollama/mistral", @@ -29,28 +28,24 @@ def graph_config(): } -def test_script_creator_graph(graph_config): - # Create the ScriptCreatorGraph instance +def test_script_creator_graph(graph_config: dict): + """ + Start of the scraping pipeline + """ smart_scraper_graph = ScriptCreatorGraph( prompt="List me all the news with their description.", source="https://perinim.github.io/projects", config=graph_config ) - # Run the graph result = smart_scraper_graph.run() - # Check that the result is not empty assert result is not None - # Get graph execution info graph_exec_info = smart_scraper_graph.get_execution_info() - # Check that execution info is not empty assert graph_exec_info is not None - # Check that execution info is a dictionary assert isinstance(graph_exec_info, dict) - # Print execution info print(prettify_exec_info(graph_exec_info)) diff --git a/tests/smart_scraper_ollama_test.py b/tests/smart_scraper_ollama_test.py index 70eb9af6..b35907c0 100644 --- a/tests/smart_scraper_ollama_test.py +++ b/tests/smart_scraper_ollama_test.py @@ -7,6 +7,9 @@ @pytest.fixture def graph_config(): + """ + Configuration of the graph + """ return { "llm": { "model": "ollama/mistral", @@ -22,34 +25,33 @@ def graph_config(): } -def test_scraping_pipeline(graph_config): - # Crea un'istanza di SmartScraperGraph +def test_scraping_pipeline(graph_config: dict): + """ + Start of the scraping pipeline + """ smart_scraper_graph = SmartScraperGraph( prompt="List me all the news with their description.", source="https://perinim.github.io/projects", config=graph_config ) - # Esegui il grafico result = smart_scraper_graph.run() - # Verifica che il risultato non sia vuoto assert result is not None -def test_get_execution_info(graph_config): - # Crea un'istanza di SmartScraperGraph +def test_get_execution_info(graph_config: dict): + """ + Get the execution info + """ smart_scraper_graph = SmartScraperGraph( prompt="List me all the news with their description.", source="https://perinim.github.io/projects", config=graph_config ) - # Esegui il grafico smart_scraper_graph.run() - # Ottieni le informazioni sull'esecuzione del grafico graph_exec_info = smart_scraper_graph.get_execution_info() - # Verifica che le informazioni sull'esecuzione non siano vuote assert graph_exec_info is not None