Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
name: Pylint

on: [push]

jobs:
Expand All @@ -20,4 +18,13 @@ jobs:
pip install pylint
pip install -r requirements.txt
- name: Analysing the code with pylint
run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py
run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py
- name: Check Pylint score
  run: |
    # Extract the numeric score from pylint's summary line, which looks like:
    #   "Your code has been rated at 9.12/10 (previous run: ...)"
    # The previous pipeline (grep 'Raw metrics' | awk '{print $4}') could never
    # match: 'Raw metrics' only exists in the removed parseable reporter, and
    # '--enable=metrics' is not a valid message id, so the score was always empty.
    pylint_score=$(pylint --disable=C0114,C0115,C0116 --exit-zero \
      scrapegraphai/**/*.py scrapegraphai/*.py \
      | grep -oE 'rated at [0-9.]+' | awk '{print $3}')
    # Fail closed: if extraction produced nothing, treat it as a zero score
    # instead of letting bc error out and the gate silently pass.
    pylint_score=${pylint_score:-0}
    if (( $(echo "$pylint_score < 8" | bc -l) )); then
      echo "Pylint score is below 8. Blocking commit."
      exit 1
    else
      echo "Pylint score is acceptable."
    fi
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "scrapegraphai"
version = "0.2.1"
version = "0.2.2"
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
authors = [
"Marco Vinciguerra <mvincig11@gmail.com>",
Expand Down
4 changes: 2 additions & 2 deletions scrapegraphai/nodes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .generate_answer_node import GenerateAnswerNode
from .parse_node import ParseNode
from .rag_node import RAGNode
from .text_to_speech_node import TextToSpeechNode
from .image_to_text_node import ImageToTextNode
from .text_to_speech_node_openai import TextToSpeechNode
from .image_to_text_node_openai import ImageToTextNode
from .search_internet_node import SearchInternetNode
from .generate_scraper_node import GenerateScraperNode
15 changes: 10 additions & 5 deletions tests/scrape_plain_text_ollama_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

@pytest.fixture
def sample_text():
# Read the sample text file
"""
Example of text
"""
file_name = "inputs/plain_html_example.txt"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, file_name)
Expand All @@ -21,6 +23,9 @@ def sample_text():

@pytest.fixture
def graph_config():
"""
Configuration of the graph
"""
return {
"llm": {
"model": "ollama/mistral",
Expand All @@ -36,16 +41,16 @@ def graph_config():
}


def test_scraping_pipeline(sample_text, graph_config):
# Create the SmartScraperGraph instance
def test_scraping_pipeline(sample_text: str, graph_config: dict):
"""
Start of the scraping pipeline
"""
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the news with their description.",
source=sample_text,
config=graph_config
)

# Run the graph
result = smart_scraper_graph.run()

# Check that the result is not empty
assert result is not None
15 changes: 10 additions & 5 deletions tests/scrape_xml_ollama_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

@pytest.fixture
def sample_xml():
# Leggi il file XML di esempio
"""
Example of text
"""
file_name = "inputs/books.xml"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, file_name)
Expand All @@ -21,6 +23,9 @@ def sample_xml():

@pytest.fixture
def graph_config():
"""
Configuration of the graph
"""
return {
"llm": {
"model": "ollama/mistral",
Expand All @@ -36,16 +41,16 @@ def graph_config():
}


def test_scraping_pipeline(sample_xml, graph_config):
# Crea un'istanza di SmartScraperGraph
def test_scraping_pipeline(sample_xml: str, graph_config: dict):
"""
Start of the scraping pipeline
"""
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the authors, title and genres of the books",
source=sample_xml,
config=graph_config
)

# Esegui il grafico
result = smart_scraper_graph.run()

# Verifica che il risultato non sia vuoto
assert result is not None
19 changes: 7 additions & 12 deletions tests/script_generator_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
"""
<<<<<<< Updated upstream
Module for making the tests for ScriptGeneratorGraph
=======
Test for script generator
>>>>>>> Stashed changes
"""
import pytest
from scrapegraphai.graphs import ScriptCreatorGraph
Expand All @@ -12,6 +8,9 @@

@pytest.fixture
def graph_config():
"""
Configuration of the graph
"""
return {
"llm": {
"model": "ollama/mistral",
Expand All @@ -29,28 +28,24 @@ def graph_config():
}


def test_script_creator_graph(graph_config):
# Create the ScriptCreatorGraph instance
def test_script_creator_graph(graph_config: dict):
"""
Start of the scraping pipeline
"""
smart_scraper_graph = ScriptCreatorGraph(
prompt="List me all the news with their description.",
source="https://perinim.github.io/projects",
config=graph_config
)

# Run the graph
result = smart_scraper_graph.run()

# Check that the result is not empty
assert result is not None

# Get graph execution info
graph_exec_info = smart_scraper_graph.get_execution_info()

# Check that execution info is not empty
assert graph_exec_info is not None

# Check that execution info is a dictionary
assert isinstance(graph_exec_info, dict)

# Print execution info
print(prettify_exec_info(graph_exec_info))
20 changes: 11 additions & 9 deletions tests/smart_scraper_ollama_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

@pytest.fixture
def graph_config():
"""
Configuration of the graph
"""
return {
"llm": {
"model": "ollama/mistral",
Expand All @@ -22,34 +25,33 @@ def graph_config():
}


def test_scraping_pipeline(graph_config):
# Crea un'istanza di SmartScraperGraph
def test_scraping_pipeline(graph_config: dict):
"""
Start of the scraping pipeline
"""
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the news with their description.",
source="https://perinim.github.io/projects",
config=graph_config
)

# Esegui il grafico
result = smart_scraper_graph.run()

# Verifica che il risultato non sia vuoto
assert result is not None


def test_get_execution_info(graph_config):
# Crea un'istanza di SmartScraperGraph
def test_get_execution_info(graph_config: dict):
"""
Get the execution info
"""
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the news with their description.",
source="https://perinim.github.io/projects",
config=graph_config
)

# Esegui il grafico
smart_scraper_graph.run()

# Ottieni le informazioni sull'esecuzione del grafico
graph_exec_info = smart_scraper_graph.get_execution_info()

# Verifica che le informazioni sull'esecuzione non siano vuote
assert graph_exec_info is not None