In [1]:
# web_scraping.py

import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_html(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)
    return response.text

def parse_html(html):
    soup = BeautifulSoup(html, 'html.parser')
    fpga_data = []
    for fpga in soup.find_all('div', class_='fpga-spec'):
        model = fpga.find('h2').text
        ram = fpga.find('span', class_='ram').text
        logic_elements = fpga.find('span', class_='logic-elements').text
        fpga_data.append({'model': model, 'ram': ram, 'logic_elements': logic_elements})
    return fpga_data

def main():
    url = "https://www.example.com/altera-fpgas"
    html = fetch_html(url)
    data = parse_html(html)
    df = pd.DataFrame(data)
    df.to_csv('fpgas.csv', index=False)
    print(df)

if __name__ == "__main__":
    main()


Empty DataFrame
Columns: []
Index: []


In [2]:
pip install neo4j pandas

Collecting neo4j
  Downloading neo4j-5.22.0-py3-none-any.whl.metadata (5.7 kB)
Downloading neo4j-5.22.0-py3-none-any.whl (293 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.5/293.5 kB[0m [31m594.3 kB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0mm
[?25hInstalling collected packages: neo4j
Successfully installed neo4j-5.22.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install --upgrade pip

Collecting pip
  Downloading pip-24.1.2-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.1.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.0
    Uninstalling pip-24.0:
      Successfully uninstalled pip-24.0
Successfully installed pip-24.1.2
Note: you may need to restart the kernel to use updated packages.


In [5]:
# knowledge_graph.py

from neo4j import GraphDatabase
import pandas as pd

class KnowledgeGraph:

    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def create_node(self, tx, label, properties):
        query = f"CREATE (n:{label} {{"
        query += ", ".join([f"{key}: ${key}" for key in properties.keys()])
        query += "})"
        tx.run(query, **properties)

    def create_relationship(self, tx, from_node, to_node, relationship_type, properties):
        query = f"""
        MATCH (a:{from_node['label']}), (b:{to_node['label']})
        WHERE a.{from_node['key']} = $from_value AND b.{to_node['key']} = $to_value
        CREATE (a)-[r:{relationship_type} {{"""
        query += ", ".join([f"{key}: ${key}" for key in properties.keys()])
        query += f"}}]->(b)"
        tx.run(query, from_value=from_node['value'], to_value=to_node['value'], **properties)

    def build_graph(self, data):
        with self.driver.session() as session:
            for _, row in data.iterrows():
                session.write_transaction(self.create_node, "FPGA", {"model": row['model']})
                session.write_transaction(self.create_node, "RAM", {"size": row['ram']})
                session.write_transaction(self.create_node, "LogicElement", {"count": row['logic_elements']})
                session.write_transaction(
                    self.create_relationship,
                    {"label": "FPGA", "key": "model", "value": row['model']},
                    {"label": "RAM", "key": "size", "value": row['ram']},
                    "HAS_RAM", {}
                )
                session.write_transaction(
                    self.create_relationship,
                    {"label": "FPGA", "key": "model", "value": row['model']},
                    {"label": "LogicElement", "key": "count", "value": row['logic_elements']},
                    "HAS_LOGIC_ELEMENTS", {}
                )

def main():
    uri = "bolt://localhost:7687"
    user = "neo4j"
    password = "password"
    kg = KnowledgeGraph(uri, user, password)
    
    df = pd.read_csv('fpgas.csv')
    kg.build_graph(df)
    kg.close()

if __name__ == "__main__":
    main()

  session.write_transaction(self.create_node, "FPGA", {"model": row['model']})
Transaction failed and will be retried in 1.027309238867623s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused))
Transaction failed and will be retried in 1.72597411822719s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused))
Transaction failed and will be retried in 4.174043524216788s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Faile

ServiceUnavailable: Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused)

In [6]:
neo4j start

SyntaxError: invalid syntax (216100076.py, line 1)

In [7]:
neo4j status

SyntaxError: invalid syntax (2712008427.py, line 1)

In [8]:
import pandas as pd
from neo4j import GraphDatabase

class KnowledgeGraph:

    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def create_entity(self, tx, label, properties):
        query = f"CREATE (n:{label} {{"
        query += ", ".join([f"{key}: ${key}" for key in properties.keys()])
        query += "})"
        tx.run(query, **properties)

    def create_relationship(self, tx, from_label, from_properties, to_label, to_properties, relationship_type):
        query = f"""
        MATCH (a:{from_label}), (b:{to_label})
        WHERE a.{from_properties['key']} = $from_value AND b.{to_properties['key']} = $to_value
        CREATE (a)-[r:{relationship_type}]->(b)
        """
        tx.run(query, from_value=from_properties['value'], to_value=to_properties['value'])

    def build_graph(self, df):
        with self.driver.session() as session:
            for index, row in df.iterrows():
                session.execute_write(self.create_entity, "FPGA", {"model": row['model']})
                session.execute_write(self.create_entity, "RAM", {"size": row['RAM_size']})
                session.execute_write(self.create_entity, "LogicElements", {"count": row['logic_elements']})
                session.execute_write(
                    self.create_relationship, 
                    "FPGA", {"key": "model", "value": row['model']}, 
                    "RAM", {"key": "size", "value": row['RAM_size']}, 
                    "HAS_RAM"
                )
                session.execute_write(
                    self.create_relationship, 
                    "FPGA", {"key": "model", "value": row['model']}, 
                    "LogicElements", {"key": "count", "value": row['logic_elements']}, 
                    "HAS_LOGIC_ELEMENTS"
                )

def main():
    uri = "bolt://localhost:7687"
    user = "neo4j"
    password = "password"
    kg = KnowledgeGraph(uri, user, password)

    # Ensure the CSV file exists and has content
    try:
        df = pd.read_csv('fpgas.csv')
        kg.build_graph(df)
    except pd.errors.EmptyDataError:
        print("The CSV file is empty or does not contain any data.")

    kg.close()

if __name__ == "__main__":
    main()


Transaction failed and will be retried in 1.1488412997142425s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused))
Transaction failed and will be retried in 1.8796966157233272s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused))
Transaction failed and will be retried in 4.193654113382349s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reaso

ServiceUnavailable: Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused)

In [9]:
# Example for downloading Neo4j Community Edition 4.5.2 for macOS
wget https://neo4j.com/artifact.php?name=neo4j-community-4.5.2-unix.tar.gz -O neo4j.tar.gz

SyntaxError: invalid syntax (2231069450.py, line 2)

In [10]:
wget https://neo4j.com/artifact.php?name=neo4j-community-4.5.2-unix.tar.gz -O neo4j.tar.gz

SyntaxError: invalid syntax (1351421323.py, line 1)

In [11]:
# web_scraping.py

import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_html(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)
    return response.text

def parse_html(html):
    soup = BeautifulSoup(html, 'html.parser')
    fpga_data = []
    for fpga in soup.find_all('div', class_='fpga-spec'):
        model = fpga.find('h2').text
        ram = fpga.find('span', class_='ram').text
        logic_elements = fpga.find('span', class_='logic-elements').text
        fpga_data.append({'model': model, 'ram': ram, 'logic_elements': logic_elements})
    return fpga_data

def main():
    url = "https://www.example.com/altera-fpgas"
    html = fetch_html(url)
    data = parse_html(html)
    df = pd.DataFrame(data)
    df.to_csv('fpgas.csv', index=False)
    print(df)

if __name__ == "__main__":
    main()

Empty DataFrame
Columns: []
Index: []


In [12]:
pip install neo4j pandas


Note: you may need to restart the kernel to use updated packages.


In [14]:
# knowledge_graph.py

from neo4j import GraphDatabase
import pandas as pd

class KnowledgeGraph:

    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def create_node(self, tx, label, properties):
        query = f"CREATE (n:{label} {{"
        query += ", ".join([f"{key}: ${key}" for key in properties.keys()])
        query += "})"
        tx.run(query, **properties)

    def create_relationship(self, tx, from_node, to_node, relationship_type, properties):
        query = f"""
        MATCH (a:{from_node['label']}), (b:{to_node['label']})
        WHERE a.{from_node['key']} = $from_value AND b.{to_node['key']} = $to_value
        CREATE (a)-[r:{relationship_type} {{"""
        query += ", ".join([f"{key}: ${key}" for key in properties.keys()])
        query += f"}}]->(b)"
        tx.run(query, from_value=from_node['value'], to_value=to_node['value'], **properties)

    def build_graph(self, data):
        with self.driver.session() as session:
            for _, row in data.iterrows():
                session.write_transaction(self.create_node, "FPGA", {"model": row['model']})
                session.write_transaction(self.create_node, "RAM", {"size": row['ram']})
                session.write_transaction(self.create_node, "LogicElement", {"count": row['logic_elements']})
                session.write_transaction(
                    self.create_relationship,
                    {"label": "FPGA", "key": "model", "value": row['model']},
                    {"label": "RAM", "key": "size", "value": row['ram']},
                    "HAS_RAM", {}
                )
                session.write_transaction(
                    self.create_relationship,
                    {"label": "FPGA", "key": "model", "value": row['model']},
                    {"label": "LogicElement", "key": "count", "value": row['logic_elements']},
                    "HAS_LOGIC_ELEMENTS", {}
                )

def main():
    uri = "bolt://localhost:7687"
    user = "neo4j"
    password = "password"
    kg = KnowledgeGraph(uri, user, password)
    
    df = pd.read_csv('fpgas.csv')
    kg.build_graph(df)
    kg.close()

if __name__ == "__main__":
    main()


  session.write_transaction(self.create_node, "FPGA", {"model": row['model']})
Transaction failed and will be retried in 1.0641245872124314s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused))
Transaction failed and will be retried in 2.1889157889983s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused))
Transaction failed and will be retried in 4.365143840213662s (Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Faile

ServiceUnavailable: Couldn't connect to localhost:7687 (resolved to ()):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 61] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 61] Connection refused)

In [15]:
pip install requests beautifulsoup4 scrapy pandas

Note: you may need to restart the kernel to use updated packages.


In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_webpage(url):
    """Fetches the content of a webpage."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None

def parse_page(html):
    """Parses the HTML content and extracts relevant information."""
    soup = BeautifulSoup(html, 'html.parser')

    # Example: Extract headers and descriptions
    headers = [header.get_text(strip=True) for header in soup.find_all('h2')]
    descriptions = [desc.get_text(strip=True) for desc in soup.find_all('p')]

    # Combine extracted data into a dictionary
    data = {
        "headers": headers,
        "descriptions": descriptions
    }
    return data

def save_to_csv(data):
    """Saves extracted data to a CSV file."""
    df = pd.DataFrame({
        'Header': data['headers'],
        'Description': data['descriptions']
    })
    df.to_csv('fpga_data.csv', index=False)

def main():
    url = 'https://www.intel.com/content/www/us/en/products/details/fpga.html'
    html = fetch_webpage(url)
    if html:
        data = parse_page(html)
        save_to_csv(data)
        print("Data has been saved to fpga_data.csv")

if __name__ == '__main__':
    main()

ValueError: All arrays must be of the same length

In [17]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_html(url):
    """Fetches HTML content from the specified URL."""
    try:
        response = requests.get(url)
        response.raise_for_status()  # Check if the request was successful
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

def parse_page(html):
    """Parses the HTML content and extracts relevant information."""
    soup = BeautifulSoup(html, 'html.parser')

    # Example: Extract headers and descriptions
    headers = [header.get_text(strip=True) for header in soup.find_all('h2')]
    descriptions = [desc.get_text(strip=True) for desc in soup.find_all('p')]

    # Debug: Print lengths of the lists
    print(f"Number of headers: {len(headers)}")
    print(f"Number of descriptions: {len(descriptions)}")

    # Ensure that headers and descriptions have the same length
    max_length = max(len(headers), len(descriptions))
    headers.extend([''] * (max_length - len(headers)))
    descriptions.extend([''] * (max_length - len(descriptions)))

    data = {
        "headers": headers,
        "descriptions": descriptions
    }
    return data

def save_to_csv(data):
    """Saves extracted data to a CSV file."""
    df = pd.DataFrame({
        'Header': data['headers'],
        'Description': data['descriptions']
    })
    df.to_csv('fpga_data.csv', index=False)
    print("Data has been saved to fpga_data.csv")

def main():
    url = 'https://www.intel.com/content/www/us/en/products/details/fpga.html'
    html = fetch_html(url)
    if html:
        data = parse_page(html)
        save_to_csv(data)

if __name__ == '__main__':
    main()


Number of headers: 7
Number of descriptions: 39
Data has been saved to fpga_data.csv


In [18]:
pip install scrapy

Note: you may need to restart the kernel to use updated packages.


In [19]:
import scrapy

class IntelFpgaSpider(scrapy.Spider):
    name = 'intel_fpga_spider'
    start_urls = ['https://www.intel.com/content/www/us/en/products/details/fpga.html']

    def parse(self, response):
        headers = response.css('h2::text').getall()
        descriptions = response.css('p::text').getall()

        # Debug: Print lengths of the lists
        self.log(f"Number of headers: {len(headers)}")
        self.log(f"Number of descriptions: {len(descriptions)}")

        # Ensure that headers and descriptions have the same length
        max_length = max(len(headers), len(descriptions))
        headers.extend([''] * (max_length - len(headers)))
        descriptions.extend([''] * (max_length - len(descriptions)))

        for header, description in zip(headers, descriptions):
            yield {
                'Header': header,
                'Description': description
            }


In [20]:
import scrapy

class IntelFpgaSpider(scrapy.Spider):
    name = 'intel_fpga_spider'
    
    def __init__(self, url=None, *args, **kwargs):
        super(IntelFpgaSpider, self).__init__(*args, **kwargs)
        self.start_urls = [url] if url else []

    def parse(self, response):
        headers = response.css('h2::text').getall()
        descriptions = response.css('p::text').getall()

        # Ensure that headers and descriptions have the same length
        max_length = max(len(headers), len(descriptions))
        headers.extend([''] * (max_length - len(headers)))
        descriptions.extend([''] * (max_length - len(descriptions)))

        for header, description in zip(headers, descriptions):
            yield {
                'Header': header,
                'Description': description
            }


In [21]:
scrapy crawl intel_fpga_spider -o fpga_data.csv -a url=https://www.intel.com/content/www/us/en/products/details/fpga.html


SyntaxError: invalid syntax (254185081.py, line 1)

In [22]:
scrapy crawl intel_fpga_spider -o fpga_data.csv -a url="https://www.intel.com/content/www/us/en/products/details/fpga.html"

SyntaxError: invalid syntax (3706751374.py, line 1)

In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_html(url):
    """Fetches HTML content from the specified URL."""
    try:
        response = requests.get(url)
        response.raise_for_status()  # Check if the request was successful
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

def parse_page(html):
    """Parses the HTML content and extracts relevant information."""
    soup = BeautifulSoup(html, 'html.parser')

    # Example: Extract headers and descriptions
    headers = [header.get_text(strip=True) for header in soup.find_all('h2')]
    descriptions = [desc.get_text(strip=True) for desc in soup.find_all('p')]

    # Debug: Print lengths of the lists
    print(f"Number of headers: {len(headers)}")
    print(f"Number of descriptions: {len(descriptions)}")

    # Ensure that headers and descriptions have the same length
    max_length = max(len(headers), len(descriptions))
    headers.extend([''] * (max_length - len(headers)))
    descriptions.extend([''] * (max_length - len(descriptions)))

    data = {
        "headers": headers,
        "descriptions": descriptions
    }
    return data

def save_to_csv(data):
    """Saves extracted data to a CSV file."""
    df = pd.DataFrame({
        'Header': data['headers'],
        'Description': data['descriptions']
    })
    df.to_csv('fpga_data.csv', index=False)
    print("Data has been saved to fpga_data.csv")

def main():
    url = 'https://en.wikipedia.org/wiki/Altera'
    html = fetch_html(url)
    if html:
        data = parse_page(html)
        save_to_csv(data)

if __name__ == '__main__':
    main()

Number of headers: 6
Number of descriptions: 19
Data has been saved to fpga_data.csv


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_html(url):
    """Fetches HTML content from the specified URL."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

def parse_page(html):
    """Parses the HTML content and extracts relevant information."""
    soup = BeautifulSoup(html, 'html.parser')
    headers = [header.get_text(strip=True) for header in soup.find_all('h2')]
    descriptions = [desc.get_text(strip=True) for desc in soup.find_all('p')]
    max_length = max(len(headers), len(descriptions))
    headers.extend([''] * (max_length - len(headers)))
    descriptions.extend([''] * (max_length - len(descriptions)))
    data = {
        "headers": headers,
        "descriptions": descriptions
    }
    return data

def save_to_csv(data, filename='data.csv'):
    """Saves extracted data to a CSV file."""
    df = pd.DataFrame({
        'Header': data['headers'],
        'Description': data['descriptions']
    })
    df.to_csv(filename, index=False)
    print(f"Data has been saved to {filename}")

def main():
    url = input("Enter the URL to scrape: ")
    html = fetch_html(url)
    if html:
        data = parse_page(html)
        save_to_csv(data)

if __name__ == '__main__':
    main()
