# Visualizing Scrapy Output in Jupyter Notebook

This notebook demonstrates how to scrape web content using Scrapy and visualize the results in a Jupyter notebook. 

## Steps to Set Up and Run Scrapy
### Install Scrapy

```bash
pip install scrapy
```


### Create a Scrapy Project

If you haven't already, create a new Scrapy project:

```bash
scrapy startproject myproject
cd myproject
```

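### Define a Spider

Create a new spider file in the `spiders` directory (e.g., `my_spider.py`). The spider should yield items containing the fields that the visualization cells below read (for example, `label` and `content` for the first cell).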

### Run the Spider

Run the spider from the command line to generate the output file (`-O` overwrites the file if it already exists):

```bash
scrapy crawl spider_name -O output_name.json
```

In [26]:
import pandas as pd
from IPython.display import display, Image, HTML

# Load the scraped items from 'images.json' (path is relative to the working directory).
# display_content below reads the 'label' and 'content' fields of each row.
data = pd.read_json('images.json')
# Render the label heading, images and text of every scraped row
def display_content(data):
    """Render each row of ``data`` as an ``<h2>`` label followed by its sections.

    For every row, ``row['label']`` is shown as a heading. Each section in
    ``row['content']`` then contributes one image per entry of its ``images``
    list (loaded from the entry's ``image_url``, requested at 300x300) and,
    when its ``text`` is non-empty, one paragraph.

    Label and text are HTML-escaped before being placed into markup because
    they come from scraped pages and may contain ``<``, ``&`` or quotes.
    Rows whose ``content`` is missing (None/NaN) and sections without an
    ``images`` or ``text`` key are skipped instead of raising.

    Args:
        data: pandas DataFrame with ``label`` and ``content`` columns, where
            each ``content`` value is an iterable of dict sections.
    """
    from html import escape  # function-scope import keeps this cell self-contained

    def _missing(value):
        # NaN is the only float that is not equal to itself
        return value is None or (isinstance(value, float) and value != value)

    for _, row in data.iterrows():
        display(HTML(f"<h2>{escape(str(row['label']))}</h2>"))

        content = row['content']
        if _missing(content):
            continue  # nothing scraped for this row beyond its label

        for section in content:
            # `or []` covers both an absent key and an explicit empty/None value
            for img in section.get('images') or []:
                display(Image(url=img['image_url'], width=300, height=300))
            text = section.get('text')
            if text:
                display(HTML(f"<p>{escape(str(text))}</p>"))

# Render every loaded row (label, images, text) in this cell's output
display_content(data)



In [28]:
import pandas as pd
from IPython.display import display, Image, HTML

# Load the scraped items from 'images-humboldt.json' (path is relative to the working directory).
# display_content below reads the 'labels', 'images' and 'content' fields of each row.
data = pd.read_json('images-humboldt.json')

# Render the labels, images and sectioned text of every scraped row
def display_content(data):
    """Render each row of ``data`` as headings, images and titled text sections.

    For every row, in this order:
      * each entry of ``row['labels']`` is shown as an ``<h2>`` heading,
      * each entry of ``row['images']`` is treated as an image URL and
        displayed (requested at 300x300),
      * each ``section -> text`` pair of ``row['content']`` is shown as an
        ``<h3>`` title followed by a paragraph.

    Labels, section titles and text are HTML-escaped before being placed into
    markup because they come from scraped pages and may contain ``<``, ``&``
    or quotes. A field that is missing for a row (None/NaN) is skipped
    instead of raising.

    Args:
        data: pandas DataFrame with ``labels`` (iterable of str), ``images``
            (iterable of URLs) and ``content`` (dict) columns.
    """
    from html import escape  # function-scope import keeps this cell self-contained

    def _missing(value):
        # NaN is the only float that is not equal to itself
        return value is None or (isinstance(value, float) and value != value)

    for _, row in data.iterrows():
        labels = row['labels']
        images = row['images']
        content = row['content']

        if not _missing(labels):
            for label in labels:
                display(HTML(f"<h2>{escape(str(label))}</h2>"))

        if not _missing(images):
            for img in images:
                display(Image(url=img, width=300, height=300))

        if not _missing(content):
            for section, text in content.items():
                display(HTML(f"<h3>{escape(str(section))}</h3>"))
                display(HTML(f"<p>{escape(str(text))}</p>"))

# Render every loaded row (labels, images, sectioned text) in this cell's output
display_content(data)


In [1]:
import pandas as pd
from IPython.display import display, Image, HTML

# Load the scraped items from 'products.json' (path is relative to the working directory).
# display_content below reads the 'name', 'url', 'image', 'description', 'price'
# and 'availability' fields of each row.
data = pd.read_json('products.json')

# Render a linked title, image and details for every scraped product
def display_content(data):
    """Render each product row of ``data`` as a small HTML card.

    For every row the output is, in order: an ``<h2>`` whose text is the
    product ``name`` linking to ``url`` (opened in a new tab), the product
    ``image`` (requested at 300x300), one paragraph each for ``description``,
    ``price`` and ``availability``, and a closing ``<hr>`` separator.

    Every scraped value is HTML-escaped before being placed into markup,
    including the URL inside the single-quoted ``href`` attribute, so
    characters such as ``<``, ``&`` or ``'`` cannot break or inject markup.
    The image is skipped when the row has none (None/NaN) rather than
    emitting a broken image.

    Args:
        data: pandas DataFrame with ``name``, ``url``, ``image``,
            ``description``, ``price`` and ``availability`` columns.
    """
    from html import escape  # function-scope import keeps this cell self-contained

    def _missing(value):
        # NaN is the only float that is not equal to itself
        return value is None or (isinstance(value, float) and value != value)

    def _esc(value):
        # escape() defaults to quote=True, so both " and ' are escaped as well
        return escape(str(value))

    for _, row in data.iterrows():
        display(HTML(
            f"<h2><a href='{_esc(row['url'])}' target='_blank'>{_esc(row['name'])}</a></h2>"
        ))

        image = row['image']
        if not _missing(image):
            display(Image(url=image, width=300, height=300))

        display(HTML(f"<p><strong>Description:</strong> {_esc(row['description'])}</p>"))
        display(HTML(f"<p><strong>Price:</strong> {_esc(row['price'])}</p>"))
        display(HTML(f"<p><strong>Availability:</strong> {_esc(row['availability'])}</p>"))
        display(HTML("<hr>"))

# Render every loaded product row in this cell's output
display_content(data)