In [1]:
from bs4 import BeautifulSoup
import json


In [3]:
with open("verizon_plan_common_details.html", "r", encoding="utf-8") as file:
    html_data = file.read()

In [5]:

html = html_data#"""<your_html_here>"""  # Replace this with your HTML string

soup = BeautifulSoup(html, 'html.parser')

results = []

# Extract the main heading
main_heading = soup.find('h3')
if main_heading:
    results.append({
        "type": "heading",
        "text": main_heading.get_text(strip=True)
    })

# Extract includes-content blocks
for block in soup.select('div.includes-content'):
    title = block.find('div', class_='u-fontDisplayBold')
    paragraph = block.find('div', class_='u-textRegular')
    if title and paragraph:
        results.append({
            "type": "content_block",
            "title": title.get_text(strip=True),
            "description": paragraph.get_text(strip=True)
        })

# Extract promotional cards from value-pods-section
cards = soup.select('#value-pods-section .card')
for card in cards:
    visually_hidden_title = card.find('h2', class_='u-visuallyHidden')
    visible_title = card.select_one('div[aria-hidden="true"] h2')
    subtitle = card.select_one('div[aria-hidden="true"] div')
    footer = card.select_one('.footer-content')

    entry = {
        "type": "card",
        "title": (visually_hidden_title.get_text(strip=True) if visually_hidden_title else ""),
        "headline": (visible_title.get_text(strip=True) if visible_title else ""),
        "subheadline": (subtitle.get_text(strip=True) if subtitle else ""),
        "footer_note": (footer.get_text(strip=True) if footer else "")
    }
    results.append(entry)

# Pretty print the extracted structure
print(json.dumps(results, indent=2))

[
  {
    "type": "heading",
    "text": "All plans include"
  },
  {
    "type": "content_block",
    "title": "Unlimited talk, text & data",
    "description": "Stay in touch and never worry about overage charges with unlimited talk, text and data on the network more people rely on."
  },
  {
    "type": "content_block",
    "title": "5G and 4G LTE",
    "description": "Get unlimited access to our 5G and 4G LTE networks which provide the high speeds and reliability you've come to expect."
  },
  {
    "type": "content_block",
    "title": "Mexico & Canada talk, text & data",
    "description": "Talk, text and use high-speed data when traveling in Mexico & Canada. After 2 GB/day, get unlimited 3G speeds."
  },
  {
    "type": "content_block",
    "title": "International texting",
    "description": "Unlimited texting from the US to over 200 countries and territories worldwide."
  },
  {
    "type": "content_block",
    "title": "Verizon Family",
    "description": "The Verizon Family 

In [18]:
from bs4 import BeautifulSoup

with open('verizon_first_plan_card.html', 'r', encoding='utf-8') as f:  # or replace with your HTML string
    html = f.read()

In [20]:


soup = BeautifulSoup(html, 'html.parser')

result = {}

# Plan title
title_top = soup.find('div', class_='u-text--xs16')
title_bottom = soup.find('div', class_='u-text--xs32')
result['Plan Title'] = f"{title_top.get_text(strip=True)} {title_bottom.get_text(strip=True)}" if title_top and title_bottom else None

# Features list
features = []
for li in soup.select('li.teaser-logo'):
    feature_title = li.select_one('span.u-text--xs14')
    feature_desc = li.select_one('span.plan-description')
    
    text = feature_title.get_text(strip=True) if feature_title else ''
    if feature_desc:
        # Get inner text including from <ul> or <br>
        feature_desc_text = ' '.join(feature_desc.stripped_strings)
        text += f": {feature_desc_text}"
    
    if text:
        features.append(text)

result['Features'] = features

# Price info
price_container = soup.find('div', class_='pricing-container')
if price_container:
    price_value = price_container.select_one('.planDiscountPrice')
    per_line_note = price_container.select_one('.legal-value')
    result['Price'] = f"${price_value.get_text(strip=True)}/mo" if price_value else None
    result['Price Note'] = per_line_note.get_text(strip=True) if per_line_note else None

# Price guarantee
guarantees = []
for guarantee in soup.select('div.price-guarantee li'):
    guarantees.append(' '.join(guarantee.stripped_strings))
result['Guarantees'] = guarantees

# Display nicely
import pprint
pprint.pprint(result)

{'Features': ['5G Ultra Wideband with Enhanced Video Calling & Streaming: 4X '
              'faster than our regular 5G in the U.S, no matter how much you '
              'use. Plus: Enhanced video calling 4K/Ultra HD video streaming',
              'Unlimited mobile hotspot data1',
              'Unlimited international data, talk & text when traveling '
              'internationally2',
              'International calling from the US with Global Choice',
              'Ultimate Phone Upgrade: New & existing customers get our best '
              'current phone offers now & when you upgrade.',
              'Up to 50% off 2 data plans for a watch, tablet & more'],
 'Guarantees': ['3-year price lock guarantee **'],
 'Plan Title': 'Unlimited Ultimate',
 'Price': '$55/mo',
 'Price Note': 'per line w/ 4 lines'}


In [24]:
from bs4 import BeautifulSoup
import pandas as pd

with open('verizon_table.html', 'r', encoding='utf-8') as f:  # or replace with your HTML string
    html = f.read()

soup = BeautifulSoup(html, "html.parser")

In [28]:
# Extract plan names
header_cells = soup.select("thead tr th div.plan-name")
plans = [cell.get_text(strip=True) for cell in header_cells]

# Extract pricing rows
output_lines = []
rows = soup.select("tbody tr")

for row in rows:
    # Skip legal/footer rows
    lines_cell = row.select_one("th .plan-price-header")
    if not lines_cell:
        continue

    num_lines = lines_cell.get_text(strip=True)

    # Get all price cells
    prices = row.select("td .planDiscountPrice")
    price_values = [f"${price.get_text(strip=True)}/line" for price in prices]

    # Construct readable line
    line_output = f"For {num_lines}:"
    for plan, price in zip(plans, price_values):
        line_output += f" {plan} = {price};"
    
    output_lines.append(line_output.strip(";") + ".")

# Combine all lines
final_output = "\n".join(output_lines)
print(final_output)

For 4 lines: Unlimited Ultimate = $55/line; Unlimited Plus = $45/line; Unlimited Welcome = $30/line.
For 3 lines: Unlimited Ultimate = $65/line; Unlimited Plus = $55/line; Unlimited Welcome = $40/line.
For 2 lines: Unlimited Ultimate = $80/line; Unlimited Plus = $70/line; Unlimited Welcome = $55/line.
For 1 line: Unlimited Ultimate = $90/line; Unlimited Plus = $80/line; Unlimited Welcome = $65/line.


In [30]:
from bs4 import BeautifulSoup

with open('verizon_table.html', 'r', encoding='utf-8') as f:  # or replace with your HTML string
    html = f.read()

soup = BeautifulSoup(html, "html.parser")

In [32]:


# Find all <tr> elements under <tbody>
tbody = soup.find("tbody")
rows = tbody.find_all("tr") if tbody else []

# Extract and clean visible text from each row
extracted_lines = []
for row in rows:
    text = row.get_text(separator=" ", strip=True)
    if text:  # Skip empty rows
        extracted_lines.append(text)

# Join all lines into plain text
final_text = "\n".join(extracted_lines)
print(final_text)

Data
Data 5G Ultra Wideband with Enhanced Video Calling & Streaming 5G Ultra Wideband 5G
4X faster than our regular 5G in the U.S., no matter how much you use. Verizon 5G UWB median download speeds vs VZ low-band 5G median download speeds based on analysis by Verizon of Ookla® Speedtest Intelligence® data, Q1-Q2 2024. Explore our current 5G and 4G LTE coverage maps. Get access to the fastest speeds we offer with 5G Ultra Wideband. Combined with Verizon's 5G network, you get reliable coast to coast coverage. Download apps, games, entire playlists and TV series in seconds. You'll also get 5G Ultra Wideband mobile hotspot and crystal-clear 4K Ultra High Definition video streaming when activated on a capable device. Plus, access experiences from our exclusive 5G partners: Snapchat, Live Nation, Riot Games and Niantic. For full details, visit the 5G experience page. Enhanced video calling Get optimized video and sound quality on apps such as FaceTime, Whatsapp and Zoom in times of network c

In [35]:
with open("verizon_first_plan_detailed_Card.html", "r", encoding="utf-8") as file:
    html_data = file.read()

In [37]:
# Create a BeautifulSoup object
soup = BeautifulSoup(html, 'html.parser')

# Find all elements with a class containing "text"
text_elements = soup.find_all(class_=lambda class_name: class_name and 'text' in class_name)

# Extract text from these elements
text_content = [element.get_text(strip=True) for element in text_elements]

# Print the extracted text
for text in text_content:
    print(text)

Data
Data

Data
5G Ultra Wideband with Enhanced Video Calling & Streaming
5G Ultra Wideband
5G

4X faster than our regular 5G in the U.S., no matter how much you use.Verizon 5G UWB median download speeds vs VZ low-band 5G median download speeds based on analysis by Verizon of Ookla® Speedtest Intelligence® data, Q1-Q2 2024. Explore our current5G and 4G LTE coverage maps.Get access to the fastest speeds we offer with 5G Ultra Wideband. Combined with Verizon's 5G network, you get reliable coast to coast coverage. Download apps, games, entire playlists and TV series in seconds. You'll also get 5G Ultra Wideband mobile hotspot and crystal-clear 4K Ultra High Definition video streaming when activated on a capable device. Plus, access experiences from our exclusive 5G partners: Snapchat, Live Nation, Riot Games and Niantic. For full details, visit the5G experience page.Enhanced video callingGet optimized video and sound quality on apps such as FaceTime, Whatsapp and Zoom in times of network 

In [40]:
from neo4j import GraphDatabase

class Neo4jImporter:
    def __init__(self, uri, user, password):
        self.uri = uri
        self.user = user
        self.password = password
        self.driver = GraphDatabase.driver(self.uri, auth=(self.user, self.password))
    
    def close(self):
        self.driver.close()
    
    def create_plan(self):
        with self.driver.session() as session:
            # Create Plan node
            session.write_transaction(self._create_plan)

    def _create_plan(self, tx):
        # Create Plan node
        tx.run("MERGE (plan:Plan {name: 'Unlimited Ultimate'})")
        
        # Create Pricing node
        tx.run("MERGE (pricing:Pricing {amount: 55.00, unit: '/line'})")
        
        # Create Service node (Disney+, Hulu, ESPN+)
        tx.run("MERGE (service:Service {name: 'Disney+, Hulu, ESPN+ (With Ads)', cost: 16.99})")
        
        # Create Discount node
        tx.run("MERGE (discount:Discount {amount: 6.99, unit: '/line'})")
        
        # Create Discount Type node
        tx.run("MERGE (discount_type:DiscountType {name: 'Auto Pay & Paper-free billing'})")
        
        # Create Feature nodes
        tx.run("MERGE (feature_5g:Feature {name: 'Unlimited 5G/4G LTE'})")
        tx.run("MERGE (feature_video_calling:Feature {name: 'Enhanced Video Calling'})")
        tx.run("MERGE (feature_hotspot:Feature {name: 'Unlimited Mobile Hotspot Data'})")
        
        # Create Data Limit and Speed nodes
        tx.run("MERGE (data_limit:DataLimit {limit: '200 GB/mo (for Unlimited Ultimate)'})")
        tx.run("MERGE (speed:Speed {value: 'Up to 6 Mbps (Mobile Hotspot)'})")
        
        # Create Streaming Quality node
        tx.run("MERGE (streaming_quality:StreamingQuality {quality: 'Up to 4K Ultra HD'})")
        
        # Create Device node
        tx.run("MERGE (device:Device {type: '5G capable device'})")
        
        # Create International Feature node
        tx.run("MERGE (intl_feature:InternationalFeature {name: 'Unlimited international data, talk & text'})")
        
        # Create Home Internet node
        tx.run("MERGE (home_internet:HomeInternet {price: 35.00})")
        
        # Create Phone Upgrade node
        tx.run("MERGE (phone_upgrade:PhoneUpgrade {name: 'Ultimate Phone Upgrade'})")
        
        # Create Price Lock node
        tx.run("MERGE (price_lock:PriceLock {term: '3-year price lock guarantee'})")
        
        # Relationships
        # Plan -> Pricing
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (pricing:Pricing)
            MERGE (plan)-[:HAS_PRICING]->(pricing)
        """)
        
        # Plan -> Service
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (service:Service)
            MERGE (plan)-[:INCLUDES]->(service)
        """)
        
        # Plan -> Discount
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (discount:Discount)
            MERGE (plan)-[:DISCOUNTED_BY]->(discount)
        """)
        
        # Discount -> DiscountType
        tx.run("""
            MATCH (discount:Discount), (discount_type:DiscountType)
            MERGE (discount)-[:APPLIES_WITH]->(discount_type)
        """)
        
        # Plan -> Feature
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (feature_5g:Feature {name: 'Unlimited 5G/4G LTE'})
            MERGE (plan)-[:OFFERS]->(feature_5g)
        """)
        
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (feature_video_calling:Feature {name: 'Enhanced Video Calling'})
            MERGE (plan)-[:HAS_PERK]->(feature_video_calling)
        """)
        
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (feature_hotspot:Feature {name: 'Unlimited Mobile Hotspot Data'})
            MERGE (plan)-[:HAS_FEATURE]->(feature_hotspot)
        """)
        
        # Plan -> Data Limit
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (data_limit:DataLimit)
            MERGE (plan)-[:HAS_DATA_LIMIT]->(data_limit)
        """)
        
        # Plan -> Speed
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (speed:Speed)
            MERGE (plan)-[:HAS_SPEED]->(speed)
        """)
        
        # Plan -> Device
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (device:Device)
            MERGE (plan)-[:REQUIRES]->(device)
        """)
        
        # Plan -> Streaming Quality
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (streaming_quality:StreamingQuality)
            MERGE (plan)-[:HAS_STREAMING_QUALITY]->(streaming_quality)
        """)
        
        # Plan -> International Feature
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (intl_feature:InternationalFeature)
            MERGE (plan)-[:INCLUDES_INTERNATIONAL]->(intl_feature)
        """)
        
        # Plan -> Home Internet
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (home_internet:HomeInternet)
            MERGE (plan)-[:INCLUDES_HOME_INTERNET]->(home_internet)
        """)
        
        # Plan -> Phone Upgrade
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (phone_upgrade:PhoneUpgrade)
            MERGE (plan)-[:OFFERS_PHONE_UPGRADE]->(phone_upgrade)
        """)
        
        # Plan -> Price Lock
        tx.run("""
            MATCH (plan:Plan {name: 'Unlimited Ultimate'}), (price_lock:PriceLock)
            MERGE (plan)-[:HAS_PRICE_LOCK]->(price_lock)
        """)

# Example of usage
# Replace with your Neo4j credentials
NEO4J_URI = "neo4j+s://3ea3293d.databases.neo4j.io"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "onNoUtCFlSUAz8p9x6j5aN5fZpi12TIsCgka-ZtQQkg"

neo4j_importer = Neo4jImporter(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
neo4j_importer.create_plan()
neo4j_importer.close()


  session.write_transaction(self._create_plan)
