In [1]:
from bs4 import BeautifulSoup
import requests
import json
import re

In [2]:
website_name='Camp Laurel'
website_url='https://www.camplaurel.com/'
HEADERS =({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
                   'Accept-Language':'en-US, en;q=0.5'})

home = requests.get(website_url,headers=HEADERS)
home_soup = BeautifulSoup(home.content, 'lxml')

In [3]:
description=home_soup.find('div',attrs={'class':'inner-wrapper'}).text.strip()
description

'Quality Maine CampingCamp Laurel, on the shores of Echo Lake in Maine’s Belgrade Lakes region, is known for taking great care of kids! Through our action-packed seven week experience, campers are exposed to a wide range of activities within our unique 6-part core program of Athletics, Waterfront, Adventure, Inter-Arts, Tennis and Equestrian. Our caring, nurturing and professional staff of coaches, educators and college students encourages our campers to develop new interests, gain new skills and discover new strengths.Learn More About OUR PROGRAM'

In [4]:
logo_tag = home_soup.find('div', class_='site-branding').find('img')
logo_url = logo_tag['src'] if logo_tag else 'null'
logo_url

'https://www.camplaurel.com/wp-content/uploads/2020/04/camp-laurel-maine-logo.png'

In [8]:
contact_table = home_soup.find_all('div', attrs={'class': 'vcard'})

# Initialize lists and variables
location = []
zip_code = []
telephone = []
mail = ''

# Iterate through each vcard in the contact table
for vcard in contact_table:
    addresses = vcard.find_all('div', attrs={'class': 'adr'})
    for address in addresses:
        # Extract individual components
        street_address = address.find('span', attrs={'class': 'street-address'})
        locality = address.find('span', attrs={'class': 'locality'})
        region = address.find('abbr', attrs={'class': 'region'})  # Corrected from 'abb' to 'abbr'
        postal_code = address.find('span', attrs={'class': 'postal-code'})
        
        # Build the location string
        location_parts = []
        if street_address:
            location_parts.append(street_address.text)
        if locality:
            location_parts.append(locality.text)
        if region:
            location_parts.append(region.text)
        full_location = ", ".join(location_parts)
        
        # Append to location if not empty
        if full_location:
            location.append(full_location)
        
        # Append postal code
        if postal_code:
            zip_code.append(postal_code.text)

    # Extract telephone numbers
    telephones = vcard.find_all('a', attrs={'class': 'tel phone'})
    for phone in telephones:
        telephone.append(phone.text.strip())

    # Extract email
    email = vcard.find('a', attrs={'class': 'email'})
    if email:
        mail = email.get('href').replace('mailto:', '')

lat=['44.38940','41.14170']
lon=['-69.95240','-73.35850']

# Create location_data dictionary with required fields
location_data = {
    "address": location,  # List of addresses
    "zip_code": zip_code,  # List of ZIP codes
    "latitude": lat,  # Replace with actual latitude if available
    "longitude": lon  # Replace with actual longitude if available
}

# Create contact_data dictionary with email and telephone numbers
contact_data = {
    "mail": mail,
    "telephone": telephone
}

# Print the data
print("Location Data:", location_data)
print("Contact Data:", contact_data)


Location Data: {'address': ['PO Box 327, Readfield', 'PO Box 508, Westport'], 'zip_code': ['04355', '06881'], 'latitude': ['44.38940', '41.14170'], 'longitude': ['-69.95240', '-73.35850']}
Contact Data: {'mail': 'summer@camplaurel.com', 'telephone': ['207-685-4945', '203-227-8866']}


In [9]:
socials = []

social_table = home_soup.find_all('div', attrs={'class': 'social'})

# Iterate through the social table
for social_div in social_table:
    links = social_div.find_all('a')
    for link in links[0:2]:
        platform_name = link.text.strip().lower()  # Extract platform name (e.g., Facebook)
        url = link['href']  # Extract the link URL
        socials.append({platform_name: url})

socials

[{'facebook': 'https://www.facebook.com/CampLaurel'},
 {'instagram': 'https://instagram.com/camplaurelmaine'}]

In [10]:
images = []

# Extract image from 'hero-panel'
hero_panels = home_soup.find_all('section', attrs={'class': 'hero-panel'})
for section in hero_panels:
    # Check for data-poster attribute
    div = section.find('div', attrs={'id': 'home-video-wrap'})
    if div:
        # Extract data-poster
        poster = div.get('data-poster')
        if poster:
            images.append(poster)
        # Extract background-image URL if present
        style = div.get('style')
        if style and 'background-image' in style:
            start_idx = style.find("url('") + 5
            end_idx = style.find("')", start_idx)
            img_url = style[start_idx:end_idx]
            images.append(img_url)
    # Extract video source URL
    video = section.find('video')
    if video:
        source = video.find('source')
        if source and source.get('src'):
            # Ensure absolute URL
            video_url = source.get('src')
            if video_url.startswith('//'):
                video_url = f"https:{video_url}"
            images.append(video_url)

# Extract images from 'footer-photo'
footer_photos = home_soup.find_all('img', attrs={'class': 'footer-photo'})
for img in footer_photos:
    if img.get('src'):
        images.append(img['src'])

images


['https://www.camplaurel.com/wp-content/themes/laurel/img/Camp-Laurel-Home-Montage-Poster-2020.jpg',
 'https://www.camplaurel.com/wp-content/themes/laurel/img/Camp-Laurel-Home-Montage-Poster-2020.jpg',
 'https://laurel-media.s3.amazonaws.com/Camp-Laurel-Home-Montage-2020.mp4',
 'https://www.camplaurel.com/wp-content/themes/laurel/img/home-footer-bg-default-lg.jpg']

In [11]:
rd_url = 'https://www.camplaurel.com/about-camp-laurel/dates-fees/'
rd = requests.get(rd_url, headers=HEADERS)
rd_soup = BeautifulSoup(rd.content, 'lxml')

In [12]:
rd_table= rd_soup.find('div', attrs={'class': 'entry-content'})
rd_table

<div class="entry-content"><div class="full-width-bg bring-it-in"><h2 class="dates-fees-hdr">2025 Camp Dates</h2><p class="camp-dates"><span class="lab">Camp Starts</span> <span class="date">Saturday, June 21st</span></p><p class="camp-dates"><span class="lab">Camp Ends</span> <span class="date">Wednesday, August 6th</span></p><p class="camp-dates"><span class="lab">Visiting Day</span> <span class="date">Saturday, July 19th</span></p><p class="camp-dates tuition"><br/><span class="lab tuit-lab">2025 Tuition</span> <span class="date fee">$17,100</span></p></div><div class="additional-charges bring-it-in"><h3>Additional Charges:</h3><p class="camp-dates addt"><span class="lab">Trip Day/Personal Expenses</span> <span class="date fee">$550</span></p><p class="camp-dates addt"><span class="lab">Equestrian (3 days a week, optional)</span> <span class="date fee">$1500</span></p><p class="camp-dates addt"><span class="lab">Equestrian (5 days a week, optional)</span> <span class="date fee">$250

In [13]:
sessions = []

rd_table = rd_soup.find('div', attrs={'class': 'entry-content'})

if rd_table:
    camp_session = {}
    # Extract name
    camp_session['name'] = "2025 Summer Camp" 
    
    start_date = rd_table.find('span', string='Camp Starts').find_next('span', attrs={'class': 'date'})
    end_date = rd_table.find('span', string='Camp Ends').find_next('span', attrs={'class': 'date'})
    camp_session['start_date'] = start_date.text if start_date else None
    camp_session['end_date'] = end_date.text if end_date else None

    # Calculate duration (manual calculation needed if possible)
    camp_session['duration'] = "null"

    # Extract cost
    tuition = rd_table.find('span', string='2025 Tuition').find_next('span', attrs={'class': 'date fee'})
    camp_session['cost'] = tuition.text if tuition else None

    camp_session['deposit'] = 'null'

    # Location and Eligibility (not available in the HTML)
    camp_session['location'] = "Readfield, ME"
    camp_session['eligible'] = "null"
    
    sessions.append(camp_session)

sessions


[{'name': '2025 Summer Camp',
  'start_date': 'Saturday, June 21st',
  'end_date': 'Wednesday, August 6th',
  'duration': 'null',
  'cost': '$17,100',
  'deposit': 'null',
  'location': 'Readfield, ME',
  'eligible': 'null'}]

In [14]:
categories = []
activities = []

# Iterate over each div containing activity buttons
cat_table = home_soup.find_all('div', attrs={'class': 'flex-it activities-buttons'})

for cat in cat_table:
    links = cat.find_all('a', class_='btn-img')
    
    for link in links:
        category_name = link.get('title')
        category_url = link.get('href')
        
        # Make request to category link
        full_url = f"https://www.camplaurel.com{category_url}"
        category_page = requests.get(full_url, headers=HEADERS)
        category_soup = BeautifulSoup(category_page.content, 'lxml')
        
        # Extract the image URL from the category page
        image_tag = category_soup.find('img', class_='featured-image')
        image_url = image_tag['src'] if image_tag else 'null'
        
        # Extract the description from the category page (assuming description is within a <div>)
        description_tag = category_soup.find('div', class_='entry-content')
        description = description_tag.get_text(strip=True) if description_tag else 'null'
        
        # Add the category to the categories list
        categories.append({
            'name': category_name,
            'description': description,
            'image': image_url
        })
        
        # Extract the list of activities from the category page
        activity_list = category_soup.find_all('ul', class_='wp-block-list')
        
        # Iterate through each <ul> tag and get the <li> items
        for ul in activity_list:
            for li in ul.find_all('li'):
                activity_name = li.get_text(strip=True)
                activities.append({
                    'name': activity_name,
                    'description': 'null',
                    'image': 'null'
                })

categories

[{'name': 'Waterfront and lake activities',
  'description': 'Our expansive dock system anchors a dynamic swimming program featuring American Red Cross instruction. We have a fleet of 5 Championship Mastercraft Ski Boats, 6 Hobies, 10 Sunfish, 2 Lasers, a Vanguard 420 and Hunter 170, 30 canoes and kayaks, and 20 Stand-Up Paddleboards. The camp fronts on Echo Lake, a 3-mile long spring-fed lake. Laurel occupies more than 2500 feet of private shoreline.WaterskiingWakeboardingWakesurfingSailingStand-Up PaddleboardingFishingCanoeingKayakingSnorkelingSwim TeamSwimming InstructionRecreational Swimming',
  'image': 'https://www.camplaurel.com/wp-content/uploads/2020/04/hero-waterfront.jpg'},
 {'name': 'Boys Sports and Athletics activities',
  'description': 'Camp Laurel’s extensive sports program offers something for everyone. Five large athletic fields for soccer, baseball, football, lacrosse, softball and field hockey; a pro-sized hockey arena; a gymnastics center; numerous sports courts; a

In [86]:
activities

[{'name': 'Waterskiing', 'description': 'null', 'image_url': 'null'},
 {'name': 'Wakeboarding', 'description': 'null', 'image_url': 'null'},
 {'name': 'Wakesurfing', 'description': 'null', 'image_url': 'null'},
 {'name': 'Sailing', 'description': 'null', 'image_url': 'null'},
 {'name': 'Stand-Up Paddleboarding',
  'description': 'null',
  'image_url': 'null'},
 {'name': 'Fishing', 'description': 'null', 'image_url': 'null'},
 {'name': 'Canoeing', 'description': 'null', 'image_url': 'null'},
 {'name': 'Kayaking', 'description': 'null', 'image_url': 'null'},
 {'name': 'Snorkeling', 'description': 'null', 'image_url': 'null'},
 {'name': 'Swim Team', 'description': 'null', 'image_url': 'null'},
 {'name': 'Swimming Instruction', 'description': 'null', 'image_url': 'null'},
 {'name': 'Recreational Swimming', 'description': 'null', 'image_url': 'null'},
 {'name': 'Soccer', 'description': 'null', 'image_url': 'null'},
 {'name': 'Flag Football', 'description': 'null', 'image_url': 'null'},
 {'n

In [16]:
Camp_Laurel= [{
    'name': website_name,  
    'url': website_url,
    'logo':logo_url,
    'description':description,
    'location': location_data,
    'contact': contact_data,
    'socials': socials,
    'image_urls':images,
    'sessions': sessions,
    'categories': categories,
    'activities': activities
}]
file_path = 'camp_laurel.json'

# Export the dictionary to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(Camp_Laurel, json_file, indent=4)