In [None]:
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

# URL of the web page containing the table
# NOTE(review): the label says "New Delhi" but lat, lon and tz are all zero in
# this query string -- confirm the intended coordinates/time zone.
url = "https://aa.usno.navy.mil/calculated/rstt/year?ID=AA&year=2024&task=0&lat=0.0000&lon=0.0000&label=New+Delhi&tz=0.00&tz_sign=-1&submit=Get+Data"

# Index of the first data line inside the <pre> text.
# Adjust as needed if the page layout changes.
DATA_START_LINE = 8

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Define the months
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# Create headers for the DataFrame: "Day" followed by a Rise/Set pair per month
headers = ["Day"] + [
    f"{month}_{event}" for month in months for event in ("Rise", "Set")
]

# Matches one whitespace-delimited cell; used to locate column boundaries.
_TOKEN_RE = re.compile(r"\S+")


def fetch_preformatted_text(page_url):
    """Download *page_url* and return the text of its first ``<pre>`` tag.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
        ValueError: if the page contains no ``<pre>`` tag.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx rather than trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    pre_tag = soup.find("pre")
    if pre_tag is None:
        raise ValueError("No <pre> tag found on the page")
    return pre_tag.get_text()


def parse_rise_set_table(preformatted_text):
    """Turn the preformatted table text into a list of rows.

    Each returned row is ``[day, rise, set, rise, set, ...]`` with one entry
    per name in ``headers``; every value is a string and a blank cell becomes
    ``""``.

    The table is treated as fixed-width: column boundaries are taken from the
    first row that has a numeric day and a value in every column, and every
    row is then sliced at those boundaries. This keeps rows that have blank
    cells, which plain whitespace splitting cannot represent (it would shift
    the remaining values into the wrong columns).
    NOTE(review): assumes cells are right-aligned to a common column edge and
    that missing cells are left blank -- confirm against the live page.

    Raises:
        ValueError: if the text has too few lines or no complete data row.
    """
    lines = preformatted_text.splitlines()

    # Ensure that there are enough lines for header and data
    if len(lines) <= DATA_START_LINE:
        raise ValueError("The preformatted text does not contain enough lines")
    data_lines = lines[DATA_START_LINE:]

    # Calibrate: the right edge of every cell in the first complete row.
    column_ends = None
    for line in data_lines:
        cells = list(_TOKEN_RE.finditer(line))
        if len(cells) == len(headers) and cells[0].group().isdigit():
            column_ends = [cell.end() for cell in cells]
            break
    if column_ends is None:
        raise ValueError("No complete data row found in the preformatted text")

    # Each cell spans from the previous cell's right edge to its own.
    column_starts = [0] + column_ends[:-1]

    rows = []
    for line in data_lines:
        row = [
            line[start:end].strip()
            for start, end in zip(column_starts, column_ends)
        ]
        # Skip blank lines, footers and anything else without a day number.
        if not row[0].isdigit():
            continue
        rows.append(row)
    return rows


def main():
    """Fetch the table, write it to ``sun_rise_set_2024.csv``, return the DataFrame."""
    rows = parse_rise_set_table(fetch_preformatted_text(url))

    # Create a DataFrame
    frame = pd.DataFrame(rows, columns=headers)

    # Save the DataFrame to a CSV file
    frame.to_csv("sun_rise_set_2024.csv", index=False)

    print("CSV file has been created successfully.")
    return frame


if __name__ == "__main__":
    # Keep ``df`` available at module level, as the original script did.
    df = main()