In [None]:
import requests
import json
from datetime import datetime, timedelta
import pytz
import time
import os
import csv

def fetch_weather_data(url, timeout=30):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, so a single stalled
            connection would block the whole scraping loop.

    Returns:
        The response body as a string, or ``None`` when the request fails
        (connection error, timeout, or a non-2xx status code).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so they take the same
        # failure path as network errors.
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        # Best-effort fetch: report the problem and let the caller skip the day.
        print(f"Error fetching URL: {url} - {e}")
        return None

def extract_json_data(html_content):
    """Parse the JSON embedded in the page's ``app-root-state`` script tag.

    Args:
        html_content: Full HTML document as a string.

    Returns:
        The decoded JSON value, or ``None`` when the opening tag is missing,
        the closing ``</script>`` is missing, or the payload is not valid
        JSON (in which case a message is printed).
    """
    opening_tag = '<script id="app-root-state" type="application/json">'

    # Everything after the first opening tag; an empty separator means the
    # tag never appeared.
    _, found_open, remainder = html_content.partition(opening_tag)
    if not found_open:
        return None

    # The payload runs up to the first closing tag that follows.
    payload, found_close, _ = remainder.partition('</script>')
    if not found_close:
        return None

    try:
        decoded = json.loads(payload)
    except json.JSONDecodeError:
        print("Error decoding JSON")
        return None
    return decoded

def analyze_weather_for_school_hours(json_data, date_str):
    """Collect weather records for one day from the page's embedded JSON.

    Two sections of ``json_data`` are read, addressed by the opaque keys
    ``"39526823"`` and ``"71113034"`` (presumably the observation and
    forecast payloads of the page -- confirm against a live response):

    * ``["39526823"]["b"]["observations"]``: each observation whose
      ``obsTimeLocal`` falls between 08:00 and 15:00 (inclusive)
      Los Angeles time on ``date_str`` contributes one record.
    * ``["71113034"]["b"]["daypart"]``: every named day part contributes
      one record, regardless of the time window.

    Args:
        json_data: Decoded JSON container (normally a dict).
        date_str: Day being analysed, formatted ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``"temperature"``, ``"condition"``
        and ``"rain"`` (``"yes"`` or ``"no"``).

    Raises:
        ValueError: If ``date_str`` does not match ``YYYY-MM-DD``.
    """
    la_timezone = pytz.timezone('America/Los_Angeles')
    school_start_time = la_timezone.localize(datetime.strptime(f"{date_str} 08:00:00", "%Y-%m-%d %H:%M:%S"))
    school_end_time = la_timezone.localize(datetime.strptime(f"{date_str} 15:00:00", "%Y-%m-%d %H:%M:%S"))
    daily_data = []

    if "39526823" in json_data and "b" in json_data["39526823"] and "observations" in json_data["39526823"]["b"]:
        # ``or []`` tolerates an explicit null in the payload.
        for obs in json_data["39526823"]["b"]["observations"] or []:
            obs_time_local_str = obs.get("obsTimeLocal")
            if not obs_time_local_str:
                continue

            try:
                naive_obs_time = datetime.strptime(obs_time_local_str, "%Y-%m-%d %H:%M:%S")
            except ValueError as e:
                print(f"Error parsing time string: {obs_time_local_str} - {e}")
                continue

            # pytz zones must be attached with localize(); replace(tzinfo=...)
            # silently applies the zone's LMT offset (-7:53) and would shift
            # the comparison window relative to the school-hour bounds.
            obs_time_local = la_timezone.localize(naive_obs_time)
            if not (school_start_time <= obs_time_local <= school_end_time):
                continue

            # A present-but-null "imperial" entry must not break the lookups.
            imperial = obs.get("imperial") or {}
            precip_rate = imperial.get("precipRate", 0)
            # Only a positive number counts as rain; null or non-numeric
            # values are treated as "no" instead of raising TypeError.
            rain = "yes" if isinstance(precip_rate, (int, float)) and precip_rate > 0 else "no"
            daily_data.append({
                "temperature": imperial.get("temp"),
                "condition": obs.get("wxPhraseShort", None),
                "rain": rain
            })

    if "71113034" in json_data and "b" in json_data["71113034"] and "daypart" in json_data["71113034"]["b"]:
        for part in json_data["71113034"]["b"]["daypart"] or []:
            # Parallel lists indexed by day part; a null list is treated as empty.
            narratives = part.get("narrative") or []
            temperatures = part.get("temperature") or []
            precip_chances = part.get("precipChance") or []
            times_of_day = part.get("daypartName") or []

            for i in range(len(times_of_day)):
                # Shorter companion lists simply yield None for missing slots.
                narrative = narratives[i] if i < len(narratives) else None
                temp = temperatures[i] if i < len(temperatures) else None
                precip_chance = precip_chances[i] if i < len(precip_chances) else None

                # A precipitation chance above 50 is reported as rain.
                rain = "yes" if isinstance(precip_chance, (int, float)) and precip_chance > 50 else "no"

                daily_data.append({
                    "temperature": temp,
                    "condition": narrative,
                    "rain": rain
                })

    return daily_data

# Scrape one calendar year of daily history pages, one request per day.
year = 2024
start_date = datetime(year, 1, 1)
# Derive the day count from the calendar so that changing ``year`` cannot
# spill into the following year (365 days) or drop Dec 31 (leap years).
num_days_to_scrape = (datetime(year + 1, 1, 1) - start_date).days
# Maps "YYYY-MM-DD" -> list of record dicts; days that fail are left out.
all_year_data = {}

for i in range(num_days_to_scrape):
    current_date = start_date + timedelta(days=i)
    date_str = current_date.strftime("%Y-%m-%d")
    url = f"https://www.wunderground.com/history/daily/us/ca/hawthorne/KHHR/date/{date_str}"
    print(f"Fetching data for {date_str}...")
    html_content = fetch_weather_data(url)
    if html_content:
        json_data = extract_json_data(html_content)
        if json_data:
            print(f"Analyzing weather for {date_str}...")
            daily_school_hours_data = analyze_weather_for_school_hours(json_data, date_str)
            all_year_data[date_str] = daily_school_hours_data
        else:
            print(f"Could not extract JSON data for {date_str}")
    else:
        print(f"Failed to fetch data for {date_str}")
    # Pause between requests to avoid hammering the server.
    time.sleep(1)

# Persist every collected record as one CSV row tagged with its date.
os.makedirs("data", exist_ok=True)
csv_path = os.path.join("data", "weather_data_hourly.csv")

fieldnames = ["Date", "Temperature", "Condition", "Rain (yes/no)"]
with open(csv_path, mode="w", newline="") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    # Flatten the {date: [records]} mapping into rows, preserving order.
    writer.writerows(
        {
            "Date": day,
            "Temperature": record.get("temperature"),
            "Condition": record.get("condition"),
            "Rain (yes/no)": record.get("rain"),
        }
        for day, records in all_year_data.items()
        for record in records
    )

print(f"\nCSV saved at: {csv_path}")


Fetching data for 2024-01-01...
Analyzing weather for 2024-01-01...
Fetching data for 2024-01-02...
Analyzing weather for 2024-01-02...
Fetching data for 2024-01-03...
Analyzing weather for 2024-01-03...
Fetching data for 2024-01-04...
Analyzing weather for 2024-01-04...
Fetching data for 2024-01-05...
Analyzing weather for 2024-01-05...
Fetching data for 2024-01-06...
Analyzing weather for 2024-01-06...
Fetching data for 2024-01-07...
Analyzing weather for 2024-01-07...
Fetching data for 2024-01-08...
Analyzing weather for 2024-01-08...
Fetching data for 2024-01-09...
Analyzing weather for 2024-01-09...
Fetching data for 2024-01-10...
Analyzing weather for 2024-01-10...
Fetching data for 2024-01-11...
Analyzing weather for 2024-01-11...
Fetching data for 2024-01-12...
Analyzing weather for 2024-01-12...
Fetching data for 2024-01-13...
Analyzing weather for 2024-01-13...
Fetching data for 2024-01-14...
Analyzing weather for 2024-01-14...
Fetching data for 2024-01-15...
Analyzing weathe

In [20]:
import csv
import os
from collections import Counter

def analyze_weather_from_csv(csv_filepath):
    """
    Summarise per-record weather rows into one entry per date.

    The CSV must provide the columns 'Date', 'Temperature', 'Condition'
    and 'Rain (yes/no)'. For each date (in order of first appearance) the
    result holds the mean of the parseable temperatures, the most frequent
    non-empty condition, and 'yes' if any row reported rain. Dates with no
    usable temperature or condition get None for that field.

    Returns a list of dicts with the keys 'Date', 'Average Temperature',
    'Most Common Condition' and 'Rained'.
    """
    per_day = {}
    with open(csv_filepath, mode='r', newline='') as handle:
        for record in csv.DictReader(handle):
            day = record['Date']
            raw_temp = record['Temperature']
            sky = record['Condition']
            rain_flag = record['Rain (yes/no)']

            # Blank or non-numeric temperatures are ignored.
            reading = None
            if raw_temp:
                try:
                    reading = float(raw_temp)
                except ValueError:
                    reading = None

            bucket = per_day.setdefault(
                day, {'temperatures': [], 'conditions': [], 'rained': 'no'}
            )
            if reading is not None:
                bucket['temperatures'].append(reading)
            if sky:
                bucket['conditions'].append(sky)
            # A single rainy row marks the whole day as rainy.
            if rain_flag == 'yes':
                bucket['rained'] = 'yes'

    results = []
    for day, bucket in per_day.items():
        temps = bucket['temperatures']
        skies = bucket['conditions']

        mean_temp = None
        if temps:
            mean_temp = sum(temps) / len(temps)

        dominant_sky = None
        if skies:
            dominant_sky = Counter(skies).most_common(1)[0][0]

        results.append({
            'Date': day,
            'Average Temperature': mean_temp,
            'Most Common Condition': dominant_sky,
            'Rained': bucket['rained']
        })
    return results

def save_summary_to_csv(summary_data, output_csv_filepath):
    """
    Write the per-date summary rows to a CSV file.

    summary_data is an iterable of dicts keyed by 'Date',
    'Average Temperature', 'Most Common Condition' and 'Rained'. A
    confirmation is printed on success; I/O failures are reported with a
    printed message instead of being raised.
    """
    header = ['Date', 'Average Temperature', 'Most Common Condition', 'Rained']
    try:
        with open(output_csv_filepath, mode='w', newline='') as out_file:
            sheet = csv.DictWriter(out_file, fieldnames=header)
            sheet.writeheader()
            for summary_row in summary_data:
                sheet.writerow(summary_row)
        print(f"\nSummary data saved to: {output_csv_filepath}")
    except OSError:  # IOError is an alias of OSError in Python 3
        print("I/O error while writing to CSV")

# Summarise the per-record CSV produced by the scraping cell into one row per day.
input_csv_file_path = os.path.join("data", "weather_data_hourly.csv")
output_csv_file_path = os.path.join("data", "wunderground_daily_summary_analyzed.csv")

# Nothing to analyse until the scraping cell has written its CSV.
if not os.path.exists(input_csv_file_path):
    print(f"Input CSV file not found at: {input_csv_file_path}")
else:
    analyzed_data_list = analyze_weather_from_csv(input_csv_file_path)
    save_summary_to_csv(analyzed_data_list, output_csv_file_path)


Summary data saved to: data\wunderground_daily_summary_analyzed.csv



Summary data saved to: data\wunderground_daily_summary_analyzed.csv
