In [10]:
from config import API_KEY

In [11]:
import requests
import os
import pandas as pd
from datetime import datetime, timedelta

---

<h2 align="center">Gather Initial Data</h2>


---


In [14]:
def get_weather_data(city_name, state_code, days=3):
    
    '''
        Build a fresh CSV of daily weather summaries for a US city.

        This function does the following:
            - Gathers latitude and longitude based on city_name and state_code
            - Takes the latitude and longitude and collects the city and state name
            - Requests the daily summary for each of the last `days` days, ending yesterday
            - Collects useful data and saves as a csv file

        Parameters:
            city_name (str): City to look up, e.g. 'Las Vegas'.
            state_code (str): US state code, e.g. 'NV'.
            days (int): How many days to collect, ending yesterday. Defaults to 3.

        Returns:
            None. The rows are written to ../data/<city>_<state>.csv, overwriting
            any existing file of that name.

        Raises:
            ValueError: If either geocoding lookup returns no match.
            requests.HTTPError: If a geocoding request returns an error status.
    '''
    
    geo_url = 'https://api.openweathermap.org/geo/1.0'
    timeout = 30  # seconds; without it a stalled connection hangs the notebook indefinitely
    columns = [
        'city', 'state', 'date', 'humidity (%)', 'precipitation (in)',
        'avg_temp (°F)', 'pressure (hPa)', 'wind (mph)',
    ]

    # HTTPS keeps the API key out of clear text; `params` URL-encodes the city name.
    response = requests.get(
        f'{geo_url}/direct',
        params={'q': f'{city_name},{state_code},US', 'limit': 1, 'appid': API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    matches = response.json()
    if not matches:
        raise ValueError(f'No geocoding match for {city_name!r}, {state_code!r}')
    lat, lon = matches[0]['lat'], matches[0]['lon']

    # The reverse lookup supplies the city/state names that are stored in the CSV.
    response = requests.get(
        f'{geo_url}/reverse',
        params={'lat': lat, 'lon': lon, 'limit': 1, 'appid': API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    places = response.json()
    if not places:
        raise ValueError(f'No reverse-geocoding match for lat={lat}, lon={lon}')
    city, state = places[0]['name'], places[0]['state']

    end_date = datetime.now().date() - timedelta(days=1)

    rows = []

    # Walk from the oldest requested day up to yesterday.
    for offset in range(days - 1, -1, -1):
        day = end_date - timedelta(days=offset)
        response = requests.get(
            'https://api.openweathermap.org/data/3.0/onecall/day_summary',
            params={
                'lat': lat,
                'lon': lon,
                'date': day.strftime('%Y-%m-%d'),
                'appid': API_KEY,
                'units': 'imperial',
            },
            timeout=timeout,
        )

        # Best effort: a failed day is reported and skipped rather than aborting the run.
        if response.status_code != 200:
            print(f'Skipping {day}: HTTP {response.status_code}')
            continue

        summary = response.json()  # parse the body once instead of once per field
        rows.append(
            {
                'city': city,
                'state': state,
                'date': datetime.strptime(summary['date'], '%Y-%m-%d').strftime('%m-%d-%Y'),
                'humidity (%)': round(summary['humidity']['afternoon']),
                # NOTE(review): confirm the API returns inches under units=imperial;
                # if precipitation is always reported in mm the column label is wrong.
                'precipitation (in)': summary['precipitation']['total'],
                'avg_temp (°F)': round((summary['temperature']['min'] + summary['temperature']['max']) / 2),
                'pressure (hPa)': round(summary['pressure']['afternoon']),
                'wind (mph)': round(summary['wind']['max']['speed'])
            }
        )

    os.makedirs('../data', exist_ok=True)
    out_path = f'../data/{city_name.lower().replace(" ", "-")}_{state_code.lower().strip(" ")}.csv'

    # Explicit columns guarantee a header row even if every day was skipped,
    # so the file can still be read back with pd.read_csv.
    return pd.DataFrame(rows, columns=columns).to_csv(out_path, index=False)

In [15]:
# Build the initial dataset for Las Vegas, NV; the result is written to ../data/las-vegas_nv.csv.
get_weather_data('Las Vegas', 'NV')

In [17]:
# Read the saved Las Vegas CSV back from disk and display it as a sanity check.
df = pd.read_csv('../data/las-vegas_nv.csv')
df

Unnamed: 0,city,state,date,humidity (%),precipitation (in),avg_temp (°F),pressure (hPa),wind (mph)
0,Las Vegas,Nevada,03-09-2024,20,0.0,54,1020,23
1,Las Vegas,Nevada,03-10-2024,27,0.0,55,1019,23
2,Las Vegas,Nevada,03-11-2024,31,0.0,62,1015,14


---

<h2 align="center">Gather Most Recent Data</h2>


---


In [18]:
def get_new_weather_data(file_path):
    
    '''
        Append the days missing from an existing weather CSV.

        The city and state are recovered from the file name (<city>_<state>.csv,
        with '-' standing in for spaces in the city). The function finds the latest
        date already in the file, requests the daily summary for every day after
        it up to and including yesterday, and appends those rows to the same file.

        Parameters:
            file_path (str): Path to a CSV with a 'date' column in MM-DD-YYYY format.

        Returns:
            None. New rows, if any, are appended to file_path in place.

        Raises:
            ValueError: If the file name is not of the form <city>_<state>.csv, or
                if either geocoding lookup returns no match.
            requests.HTTPError: If a geocoding request returns an error status.
    '''
    name_parts = os.path.basename(file_path).split('_')
    if len(name_parts) < 2:
        raise ValueError(f'Expected a file named <city>_<state>.csv, got {file_path!r}')
    city_slug, state_ext = name_parts[:2]
    city_name, state_code = city_slug.replace('-', ' ').title(), state_ext.upper().split('.')[0]

    existing = pd.read_csv(file_path)
    last_saved = pd.to_datetime(existing['date'], format='%m-%d-%Y').max()

    # A file with no rows gives NaT here; there is no date to resume from.
    if pd.isna(last_saved):
        return None

    start_date = last_saved.date() + timedelta(days=1)
    end_date = datetime.now().date() - timedelta(days=1)

    # Already up to date: return before spending any API calls on geocoding.
    if start_date > end_date:
        return None

    geo_url = 'https://api.openweathermap.org/geo/1.0'
    timeout = 30  # seconds; without it a stalled connection hangs the notebook indefinitely

    # HTTPS keeps the API key out of clear text; `params` URL-encodes the city name.
    response = requests.get(
        f'{geo_url}/direct',
        params={'q': f'{city_name},{state_code},US', 'limit': 1, 'appid': API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    matches = response.json()
    if not matches:
        raise ValueError(f'No geocoding match for {city_name!r}, {state_code!r}')
    lat, lon = matches[0]['lat'], matches[0]['lon']

    # The reverse lookup supplies the city/state names that are stored in the CSV.
    response = requests.get(
        f'{geo_url}/reverse',
        params={'lat': lat, 'lon': lon, 'limit': 1, 'appid': API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    places = response.json()
    if not places:
        raise ValueError(f'No reverse-geocoding match for lat={lat}, lon={lon}')
    city, state = places[0]['name'], places[0]['state']

    rows = []

    for offset in range((end_date - start_date).days + 1):
        day = start_date + timedelta(days=offset)
        response = requests.get(
            'https://api.openweathermap.org/data/3.0/onecall/day_summary',
            params={
                'lat': lat,
                'lon': lon,
                'date': day.strftime('%Y-%m-%d'),
                'appid': API_KEY,
                'units': 'imperial',
            },
            timeout=timeout,
        )

        # Best effort: a failed day is reported and skipped. Later runs resume from the
        # newest date in the file, so a skipped day is not retried automatically.
        if response.status_code != 200:
            print(f'Skipping {day}: HTTP {response.status_code}')
            continue

        summary = response.json()  # parse the body once instead of once per field
        rows.append(
            {
                'city': city,
                'state': state,
                'date': datetime.strptime(summary['date'], '%Y-%m-%d').strftime('%m-%d-%Y'),
                'humidity (%)': round(summary['humidity']['afternoon']),
                # NOTE(review): confirm the API returns inches under units=imperial;
                # if precipitation is always reported in mm the column label is wrong.
                'precipitation (in)': summary['precipitation']['total'],
                'avg_temp (°F)': round((summary['temperature']['min'] + summary['temperature']['max']) / 2),
                'pressure (hPa)': round(summary['pressure']['afternoon']),
                'wind (mph)': round(summary['wind']['max']['speed'])
            }
        )

    if not rows:
        return None

    # Rows are appended without a header, so they must follow the file's own column
    # order; reindexing also leaves a blank cell for any extra column the file has
    # (e.g. a saved index column) instead of shifting every value one place left.
    new_rows = pd.DataFrame(rows).reindex(columns=existing.columns)
    return new_rows.to_csv(file_path, index=False, mode='a', header=False)

In [20]:
# Collect every CSV under ../data first, then top each one up with the days it is missing.
files = []
for entry in os.listdir('../data'):
    if not entry.endswith('.csv'):
        continue
    files.append(os.path.join('../data', entry))

for file_path in files:
    get_new_weather_data(file_path)