In [16]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import json
import pandas as pd
from datetime import timedelta, date
from tqdm import tqdm

start_date = date(2019, 1, 1)
end_date = date(2019, 10, 1)

def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

def daterange(start_date, end_date):
    for n in tqdm(range(int ((end_date - start_date).days))):
        yield start_date + timedelta(n)

write_header = 1
write_header_file = 1 # change to 0 to continue without writing header to csv
for single_date in daterange(start_date, end_date):
    df_daily = pd.DataFrame()
    ymd = single_date.strftime("%Y%m%d")
    url_test = "https://api-ak.wunderground.com/api/606f3f6977348613/history_"+ymd+"null/units:metric/v:2.0/q/pws:IKAMPHAE2.json"
#     url_test ="http://api.wunderground.com/api/91b595b129984cc5b595b12998bcc57f/history/q/TH/IBANGY1.json"
    req = requests.get(url_test)
    js = json.loads(req.text)
    if js['history']['days'] == []:
        print("empty data on",ymd)
        continue
    for ob in js['history']['days'][0]['observations']:
        ob['date'] = ob['date']['iso8601']
        df_row = pd.DataFrame(ob, index=[0]) 
        df_row.index = df_row['date']
        df_row = df_row.drop(columns=['date'])
        if write_header == 1:
            df_daily = df_row
            write_header = 0
        else:
            df_daily = df_daily.append(df_row)

    with open('salaya_weather.csv','a+') as fd:
        fd.write(df_daily.to_csv(header=write_header_file))
        write_header_file = 0

  0%|                                                                                          | 0/273 [00:01<?, ?it/s]


KeyError: 'history'

In [1]:
import requests
import pandas as pd
from dateutil import parser, rrule
from datetime import datetime, time, date
import time

def getRainfallData(station, day, month, year):
    """
    Function to return a data frame of minute-level weather data for a single Wunderground PWS station.
    
    Args:
        station (string): Station code from the Wunderground website
        day (int): Day of month for which data is requested
        month (int): Month for which data is requested
        year (int): Year for which data is requested
    
    Returns:
        Pandas Dataframe with weather data for specified station and date.
    """
    url = "http://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID={station}&day={day}&month={month}&year={year}&graphspan=day&format=1"
    full_url = url.format(station=station, day=day, month=month, year=year)
    # Request data from wunderground data
    response = requests.get(full_url, headers={'User-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
    data = response.text
    # remove the excess <br> from the text data
    data = data.replace('<br>', '')
    # Convert to pandas dataframe (fails if issues with weather station)
    try:
        dataframe = pd.read_csv(io.StringIO(data), index_col=False)
        dataframe['station'] = station
    except Exception as e:
        print("Issue with date: {}-{}-{} for station {}".format(day,month,year, station))
        return None
    return dataframe
    
# Generate a list of all of the dates we want data for
start_date = "2019-01-01"
end_date = "2019-11-01"
start = parser.parse(start_date)
end = parser.parse(end_date)
dates = list(rrule.rrule(rrule.DAILY, dtstart=start, until=end))

# Create a list of stations here to download data for
stations = ["VTBD"]
# Set a backoff time in seconds if a request fails
backoff_time = 10
data = {}

# Gather data for each station in turn and save to CSV.
for station in stations:
    print("Working on {}".format(station))
    data[station] = []
    for date in dates:
        # Print period status update messages
        if date.day % 10 == 0:
            print("Working on date: {} for station {}".format(date, station))
        done = False
        while done == False:
            try:
                weather_data = getRainfallData(station, date.day, date.month, date.year)
                done = True
            except ConnectionError as e:
                # May get rate limited by Wunderground.com, backoff if so.
                print("Got connection error on {}".format(date))
                print("Will retry in {} seconds".format(backoff_time))
                time.sleep(10)
        # Add each processed date to the overall data
        data[station].append(weather_data)
    # Finally combine all of the individual days and output to CSV for analysis.
    pd.concat(data[station]).to_csv("data/{}_weather.csv".format(station))


Working on VTBD
Issue with date: 1-1-2019 for station VTBD
Issue with date: 2-1-2019 for station VTBD
Issue with date: 3-1-2019 for station VTBD
Issue with date: 4-1-2019 for station VTBD
Issue with date: 5-1-2019 for station VTBD
Issue with date: 6-1-2019 for station VTBD
Issue with date: 7-1-2019 for station VTBD
Issue with date: 8-1-2019 for station VTBD
Issue with date: 9-1-2019 for station VTBD
Working on date: 2019-01-10 00:00:00 for station VTBD
Issue with date: 10-1-2019 for station VTBD
Issue with date: 11-1-2019 for station VTBD
Issue with date: 12-1-2019 for station VTBD
Issue with date: 13-1-2019 for station VTBD
Issue with date: 14-1-2019 for station VTBD
Issue with date: 15-1-2019 for station VTBD
Issue with date: 16-1-2019 for station VTBD
Issue with date: 17-1-2019 for station VTBD
Issue with date: 18-1-2019 for station VTBD
Issue with date: 19-1-2019 for station VTBD
Working on date: 2019-01-20 00:00:00 for station VTBD
Issue with date: 20-1-2019 for station VTBD
Issue

Issue with date: 19-6-2019 for station VTBD
Working on date: 2019-06-20 00:00:00 for station VTBD
Issue with date: 20-6-2019 for station VTBD
Issue with date: 21-6-2019 for station VTBD
Issue with date: 22-6-2019 for station VTBD
Issue with date: 23-6-2019 for station VTBD
Issue with date: 24-6-2019 for station VTBD
Issue with date: 25-6-2019 for station VTBD
Issue with date: 26-6-2019 for station VTBD
Issue with date: 27-6-2019 for station VTBD
Issue with date: 28-6-2019 for station VTBD
Issue with date: 29-6-2019 for station VTBD
Working on date: 2019-06-30 00:00:00 for station VTBD
Issue with date: 30-6-2019 for station VTBD
Issue with date: 1-7-2019 for station VTBD
Issue with date: 2-7-2019 for station VTBD
Issue with date: 3-7-2019 for station VTBD
Issue with date: 4-7-2019 for station VTBD
Issue with date: 5-7-2019 for station VTBD
Issue with date: 6-7-2019 for station VTBD
Issue with date: 7-7-2019 for station VTBD
Issue with date: 8-7-2019 for station VTBD
Issue with date: 9-7

ValueError: All objects passed were None

In [None]:
from wwo_hist import 
import os
os.chdir("C:\Users\Sunat\Documents\GitHub\PM2.5")
frequency=3
start_date = '11-DEC-2018'
end_date = '11-NOV-2019'
api_key = '2eb5775ee94e4b648e2171109190811'
location_list = ['thailand','california']

hist_weather_data = retrieve_hist_data(api_key,
                                location_list,
                                start_date,
                                end_date,
                                frequency,
                                location_label = False,
                                export_csv = True,
                                store_df = True)