In [2]:
"""
Source: github.com/akrherz/iem/
Example script that scrapes data from the IEM ASOS download service
"""
from __future__ import print_function
import json
import time
import datetime
import numpy as np

# Python 2 and 3: alternative 4
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

# Number of attempts download_data() makes for one URL before giving up
MAX_ATTEMPTS = 6
# Plain HTTP on purpose: HTTPS here can be problematic for installs that
# don't have the Let's Encrypt CA
SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"


def download_data(uri):
    """Fetch the data from the IEM

    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function implements an exponential
    backoff to keep individual downloads from erroring: after every failed
    attempt (an exception, or a response body starting with "ERROR") it waits
    5, 10, 20, ... seconds before trying again.

    Args:
      uri (string): URL to fetch

    Returns:
      string data, or an empty string once MAX_ATTEMPTS attempts have failed
    """
    # Seconds to wait after the first failure; doubled after each later one.
    delay = 5
    for attempt in range(MAX_ATTEMPTS):
        try:
            data = urlopen(uri, timeout=300).read().decode("utf-8")
        except Exception as exp:
            # Best effort by design: report the problem and retry.
            print("download_data(%s) failed with %s" % (uri, exp))
        else:
            if not data.startswith("ERROR"):
                return data
            # The service answered, but with an error body; treat it like any
            # other failure so the retry is throttled as well.
            print("download_data(%s) got an ERROR response" % (uri,))
        # No point in waiting once the final attempt has been used up.
        if attempt + 1 < MAX_ATTEMPTS:
            time.sleep(delay)
            delay *= 2

    print("Exhausted attempts to download, returning empty data")
    return ""


def get_stations_from_filelist(filename):
    """Build a listing of stations from a simple file listing the stations.

    The file should simply have one station per line.  Surrounding whitespace
    is stripped and blank lines are ignored.

    Args:
      filename (string): path of the text file to read

    Returns:
      list of station identifiers in file order
    """
    stations = []
    # The context manager closes the file even if reading fails part way.
    with open(filename) as handle:
        for line in handle:
            station = line.strip()
            # A blank line (e.g. a trailing newline) is not a station.
            if station:
                stations.append(station)
    return stations


def get_stations_from_networks():
    """Build a station list by using a bunch of IEM networks.

    For every state listed in ``states`` the "<STATE>_ASOS" network metadata
    is fetched as GeoJSON and the "sid" property of each feature is collected.

    Returns:
      list of station identifiers, in the order the service lists them
    """
    stations = []
    # Whitespace separated state abbreviations
    states = """TX"""
    networks = ["%s_ASOS" % (state,) for state in states.split()]

    for network in networks:
        # Get metadata
        uri = (
            "https://mesonet.agron.iastate.edu/geojson/network/%s.geojson"
        ) % (network,)
        # Bound the wait so a stalled connection cannot hang the script.
        data = urlopen(uri, timeout=300)
        try:
            jdict = json.load(data)
        finally:
            # Release the connection even when the payload is not valid JSON.
            data.close()
        for site in jdict["features"]:
            stations.append(site["properties"]["sid"])
    return stations


def download_alldata():
    """Alternative entry point: fetch all available data window by window.

    The service supports up to 24 hours worth of data at a time, so the period
    is walked in 24 hour steps.  Each response is written to a file named
    after the window's start date (YYYYMMDD.txt).
    """
    step = datetime.timedelta(hours=24)
    base = SERVICE + "data=skyc1&tz=Etc/UTC&format=comma&latlon=yes&"

    # timestamps in UTC to request data for
    cursor = datetime.datetime(2021, 3, 15)
    stop = datetime.datetime(2022, 4, 30)

    while cursor < stop:
        window_end = cursor + step
        thisurl = "".join(
            (
                base,
                cursor.strftime("year1=%Y&month1=%m&day1=%d&"),
                window_end.strftime("year2=%Y&month2=%m&day2=%d&"),
            )
        )
        print(thisurl)
        print("Downloading (alldata): %s" % (cursor,))
        payload = download_data(thisurl)
        with open("%s.txt" % (cursor.strftime("%Y%m%d"),), "w") as fh:
            fh.write(payload)
        cursor = window_end


def main():
    """Our main method

    Requests the skyc1 observations for the configured period once per
    station and writes each response to <station>_<start>_<end>.txt in the
    current working directory.
    """
    # timestamps in UTC to request data for
    startts = datetime.datetime(2021, 3, 15)
    endts = datetime.datetime(2022, 4, 30)

    service = SERVICE + "data=skyc1&tz=Etc/UTC&format=comma&latlon=yes&"

    service += startts.strftime("year1=%Y&month1=%m&day1=%d&")
    service += endts.strftime("year2=%Y&month2=%m&day2=%d&")

    # Report how many stations the configured networks contain.  The list is
    # kept under its own name because the download below uses a fixed subset.
    network_stations = get_stations_from_networks()
    print(len(network_stations))

    # Other ways to specify the list of stations:
    # stations = network_stations
    # stations = get_stations_from_filelist("mystations.txt")
    # stations = ["TME", "FWS", "BSM"]
    stations = ["TME", "ATT", "EDC"]
    for station in stations:
        # service already ends with "&", so no extra separator is needed.
        uri = "%sstation=%s" % (service, station)
        print("Downloading (main): %s" % (station,))
        data = download_data(uri)
        outfn = "%s_%s_%s.txt" % (
            station,
            startts.strftime("%Y%m%d%H%M"),
            endts.strftime("%Y%m%d%H%M"),
        )
        # The context manager closes the file even if the write fails.
        with open(outfn, "w") as out:
            out.write(data)


if __name__ == "__main__":
    # Alternative: call download_alldata() here to fetch the period in
    # 24 hour slices with no station filter instead of per-station files.
    main()

230
Downloading (main): TME
we are here?
Downloading (main): ATT
we are here?
Downloading (main): EDC
we are here?


In [6]:
import pandas as pd
# Load the three station downloads; where one station has a missing value the
# others are used to fill it in.
data0, data1, data2 = map(
    pd.read_csv,
    (
        "./TX_weather/EDC_202103150000_202204300000.txt",
        "./TX_weather/ATT_202103150000_202204300000.txt",
        "./TX_weather/TME_202103150000_202204300000.txt",
    ),
)

In [8]:
# Row count of each station's table, in the order data0, data1, data2
print(*[len(frame) for frame in (data0, data1, data2)])

24523 10924 25796


In [7]:
# Merge the skyc1 observations of the three stations into two parallel lists:
# timelist[k] is the "YYYY-MM-DD HH:MM" timestamp of the value in skyc1[k].
timelist = []
skyc1 = []

# For each station, map a timestamp to the first skyc1 value reported for it.
# A dict makes every check in the scan below O(1) instead of a search through
# the whole "valid" column for each minute.
station_lookups = []
for frame in (data0, data1, data2):
    first_seen = {}
    for stamp, cover in zip(frame["valid"], frame["skyc1"]):
        first_seen.setdefault(stamp, cover)
    station_lookups.append(first_seen)

# Walk every calendar minute of both years in chronological order.  Months
# and days are 1-based (1..12, 1..31); impossible dates such as 02-30 simply
# never match an observation.  The loop variable is deliberately not called
# `time`, which would rebind the `time` module used by download_data().
for year in [2021, 2022]:
    for month in range(1, 13):
        for day in range(1, 32):
            for hour in range(24):
                for minute in range(60):
                    stamp = "%d-%02d-%02d %02d:%02d" % (
                        year, month, day, hour, minute
                    )
                    # NOTE(review): a minute reported by several stations is
                    # emitted once per station (data0, data1, data2 order).
                    for first_seen in station_lookups:
                        if stamp not in first_seen:
                            continue
                        cover = first_seen[stamp]
                        if cover != "M":  # Check for missing value
                            skyc1.append(cover)
                            timelist.append(stamp)

0
1
2
3
4
5
6
7
8
9
10
11
0
1
2
3
4
5
6
7
8
9
10
11


In [8]:
# Persist the merged timestamps, one per line
with open("weather_times_march15_april30.txt", "w") as times_file:
    times_file.writelines("%s\n" % stamp for stamp in timelist)

In [9]:
# Persist the merged skyc1 values, one per line, in the same order as the
# timestamps
with open("weather_skyc1_march15_april30.txt", "w") as cover_file:
    cover_file.writelines("%s\n" % value for value in skyc1)