In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [10]:
# Function used for generating a URL for a specific station of a given period from http://www.rcc-acis.org/docs_webservices.html.

def URLGenerate(Station, StartDate, EndDate):
    URLgenerated = f"http://data.rcc-acis.org/StnData?sid={Station}&sdate={StartDate}&edate={EndDate}&elems=1,2,43,4&output=csv"
    
    return URLgenerated

In [11]:
# Try the funciton "URLGenerate" with a given station and period including start date and end date.

Station = "LAX"
StartDate = "2018-01-01"
EndDate = "2018-12-31"
URL = URLGenerate(Station, StartDate, EndDate)

print(URL)

http://data.rcc-acis.org/StnData?sid=LAX&sdate=2018-01-01&edate=2018-12-31&elems=1,2,43,4&output=csv


In [12]:
# Try to read the generated URL by pd.read_csv.

Data = pd.read_csv(URL, sep = ",", skiprows = 1, header = None)
Data["Station"] = Station

print(Data)

              0   1   2     3     4 Station
0    2018-01-01  67  48  57.5  0.00     LAX
1    2018-01-02  76  54  65.0  0.00     LAX
2    2018-01-03  76  54  65.0  0.00     LAX
3    2018-01-04  74  55  64.5  0.00     LAX
4    2018-01-05  69  56  62.5  0.00     LAX
..          ...  ..  ..   ...   ...     ...
360  2018-12-27  67  48  57.5  0.00     LAX
361  2018-12-28  61  49  55.0  0.00     LAX
362  2018-12-29  61  40  50.5  0.00     LAX
363  2018-12-30  63  42  52.5  0.00     LAX
364  2018-12-31  64  48  56.0  0.00     LAX

[365 rows x 6 columns]


In [13]:
# Function used to read the URL that specifified a station and a given period including start date and end date.

def ReadURLData(Station, StartDate, EndDate):
    URL = URLGenerate(Station, StartDate, EndDate)
    df = pd.read_csv(URL, sep = ",", skiprows = 1, header = None)
    df.columns = ["Date", "MaxTemp", "MinTemp", "AverTemp", "Preci"]
    df["Station"] = Station
    df.Date = pd.to_datetime(df.Date)
    df.MaxTemp = pd.to_numeric(df.MaxTemp, errors = "coerce", downcast = "float")
    df.MinTemp = pd.to_numeric(df.MinTemp, errors = "coerce", downcast = "float")
    df.AverTemp = pd.to_numeric(df.AverTemp, errors = "coerce", downcast = "float")
    df.Preci = pd.to_numeric(df.Preci, errors = "coerce")
    df.set_index("Date",inplace = True)
    
    return df

In [14]:
# Try to read the generated URL with the function in the previous cell. 

Stationlist = ["LAX", "LNK", "ORD", "SFO"] # the given list of the stations in this project.
StartDate = "2018-01-01"                   # the given start date in this project.
EndDate = "2018-12-31"                     # the given start date in this project.

Lax = ReadURLData(Stationlist[0], StartDate, EndDate)

Lax

Unnamed: 0_level_0,MaxTemp,MinTemp,AverTemp,Preci,Station
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-01,67.0,48.0,57.5,0.0,LAX
2018-01-02,76.0,54.0,65.0,0.0,LAX
2018-01-03,76.0,54.0,65.0,0.0,LAX
2018-01-04,74.0,55.0,64.5,0.0,LAX
2018-01-05,69.0,56.0,62.5,0.0,LAX
...,...,...,...,...,...
2018-12-27,67.0,48.0,57.5,0.0,LAX
2018-12-28,61.0,49.0,55.0,0.0,LAX
2018-12-29,61.0,40.0,50.5,0.0,LAX
2018-12-30,63.0,42.0,52.5,0.0,LAX


In [15]:
# Require 1: return a dataframe that includes the daily climate data for a list of stations between a given period

all_df = pd.DataFrame(columns = ["MaxTemp", "MinTemp", "AverTemp", "Preci", "Station"])
for each_station in Stationlist:
    each_station = ReadURLData(each_station, StartDate, EndDate)
    all_df = pd.concat([all_df, each_station])

all_df

Unnamed: 0,MaxTemp,MinTemp,AverTemp,Preci,Station
2018-01-01,67.0,48.0,57.5,0.0,LAX
2018-01-02,76.0,54.0,65.0,0.0,LAX
2018-01-03,76.0,54.0,65.0,0.0,LAX
2018-01-04,74.0,55.0,64.5,0.0,LAX
2018-01-05,69.0,56.0,62.5,0.0,LAX
...,...,...,...,...,...
2018-12-27,59.0,47.0,53.0,0.0,SFO
2018-12-28,58.0,44.0,51.0,0.0,SFO
2018-12-29,60.0,41.0,50.5,0.0,SFO
2018-12-30,59.0,45.0,52.0,0.0,SFO


In [16]:
# Require 2: return a dataframe that includes the pentad mean climate data for a list of stations between a given period
Pentad_mean_data = all_df.groupby(["Station"]).resample('5D').mean()
Pentad_mean_data

Unnamed: 0_level_0,Unnamed: 1_level_0,MaxTemp,MinTemp,AverTemp,Preci
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LAX,2018-01-01,72.400002,53.400002,62.900002,0.000
LAX,2018-01-06,65.400002,55.400002,60.400002,0.350
LAX,2018-01-11,73.400002,53.799999,63.599998,0.000
LAX,2018-01-16,68.000000,53.799999,60.900002,0.000
LAX,2018-01-21,68.400002,47.000000,57.700001,0.000
...,...,...,...,...,...
SFO,2018-12-07,57.799999,44.400002,51.099998,0.004
SFO,2018-12-12,61.599998,48.400002,55.000000,0.118
SFO,2018-12-17,58.400002,48.200001,53.299999,0.022
SFO,2018-12-22,58.000000,46.799999,52.400002,0.088


In [21]:
# Require 3: return a dataframe that includes the monthly mean climate data for a given county and a given period

# The code of a county can be found by searching "FIPS county code" in Google, which was 5-digit id.
# Here is a link as well. https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt

Countycode = "31109" # code of Lancaster County, NE used in this project

url = f"http://data.rcc-acis.org/StnMeta?county={Countycode}&output=csv"
url


'http://data.rcc-acis.org/StnMeta?county=31109&output=csv'

In [22]:
Meta_county = pd.read_csv(url, sep = ",")
Meta_county

Unnamed: 0,252935,FIRTH,NE,-96.608060,40.535000,1340
0,253523,HALLAM 3 W,NE,-96.85000,40.53333,1401.0
1,253923,HOLLAND,NE,-96.58333,40.58333,1352.0
2,253829,HICKMAN 2 SW,NE,-96.63333,40.58333,
3,256948,PRINCETON,NE,-96.70000,40.56667,1430.0
4,253524,HALLAM 2 N,NE,-96.78333,40.56667,1450.0
...,...,...,...,...,...,...
136,US1NELA0052,LINCOLN 1.2 ENE,NE,-96.66780,40.82411,1207.0
137,US1NELA0055,LINCOLN 4.4 ESE,NE,-96.61607,40.78303,1289.0
138,US1NELA0054,LINCOLN 4.6 N,NE,-96.69430,40.88230,1210.0
139,US1NELA0053,HICKMAN 1.8 NNE,NE,-96.62077,40.64552,1369.0
