In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [5]:
# Build the StnData request URL for one station and one period.
# Web-service documentation: http://www.rcc-acis.org/docs_webservices.html

def URLGenerate(Station, StartDate, EndDate):
    """Return the StnData CSV request URL for a station and date range.

    Parameters
    ----------
    Station
        Station identifier, inserted as the ``sid`` query value.
    StartDate
        Start of the period, inserted as the ``sdate`` query value.
    EndDate
        End of the period, inserted as the ``edate`` query value.

    Returns
    -------
    str
        The request URL. The three arguments are interpolated verbatim (no
        URL-encoding is applied); the element list ``1,2,43,4`` and the
        ``csv`` output format are fixed.
    """
    endpoint = "http://data.rcc-acis.org/StnData"
    query_fields = (
        f"sid={Station}",
        f"sdate={StartDate}",
        f"edate={EndDate}",
        "elems=1,2,43,4",
        "output=csv",
    )
    return endpoint + "?" + "&".join(query_fields)

In [6]:
# Download the daily records of one station for a given period (start date to
# end date) and return them as a tidy, date-indexed dataframe.

def ReadURLData(Station, StartDate, EndDate):
    """Fetch one station's CSV data for the period and return it as a DataFrame.

    Parameters
    ----------
    Station
        Station identifier; used to build the URL and stored in the
        ``Station`` column of the result.
    StartDate, EndDate
        Bounds of the requested period, passed through to ``URLGenerate``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the parsed ``Date`` column, with columns ``MaxTemp``,
        ``MinTemp``, ``AverTemp``, ``Preci`` and ``Station``. Values in the
        four measurement columns that cannot be parsed as numbers become NaN.
    """
    request_url = URLGenerate(Station, StartDate, EndDate)

    # The first line of the response is skipped and no header row is parsed;
    # the column names are assigned explicitly on the next line.
    frame = pd.read_csv(request_url, sep=",", skiprows=1, header=None)
    frame.columns = ["Date", "MaxTemp", "MinTemp", "AverTemp", "Preci"]

    # Tag every row with the station it belongs to.
    frame["Station"] = Station

    # Parse the dates and use them as the index.
    frame["Date"] = pd.to_datetime(frame["Date"])
    frame = frame.set_index("Date")

    # Convert the measurements to (downcast) floats so they can be averaged;
    # non-numeric entries are coerced to NaN instead of raising.
    for measurement in ("MaxTemp", "MinTemp", "AverTemp", "Preci"):
        frame[measurement] = pd.to_numeric(
            frame[measurement], errors="coerce", downcast="float"
        )

    return frame

In [7]:
# Inputs for the analysis: the period and the list of stations used for
# questions 1 and 2, and the county used for question 3.

# The given start date in this project.
StartDate = "2018-01-01"

# The given end date in this project.
EndDate = "2018-12-31"

# Stationlist used for question 1 and question 2.
# The given list of stations in this project.
Stationlist = ["LAX", "LNK", "ORD", "SFO"] 

# County used for question 3.
# The code of a county is a 5-digit id that can be found by searching for "FIPS county code".
# Here is a link as well: https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt
# Code of Lancaster County, NE, used in this project.
Countycode = "31109" 


In [8]:
# Require 1: return a dataframe that includes the daily climate data for a list of stations between a given period

def dailydata(Stationlist, StartDate, EndDate):
    """Return the daily climate records of several stations as one DataFrame.

    Parameters
    ----------
    Stationlist
        Iterable of station identifiers; each one is fetched with
        ``ReadURLData``.
    StartDate, EndDate
        Bounds of the requested period, passed through to ``ReadURLData``.

    Returns
    -------
    pandas.DataFrame
        The per-station frames stacked in the order of ``Stationlist``, with
        columns ``MaxTemp``, ``MinTemp``, ``AverTemp``, ``Preci`` and
        ``Station`` and an unnamed date index. An empty ``Stationlist``
        yields an empty frame that still carries those five columns.

    Raises
    ------
    Exception
        Whatever ``ReadURLData`` raises for a station is propagated unchanged.
    """
    column_names = ["MaxTemp", "MinTemp", "AverTemp", "Preci", "Station"]

    # Collect every station first and concatenate once: growing a frame with
    # pd.concat inside the loop re-copies all earlier rows on each iteration.
    station_frames = [
        ReadURLData(station, StartDate, EndDate) for station in Stationlist
    ]

    # Nothing requested: hand back an empty frame with the expected columns
    # rather than calling pd.concat on an empty list (which raises).
    if not station_frames:
        return pd.DataFrame(columns=column_names)

    all_df = pd.concat(station_frames)

    # Leave the date index unnamed, which is what this function has always
    # returned (reset_index() on the result yields an "index" column).
    all_df.index.name = None

    return all_df

# Run dailydata for the configured stations and period, keep the result in
# Daily_da (it is reused for the pentad means), and display it.
Daily_da = dailydata(Stationlist, StartDate, EndDate)
Daily_da

Unnamed: 0,MaxTemp,MinTemp,AverTemp,Preci,Station
2018-01-01,67.0,48.0,57.5,0.0,LAX
2018-01-02,76.0,54.0,65.0,0.0,LAX
2018-01-03,76.0,54.0,65.0,0.0,LAX
2018-01-04,74.0,55.0,64.5,0.0,LAX
2018-01-05,69.0,56.0,62.5,0.0,LAX
...,...,...,...,...,...
2018-12-27,59.0,47.0,53.0,0.0,SFO
2018-12-28,58.0,44.0,51.0,0.0,SFO
2018-12-29,60.0,41.0,50.5,0.0,SFO
2018-12-30,59.0,45.0,52.0,0.0,SFO


In [9]:
# Require 2: return a dataframe that includes the pentad mean climate data for a list of stations between a given period
def pentadata(Daily_data):
    """Return 5-day (pentad) means of the daily data, computed per station.

    Parameters
    ----------
    Daily_data : pandas.DataFrame
        Daily records with a datetime index and a ``Station`` column, such as
        the frame returned by ``dailydata``.

    Returns
    -------
    pandas.DataFrame
        Indexed by (``Station``, start of each 5-day bin); one column per
        column of ``Daily_data`` other than ``Station``, holding the mean of
        that column over the bin.
    """
    station_key = "Station"

    # Average only the non-key columns. Without this selection the string
    # Station column can be handed to mean() together with the measurements;
    # older pandas dropped it silently, newer releases raise a TypeError.
    value_columns = [
        column for column in Daily_data.columns if column != station_key
    ]

    # Group by station, then resample each station's records into 5-day bins.
    Pentad_mean_data = (
        Daily_data.groupby([station_key])[value_columns].resample("5D").mean()
    )

    return Pentad_mean_data

# Run pentadata on the daily data collected above and display the
# per-station 5-day means.
Pentad_da = pentadata(Daily_da)
Pentad_da

Unnamed: 0_level_0,Unnamed: 1_level_0,MaxTemp,MinTemp,AverTemp,Preci
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LAX,2018-01-01,72.400002,53.400002,62.900002,0.000
LAX,2018-01-06,65.400002,55.400002,60.400002,0.350
LAX,2018-01-11,73.400002,53.799999,63.599998,0.000
LAX,2018-01-16,68.000000,53.799999,60.900002,0.000
LAX,2018-01-21,68.400002,47.000000,57.700001,0.000
...,...,...,...,...,...
SFO,2018-12-07,57.799999,44.400002,51.099998,0.004
SFO,2018-12-12,61.599998,48.400002,55.000000,0.118
SFO,2018-12-17,58.400002,48.200001,53.299999,0.022
SFO,2018-12-22,58.000000,46.799999,52.400002,0.088


In [10]:
# Require 3: return a dataframe that includes the monthly mean climate data for a given county and a given period

def monthlyCounty(Countycode, StartDate, EndDate):
    """Return monthly mean climate values pooled over a county's stations.

    The station list of the county is read from the StnMeta service, each
    station is fetched with ``ReadURLData``, and the daily rows of all
    stations that could be read are averaged together per calendar month.

    Parameters
    ----------
    Countycode
        County code inserted as the ``county`` query value of the StnMeta URL.
    StartDate, EndDate
        Bounds of the requested period, passed through to ``ReadURLData``.

    Returns
    -------
    pandas.DataFrame
        One row per month (unnamed month-end index) with the mean of
        ``MaxTemp``, ``MinTemp``, ``AverTemp`` and ``Preci``. If no station
        could be read, an empty frame with those four columns is returned.

    Notes
    -----
    A station whose download or parsing fails is skipped with a warning
    instead of aborting the whole county. A summary line is printed before
    the result is computed.
    """
    # create the url according to the countycode.
    url = f"http://data.rcc-acis.org/StnMeta?county={Countycode}&output=csv"

    # column names assigned to the county metadata.
    columns = ["StationID", "StationName", "State", "Lon", "Lat", "Elevation"]

    # read the county metadata.
    Meta_county = pd.read_csv(url, sep=",", names=columns)

    # the measurement columns that are averaged (Station is a label, not a value).
    value_columns = ["MaxTemp", "MinTemp", "AverTemp", "Preci"]

    # Reading some stations raises an error, so each one is attempted on its
    # own. Only ordinary exceptions are caught (Ctrl-C still interrupts), and
    # every skipped station is reported rather than silently dropped.
    station_frames = []
    for station_id in Meta_county["StationID"]:
        try:
            station_frames.append(ReadURLData(station_id, StartDate, EndDate))
        except Exception as error:
            warnings.warn(
                f"Skipping station {station_id!r}: {error}", stacklevel=2
            )

    print(
        "Monthly mean climate including maximum temperature, minimum temperature, "
        "average temperature and precipitation, "
        f"for the county of which the code was {Countycode} during the period "
        f"starting on {StartDate}, ending on {EndDate}."
    )

    # No usable station: resampling needs a datetime index, so return an
    # empty result with the expected columns instead of failing there.
    if not station_frames:
        return pd.DataFrame(columns=value_columns)

    # Concatenate once rather than growing a frame inside the loop.
    all_df = pd.concat(station_frames)

    # calculate the monthly mean data for the county from the value columns only.
    # NOTE(review): recent pandas releases deprecate the "M" alias in favour
    # of "ME" -- confirm against the pandas version this notebook targets.
    monthlyCountyMean = all_df[value_columns].resample("M").mean()

    # Keep the month index unnamed, as this function has always returned it.
    monthlyCountyMean.index.name = None

    return monthlyCountyMean

# Run monthlyCounty for the configured county and period and display the
# monthly means (the function also prints a one-line summary).
monthly_county_data = monthlyCounty(Countycode, StartDate, EndDate)
monthly_county_data

Monthly mean climate including maximum temperature, minimum temperature, average temperature and precipitation, for the county of which the code was 31109 during the period starting on 2018-01-01, ending on 2018-12-31.


Unnamed: 0,MaxTemp,MinTemp,AverTemp,Preci
2018-01-31,34.012905,12.658065,23.335484,0.02087
2018-02-28,34.221428,12.642858,23.432142,0.035883
2018-03-31,51.232258,29.23871,40.235485,0.097737
2018-04-30,57.466667,29.853333,43.66,0.027034
2018-05-31,81.593552,56.174194,68.883873,0.114284
2018-06-30,86.706665,65.046669,75.876663,0.2987
2018-07-31,85.941933,63.799999,74.870964,0.22277
2018-08-31,84.627449,62.764706,73.696075,0.216845
2018-09-30,77.933334,57.799999,67.866669,0.374116
2018-10-31,61.458065,38.793549,50.125805,0.159743
