In [None]:
"""
Hameed Abdul
10/3/2018
Artificial neural networks
Homework 1 - Weather station data
"""

import pandas as pd
import numpy as np
from pathlib import Path
np.set_printoptions(suppress=True)

In [None]:
def generate_site_files(path_to_master_csv: str, printOutput=False):
    """
    Split a master weather CSV into one CSV file per station.

    The master file is read with pandas and must contain a ``StationID``
    column. For every distinct StationID (in order of first appearance) the
    matching rows are written to ``2017-<StationID>.csv`` in the current
    working directory. The DataFrame index is written as the first column,
    which is what ``pd.read_csv(..., index_col=0)`` expects when the
    per-station files are read back.

    path_to_master_csv: (string or path-like) path to the master csv file
    printOutput: (bool) when True, also print every station's rows

    return: None. Progress and error messages are printed; problems are
        reported on stdout instead of being raised to the caller.
    """

    # Use Path class for compatibility with both Unix and Windows platforms.
    # Only the conversion itself is guarded so that a TypeError raised deeper
    # in the processing is not misreported as a bad argument.
    try:
        path_to_master_csv = Path(path_to_master_csv)
    except TypeError:
        print("Path_to_master_csv in must be a string...")
        return

    # Checked outside the processing block: pandas parse errors are
    # ValueError subclasses and must not be reported as "not a valid path".
    if not path_to_master_csv.exists():
        print(f"{path_to_master_csv} is not a valid path...")
        return

    try:
        # Open and read the file into a pandas DataFrame
        master_file = pd.read_csv(path_to_master_csv)
        print(f"Reading {path_to_master_csv} ... ")

        # Iterate the unique ids directly; np.nditer refuses object/string
        # arrays unless REFS_OK is set, which broke non-numeric station ids.
        for station_id in master_file.StationID.unique():

            # Filter once and reuse the result for writing and reporting
            curr_series = master_file[master_file.StationID == station_id]

            # Hand the path to pandas so it opens the file with the right
            # newline handling and always closes it, even if writing fails.
            curr_series.to_csv(Path(f"2017-{station_id}.csv"))

            # Inform user of progress
            print(f"Entries for StationID: {station_id}")
            print("=" * 30)
            print(f"Contains {len(curr_series)} entries")

            if printOutput:
                print(curr_series)
                print("\n\n\n")
            print("\n")

        print("Successfully Separated all Stations into individual CSVs")

    # Keep the notebook-friendly "print and carry on" behaviour, but say what
    # actually went wrong so the failure can be diagnosed.
    except Exception as err:
        print("Something unexpected happened... Something might be on fire!!")
        print(f"Details: {type(err).__name__}: {err}")
        return

In [None]:
# Split the master file "2017-2.csv" into one "2017-<StationID>.csv" file per
# station, written to the current working directory.
generate_site_files("2017-2.csv")

In [None]:
def map_local_time_to_month(station_file, month_dict, path_to_csv):
    """
    Add a numeric ``MonthID`` column to a station dataframe and save it.

    Characters 3-5 of every ``local_eastern_time`` value are used as the key
    into ``month_dict``.
    NOTE(review): this presumes timestamps shaped like "DD-Mon-YYYY ..." so
    that the slice is a three-letter month abbreviation - confirm against the
    data.

    station_file: pandas dataframe; modified IN PLACE (``MonthID`` is inserted
        as the second column), so the caller sees the new column
    month_dict: dictionary mapping the month key (e.g. 'Jan') to its index
    path_to_csv: path where the updated dataframe is written as csv

    Returns: None. The updated dataframe (including its index) is written to
        ``path_to_csv``.
    Raises: KeyError when a timestamp slice is not a key of ``month_dict``.
    """
    print("Using Timestamp to map entry to appriopraite month....")

    # Build the whole column in one pass; growing a numpy array with
    # np.append inside a loop copies the array on every iteration.
    month_index = np.array(
        [month_dict[stamp[3:6]] for stamp in station_file.local_eastern_time.values],
        dtype=int,
    )

    # Add the mapped month value to the dataframe
    # Write to the csv
    station_file.insert(1, 'MonthID', month_index)
    station_file.to_csv(path_to_csv)

In [None]:
class NotNumber(Exception):
    """Signals that the requested month index is not a number."""

class BadPath(Exception):
    """Signals that a supplied csv path does not exist."""

class OutOfIndex(Exception):
    """Signals that the requested month index lies outside 1..12."""

In [None]:
def stations_stats_by_month(path_to_csv, lookup_month_index):
    """
    Build a max / mean / min report for one month of a station csv.

    path_to_csv: path where the station csv is stored; it is read with the
        first column as index and must contain ``StationID`` plus the ten
        measurement columns listed in ``col_names`` below
    lookup_month_index: month to report on, a whole number from 1 (Jan) to
        12 (Dec); integral floats such as 2.0 are accepted

    Side effect: when the csv has no ``MonthID`` column yet,
    ``map_local_time_to_month`` is called once to add it and rewrite the csv,
    so later calls can skip that work.

    Returns: pandas DataFrame with rows 'Max', 'Mean', 'Min' and one column
        per measurement, restricted to rows whose MonthID equals the
        requested month. Returns None (after printing an explanation) when
        the path does not exist, the month index is not a number or not a
        whole number in 1..12, or anything else goes wrong.
    """
    # Dictionary to map string value to month index
    month_dict = {'Jan': 1 , 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}

    # Measurement columns summarised in the report, in display order
    col_names = ['temp_soil_10cm_C', 'temp_air_60cm_C','temp_air_2m_C', 'temp_air_10m_C', 'rh_2m_pct', 'temp_dp_2m_C', 'rain_2m_inches', 'wind_speed_10m_mph', 'wind_direction_10m_deg', 'rfd_2m_wm2']
    index_names = ['Max', 'Mean', 'Min']

    try:
        # Ensure the path exist and the user input is valid
        path_to_csv = Path(path_to_csv)

        if not path_to_csv.exists():
            raise BadPath()

        # `x is int == False` is a chained comparison that is always False,
        # so use a real type check (numpy scalars included).
        if not isinstance(lookup_month_index, (int, float, np.integer, np.floating)):
            raise NotNumber()

        # The range test comes first so NaN/inf never reach int(); fractional
        # values can never match a month and are rejected as well.
        if not (1 <= lookup_month_index <= 12) or lookup_month_index != int(lookup_month_index):
            raise OutOfIndex()

        month_number = int(lookup_month_index)

        # Open and read csv into a panda dataframe
        station_file = pd.read_csv(path_to_csv, index_col=0)
        print(f"Reading data from {path_to_csv} ....")

        # Check to see if this file has been touched for monthly stats before.
        # If not, add the column once (and persist it) to avoid redoing the
        # same computation on every query.
        if 'MonthID' not in station_file.columns:
            print("This file does not have a Month Index. Creating one for monthly report")
            map_local_time_to_month(station_file, month_dict, path_to_csv)

        # Store entries for matching month
        query_month = station_file[station_file.MonthID == month_number]
        station_id = station_file.StationID.values[0]

        # One row per statistic, one entry per measurement column
        stats = np.array([
            [query_month[col].max() for col in col_names],
            [query_month[col].mean() for col in col_names],
            [query_month[col].min() for col in col_names],
        ])

        # Store every thing in the Dataframe
        monthly_report = pd.DataFrame(data=stats, index=index_names, columns=col_names)

        # Reverse lookup: month abbreviation for the validated index
        month = next(name for name, number in month_dict.items() if number == month_number)
        print('\n'*2)
        print(f"Station {station_id} Stats for {month} 2017")
        print('==' * 20)

        return monthly_report

    # If bad input catch and prompt user with appropriate error
    except BadPath:
        print(f"{path_to_csv} is not a valid path...")
        return

    except OutOfIndex:
        print(f"Lookup Month Index must be between 1 and 12(including). You entered in {lookup_month_index}")
        return

    except NotNumber:
        print(f"Month index must be a number. {lookup_month_index} is not a number")
        return

    # Keep the "print and carry on" behaviour, but report the real cause
    except Exception as err:
        print("Something unexpected happened... Something might be on fire!!")
        print(f"Details: {type(err).__name__}: {err}")
        return
    

In [None]:
# Report for month index 2 of "2017-410.csv"; the returned DataFrame is this
# cell's last expression. If the csv has no MonthID column yet, the call adds
# one and rewrites the file.
stations_stats_by_month("2017-410.csv", 2)

In [None]:
def solar_rad_stats_station410():
    """
    Print the solar radiation summary for station 410.

    Reads "2017-410.csv" (first column as index) from the working directory
    and prints the max, mean and min of its ``rfd_2m_wm2`` column.

    Returns: None, the summary is only printed.
    """
    station_frame = pd.read_csv(Path("2017-410.csv"), index_col=0)

    # Work on the raw numpy values of the radiation column
    radiation = station_frame.rfd_2m_wm2.values
    highest, average, lowest = radiation.max(), radiation.mean(), radiation.min()

    print(
        "Station 410",
        "=" * 15,
        f"Max: {highest}",
        f"Mean: {average}",
        f"Min: {lowest}",
        sep="\n",
    )

In [None]:
# Print max / mean / min of the rfd_2m_wm2 column read from "2017-410.csv".
solar_rad_stats_station410()