In [95]:
import pandas as pd
import numpy as np
from pathlib import Path

In [197]:
def generate_site_files(path_to_master_csv: str, printOutput=False):
    """
    This function generates separate csv files for each station found in the master_csv. 
    
    path_to_master_csv: (string) that contains path to the 2017-2.csv file 
    
    return: new csv file for each unique station found in the master_csv.
        Filename is 2017-(StationID).csv
    """
    
    try:
        # Use Path class for compatibility with both Unix and Windows platforms
        # Ensure file path exists
        path_to_master_csv = Path(path_to_master_csv)
        if Path.exists(path_to_master_csv) == False: 
            raise ValueError
    
         # Open and read the file into a pandas csv
        master_file = pd.read_csv(path_to_master_csv)
        print(f"Reading {path_to_master_csv} ... ")

        # Grab unique stations 
        unique_stations = master_file.StationID.unique()

        for i in np.nditer(unique_stations):

            # Create a csv for the current station
            # Write all entries for current station in one file
            curr_file = open(Path(f"2017-{i}.csv"), 'w')
            curr_series = master_file[master_file.StationID == i]
            curr_series.to_csv(curr_file)

            # Inform user of progress
            print(f"Entries for StationID: {i}")
            print("=" * 30)
            print(f"Contains {len(curr_series)} entries")

            if printOutput:
                print(master_file[master_file.StationID == i])
                print("\n\n\n")    
            print("\n")

            curr_file.close()

        print("Successfully Separated all Stations into individual CSVs")
        
    # If bad input catch and prompt user with appriopiate error
    except TypeError:
        print("Path_to_master_csv in must be a string...")
        return
    
    except ValueError:
        print(f"{path_to_master_csv} is not a valid path...")
        return
    
    except Exception:
        print("Something unexpected happened... Something might be on fire!!")
        return 

In [196]:
generate_site_files("2017-2.csv")

Path_to_master_csv in must be a string...


In [476]:
def one_hot_encode_months():
    pass

In [556]:
test = pd.read_csv("2017-330.csv")
test.local_eastern_time[0][3:6] # give value I looking for
times = test.local_eastern_time.values
months = np.array([],dtype=str)

# If there is no Month Column
# Iterate through file only once ever (add column to avoid redoing the same computation)
# Map Months to One-Hot encoded integers

# Preform lookup and monthly stats

for i in range(0, len(test), 800):
    cur_month = times[i][3:6]
    months= np.append(months, cur_month)
    
    print(cur_month, ('Jan' in month))



Jan False
Jan False
Jan False
Jan False
Feb False
Feb False
Feb False
Feb False
Mar False
Mar False
Mar False
Apr False
Apr False
Apr False
Apr False
May False
May False
May False
May False
Jun False
Jun False
Jun False
Jul False
Jul False
Jul False
Jul False
Aug False
Aug False
Aug False
Aug False
Sep False
Sep False
Sep False
Oct False
Oct False
Oct False
Oct False
Nov False
Nov False
Nov False
Nov False
Dec False
Dec False
Dec False


In [656]:
months
uni_index = np.unique(months,return_index=True)[1]
uni = [months[index] for index in sorted(uni_index)]
x = [i for i in range(len(uni))]
print(x)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]


In [645]:
# mon = pd.DataFrame()
mon = pd.DataFrame({'Months':['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep','Oct', 'Nov', 'Dec']})
# pd.get_dummies(test, prefix='Months', drop_first=True)
# for i in range(len(months)):
#     mon.append( {'Months': months[i]},ignore_index=True)
data = pd.Series(data=months, name="Step")
# mon.append(data, ignore_index=True)
freq = data.groupby(data).size()

# print(freq)
# mon.append({'Counts':freq}, ,ignore_index=True)
# mon = pd.concat([mon,freq], ignore_index=True, , join='outer')
mon.merge(freq,how="inner")

ValueError: can not merge DataFrame with instance of type <class 'pandas.core.series.Series'>

In [586]:
# pd.get_dummies(mon, prefix='Months')
mon

Unnamed: 0,Months
0,Jan
1,Feb
2,Mar
3,Apr
4,May
5,Jun
6,Jul
7,Aug
8,Sep
9,Oct


In [566]:
# pd.get_dummies(test,prefix=['Months'])
safe = test.copy(deep=True)

safe = pd.concat([safe, mon])
safe

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  after removing the cwd from sys.path.


Unnamed: 0.1,Months,StationID,Unnamed: 0,local_eastern_time,rain_2m_inches,rfd_2m_wm2,rh_2m_pct,temp_air_10m_C,temp_air_2m_C,temp_air_60cm_C,temp_dp_2m_C,temp_soil_10cm_C,wind_direction_10m_deg,wind_speed_10m_mph
0,,330.0,0.0,01-Jan-2017 00:00:00,0.0,0.001,86.4,15.95,15.90,14.40,13.63,18.12,176.5,0.903
1,,330.0,1.0,01-Jan-2017 00:15:00,0.0,0.000,87.9,16.03,15.46,14.08,13.47,18.06,157.4,1.571
2,,330.0,2.0,01-Jan-2017 00:30:00,0.0,0.000,89.1,16.07,15.39,14.01,13.60,17.99,153.1,1.687
3,,330.0,3.0,01-Jan-2017 00:45:00,0.0,0.000,89.8,15.81,15.19,13.96,13.53,17.93,160.3,1.221
4,,330.0,4.0,01-Jan-2017 01:00:00,0.0,0.000,90.6,15.68,15.06,13.90,13.54,17.87,175.6,1.207
5,,330.0,5.0,01-Jan-2017 01:15:00,0.0,0.000,91.4,15.85,15.31,14.11,13.92,17.81,165.0,1.096
6,,330.0,6.0,01-Jan-2017 01:30:00,0.0,0.000,91.8,15.86,15.36,14.10,14.03,17.75,166.4,0.981
7,,330.0,7.0,01-Jan-2017 01:45:00,0.0,0.000,92.2,15.67,15.22,13.95,13.96,17.69,170.1,0.691
8,,330.0,8.0,01-Jan-2017 02:00:00,0.0,0.000,92.7,15.35,15.14,13.82,13.97,17.63,140.4,0.631
9,,330.0,9.0,01-Jan-2017 02:15:00,0.0,0.000,93.1,15.29,14.91,13.63,13.81,17.58,153.8,0.782
