In [2]:
import os   #used for directory work
import matplotlib.pyplot as plt #used for creating plots
import pandas as pd
import numpy as np
%matplotlib inline

In [2]:
def continuous_df(df_raw, frequency=".1S"):
    """
    Make sure the "TIMESTAMP" column of a frame is continuous.

    The expected timestamps run from the first to the last "TIMESTAMP" value
    of ``df_raw`` in steps of ``frequency``. If the frame already has exactly
    that many rows it is returned untouched; otherwise a new frame is built on
    the expected timestamps, with NaN in every data column of the rows that
    were missing.

    Inputs:
        df_raw - DataFrame with a "TIMESTAMP" column (Timestamps or strings
                 that pandas can parse); the row index may start anywhere
        frequency - spacing of the expected timestamps, default is 10 Hz

    Outputs:
        df/df_raw - the rebuilt frame (default integer index, same column
                    order, "TIMESTAMP" as datetimes) or, when nothing is
                    missing, the original ``df_raw`` object

    Raises:
        ValueError - from pandas ``reindex`` when a gap has to be filled and
                     ``df_raw`` contains duplicate timestamps
    """
    if len(df_raw) == 0:
        # An empty frame has no first/last timestamp and no gaps to fill.
        print("There Were 0 Missing Timestamps")
        return df_raw

    # Parse once so string timestamps (e.g. straight from read_csv) line up
    # with the datetime grid below instead of silently matching nothing.
    stamps = pd.to_datetime(df_raw["TIMESTAMP"])

    # .iloc is positional: this keeps working after rows were trimmed off the
    # front of the frame and label 0 no longer exists.
    t = pd.date_range(stamps.iloc[0], stamps.iloc[-1], freq=frequency)  # timestamps wanted

    if len(t) == len(df_raw):
        print("There Were 0 Missing Timestamps")
        return df_raw

    col_order = list(df_raw.columns)

    # Index the data columns by time and reindex onto the full grid; reindex
    # inserts NaN rows for missing stamps and keeps numeric dtypes.
    data = df_raw.drop(columns="TIMESTAMP")
    data.index = pd.DatetimeIndex(stamps)
    df = data.reindex(t).reset_index(drop=True)

    df["TIMESTAMP"] = t
    return df[col_order]

In [3]:
def text_finder(path):
    """Return the names of the ``.txt`` files found in directory ``path``.

    Only the file names (not full paths) are returned, sorted in ascending
    string order.
    """
    return sorted(name for name in os.listdir(path) if name.endswith(".txt"))

def file_to_df(path):
    """Read a space-separated data file and add a "TIMESTAMP" column.

    The file is read with ``pd.read_csv`` (so ``path`` may be anything
    ``read_csv`` accepts), treating 'NAN' and '00nan' as missing values. The
    "TIMESTAMP" column is built per row from the "YYYY", "MM", "DD", "Hr",
    "Min" and "Sec" columns.

    Inputs:
        path - location of the file to read

    Outputs:
        df - the loaded DataFrame with the extra "TIMESTAMP" column
    """
    df = pd.read_csv(path, na_values=['NAN', "00nan"], sep=" ")

    # Walk the six columns in lockstep instead of doing a label lookup per
    # cell. The ``freq`` keyword is no longer passed to pd.Timestamp: it was
    # removed from the constructor in pandas 2.0 and never survived storage
    # in a DataFrame column anyway.
    parts = zip(df["YYYY"], df["MM"], df["DD"], df["Hr"], df["Min"], df["Sec"])
    df["TIMESTAMP"] = [
        pd.Timestamp("{}-{}-{} {}:{}:{}".format(yr, mon, day, hr, mnt, sec))
        for yr, mon, day, hr, mnt, sec in parts
    ]
    return df

In [4]:
def fire_start(df):
    """Locate the first row whose "T" value exceeds mean + 5 standard deviations.

    The mean and the (population, ddof=0) standard deviation are taken over
    the whole "T" column with NaN values ignored. NaN rows never count as
    exceeding the threshold.

    Inputs:
        df - DataFrame with a numeric "T" column

    Outputs:
        x_fire - 0-based row position (int) of the first value above the
                 threshold, or the sentinel 9e10 when no value exceeds it
    """
    # Work on a plain array so the result is a row position regardless of how
    # the frame is indexed (the label lookup df["T"][i] fails on trimmed frames).
    temps = np.asarray(df["T"], dtype=float)
    fire_5sig = np.nanmean(temps) + 5 * np.nanstd(temps)

    above = np.flatnonzero(temps > fire_5sig)
    if above.size == 0:
        return 9e10  # sentinel kept for callers that take min() over sonics
    return int(above[0])

In [18]:
# Row index printed for each burn next to the detected fire start; entries are
# matched to `burns` below by position.
n_list = [
    21000, 15000, 20000, 30000, 15000, 6000, 25000, 18000,
    30000, 60000, 50000, 180000, 40000, 25000, 25000, 27000,
    45000, 20000, 25000, 25000, 35000, 27000, 25000, 15000,
    35000, 45000, 20000, 40000,
]

# Burn directories are addressed as path + <burn number> + path_end.
# Alternative machine-specific roots:
#path = "/home/JoeySeitz/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-"
#path="/home/joey/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-" #79 
path = "/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-" #w10
path_end = "/sonics/"

# Burn numbers to process.
burns = [
    1, 2, 3, 4, 5, 6, 7, 8,
    11, 12, 13,
    18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
]

In [19]:
for file_count, burn_id in enumerate(burns):
    burn_path = path + str(burn_id) + path_end
    print(burn_path)
    txt_files = text_finder(burn_path)

    # Read the first 16 sonic files of this burn and record, for each one,
    # the row at which fire_start() places the beginning of the fire.
    df_list = []
    x_fire_lst = []
    for i in range(16):
        df = file_to_df(burn_path + txt_files[i])
        x_fire_lst.append(fire_start(df))
        df_list.append(df)

    # Earliest detection across the sonics. Both timestamps below are read
    # from `df`, i.e. the last sonic file loaded in the loop above.
    n_fire = min(x_fire_lst)
    print(df["TIMESTAMP"][n_list[file_count]])
    print(df["TIMESTAMP"][n_fire])

/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-1/sonics/
2018-03-05 14:07:28.500000
2018-03-05 14:09:37.400000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-2/sonics/
2018-03-06 11:29:26.900000
2018-03-06 11:29:24.100000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-3/sonics/
2018-03-06 13:27:10.900000
2018-03-06 13:27:40.500000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-4/sonics/
2018-03-17 10:45:10.300000
2018-03-17 10:53:28.200000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-5/sonics/
2018-03-17 12:40:58.500000
2018-03-17 12:42:35.900000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-6/sonics/
2018-03-17 14:43:33.100000
2018-03-17 14:44:27.500000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-7/sonics/
2018-05-09 10:57:18.500000
2018-05-09 11:02:00.700000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-8/sonics/


  This is separate from the ipykernel package so we can avoid doing imports until


2018-05-09 12:49:29.400000
2018-05-09 12:52:30.400000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-11/sonics/
2018-05-11 09:41:20.700000
2018-05-11 09:42:28.800000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-12/sonics/
2018-05-11 11:56:11.300000
2018-05-11 12:01:05
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-13/sonics/
2018-05-11 14:03:37.400000
2018-05-11 14:12:10.500000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-18/sonics/
2018-09-22 14:05:43
2018-09-22 11:14:35
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-19/sonics/
2018-09-22 16:32:56
2018-09-22 15:30:33.700000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-20/sonics/
2019-05-20 14:24:50.400000
2019-05-20 13:57:54.600000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-21/sonics/
2019-05-20 16:05:47.600000
2019-05-20 16:10:29.800000
/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_D

In [22]:
# Same fire-start check as the per-burn loop, run for Burn-9 only.
burn_path = path + "9" + path_end
txt_files = text_finder(burn_path)

# Read the first 16 sonic files and collect the fire-start row of each.
df_list = []
x_fire_lst = []
for i in range(16):
    df = file_to_df(burn_path + txt_files[i])
    x_fire_lst.append(fire_start(df))
    df_list.append(df)

# Earliest detection across the sonics; timestamps come from the last file read.
n_fire = min(x_fire_lst)
print(df["TIMESTAMP"][20000])
print(df["TIMESTAMP"][n_fire])

2018-05-10 10:43:06.400000
2018-05-10 10:51:31.100000


In [7]:
# Read the Burn-1 A1 and B1 sonic files and pass each through continuous_df,
# which prints a message when no timestamps are missing.
pathA1 = "/Users/joeyp/Desktop/Data_Cleaning_Code/Burn-1/sonics/A1_UVWT_Burn-1.txt"
pathB1 = "/Users/joeyp/Desktop/Data_Cleaning_Code/Burn-1/sonics/B1_UVWT_Burn-1.txt"

dfA1 = continuous_df(
    pd.read_csv(pathA1, sep=" ", na_values=['NAN', "00nan", "NaN"])
)
dfB1 = continuous_df(
    pd.read_csv(pathB1, sep=" ", na_values=['NAN', "00nan", "NaN"])
)
#path = "/Users/joeyp/Desktop/Data_Cleaning_Code/Burn-1/sonics/B1_UVWT_Burn-1.txt"
#dfC1 =  pd.read_csv(pathA1,na_values = ['NAN', "00nan","NaN"], sep=" ")

There Were 0 Missing Timestamps
There Were 0 Missing Timestamps


In [8]:
# Sanity check: print the row counts of the B1 and A1 frames to compare them.
print(len(dfB1))
print(len(dfA1))

76453
76453


In [8]:
# Load the Burn-1 B1 sonic file from a URL instead of a local path
# (file_to_df hands the location straight to pd.read_csv).
df2 = file_to_df("http://35.12.130.8/study/Seitz/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-1/sonics/B1_UVWT_Burn-1.txt")

In [9]:
# Row count of the B1 frame loaded from the URL.
len(df2)

76453

In [10]:
# Load the Burn-1 C1 sonic file from the same server.
dfC = file_to_df("http://35.12.130.8/study/Seitz/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Burn-1/sonics/C1_UVWT_Burn-1.txt")

In [12]:
# Row count of the C1 frame loaded from the URL.
len(dfC)

76453

In [67]:
# Ask interactively for the NaN replacement value. input() always returns a
# str, so a numeric answer such as 9999 must be converted before numeric use.
# NOTE(review): an interactive prompt blocks an unattended "Run All".
fill_nan = input("What to replace NaN's with? ex: 9999:")
print(type(fill_nan))

What to replace NaN's with? ex: 9999:NaN
<class 'str'>


In [52]:
# Mean temperature ("T") of each Burn-8 sonic over rows 0..19809.
# Row 19810 is the index the timestamp search in this notebook reports for
# "2018-05-09 12:52:30.400" — presumably the fire start, so the slice covers
# the pre-fire period (confirm).
path = "/Users/joeyp/Desktop/10X10_Truss_SERDP_Burns/JS_Cleaned_Data/Full-Timestamp/Burn-8/sonics/"
path_end = "_UVWT_Burn-8.txt"
nam_snc = ["A1", "A2", "A3", "A4", "B1", "B2", "B3", "B4",
           "C1", "C2", "C3", "C4", "D1", "D2", "D3", "D4"]
temp_mean = []
for i in nam_snc:
    # The comma between 'NaN' and "00nan" was missing, which made Python join
    # them into the single marker 'NaN00nan' and left "00nan" unrecognised.
    df = pd.read_csv(path + i + path_end, na_values=['NAN', 'NaN', "00nan"], sep=" ")
    sonic_mean = df["T"][:19810].mean()  # computed once, reused below
    print(i, round(sonic_mean, 2))
    temp_mean.append(sonic_mean)

A1 24.08
A2 25.12
A3 23.63
A4 23.86
B1 24.99
B2 nan
B3 25.89
B4 24.07
C1 24.96
C2 24.24
C3 23.77
C4 24.31
D1 23.3
D2 23.73
D3 24.41
D4 24.4


In [53]:
# Average the per-sonic means over each truss (four sonics per truss, in the
# order of nam_snc). np.nanmean is used for every truss, not only B, so a
# single sonic without data cannot turn a whole truss mean into nan.
print("A Truss mean:", round(np.nanmean(temp_mean[:4]),2))
print("B Truss mean:", round(np.nanmean(temp_mean[4:8]),2))
print("C Truss mean:", round(np.nanmean(temp_mean[8:12]),2))
print("D Truss mean:", round(np.nanmean(temp_mean[12:]),2))

A Truss mean: 24.17
B Truss mean: 24.98
C Truss mean: 24.32
D Truss mean: 23.96


In [25]:
# Scratch check of slice bounds: [1:8] skips element 0 and stops before element 8.
nam_snc[1:8]

['A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']

In [26]:
# Scratch check of the same slice on a plain list: the start index is
# inclusive and the stop index is exclusive.
lst = [1,2,3,4,5,6,7,8]
lst[1:8]

[2, 3, 4, 5, 6, 7, 8]

In [47]:
# Print the row number of every "TIMESTAMP" entry equal to the target string.
target_stamp = "2018-05-09 12:52:30.400"
for i, stamp in enumerate(df["TIMESTAMP"]):
    if stamp == target_stamp:
        print(i)

19810
