In [13]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.collections as collections
import matplotlib.dates as dates
import glob
import pandas as pd
import os
from scipy.interpolate import CubicSpline
import datetime
import csv
from dateutil.parser import parse
import math
import ipywidgets as widgets
from ipywidgets import interactive


In [9]:
# Get data file names
path = r'.\2022-2023'
filenames = glob.glob(path + "/*.csv")

In [10]:
#Split out names of all files
names = [os.path.splitext(x)[0] for x in [os.path.basename(x) for x in filenames]]
print(names)
print("Number of Sensors= " + str( int(len(names))))

['North_202', 'North_203', 'North_205', 'Outdoor_1', 'South_219', 'South_221', 'South_222', 'South_223', 'South_224', 'South_226']
Number of Sensors= 10


In [11]:
print(filenames)

def is_date(string, fuzzy=False):
    """
    Return whether the string can be interpreted as a date.

    :param string: str, string to check for date
    :param fuzzy: bool, ignore unknown tokens in string if True
    """
    try: 
        parse(string, fuzzy=fuzzy)
        return True

    except ValueError:
        return False
    

['.\\2022-2023\\North_202.csv', '.\\2022-2023\\North_203.csv', '.\\2022-2023\\North_205.csv', '.\\2022-2023\\Outdoor_1.csv', '.\\2022-2023\\South_219.csv', '.\\2022-2023\\South_221.csv', '.\\2022-2023\\South_222.csv', '.\\2022-2023\\South_223.csv', '.\\2022-2023\\South_224.csv', '.\\2022-2023\\South_226.csv']


In [12]:
#combine all files and covert all datetime sequences to float for easier handling of cubic spline.
dfs = []
for filename in filenames:
    print(filename)
    head_index = 0
    with open (filename,'r') as csv_file:
        reader =csv.reader(csv_file)
        #next(reader) # skip first row
        for row in reader:
            if row != []:
                if is_date(row[0]):
                    #print(head_index)
                    break
                head_index = head_index + 1
            else:
                head_index = head_index + 1

    head_index = head_index - 1

    tail_index = 0
    with open (filename,'r') as csv_file:
        reader =csv.reader(csv_file)
        for row in reversed(list(csv.reader(csv_file))):
            #print(row)
            if row != []:
                if is_date(row[0]):
                    #print(tail_index)
                    break
                # make sure it is not a "bad line"
                #if(len(row) < 4):
                tail_index = tail_index + 1
            else:
                tail_index = tail_index + 1

    df= pd.read_csv(filename, header=head_index, on_bad_lines='skip', infer_datetime_format=True, skip_blank_lines=False, usecols=[i for i in range(3)], low_memory = False)
    df.drop(df.tail(tail_index).index,inplace = True)

    #df[['Date Time']] = dates.date2num(df[['Date Time']]) #look at dates to understand whats under the hood here
    
    #replace the column names with more friendly and shortened descriptors
    df.rename(columns={df.columns[0]: 'Date', df.columns[1]: 'Temp', df.columns[2]: 'RH'}, inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df['Temp']= pd.to_numeric(df["Temp"])
    df['RH']= pd.to_numeric(df["RH"])
    dfs.append(df)

.\2022-2023\North_202.csv
.\2022-2023\North_203.csv
.\2022-2023\North_205.csv
.\2022-2023\Outdoor_1.csv
.\2022-2023\South_219.csv
.\2022-2023\South_221.csv
.\2022-2023\South_222.csv
.\2022-2023\South_223.csv
.\2022-2023\South_224.csv
.\2022-2023\South_226.csv


In [14]:


#downsample data for a sensor before plotting

num_samp = str('1D')


%matplotlib inline

#Plot Data
def f(sensor):
    
    #Average RH
    AvRH = dfs[sensor].set_index('Date').resample(num_samp).mean().reset_index().RH 
    #Standard error RH
    Std_RH = dfs[sensor].set_index('Date').resample(num_samp).std().reset_index().RH 
    #Average Temp
    AvT = dfs[sensor].set_index('Date').resample(num_samp).mean().reset_index().Temp #average T for Main Gallery 
    #Standard error Temp
    Std_T = dfs[sensor].set_index('Date').resample(num_samp).std().reset_index().Temp #standard error across all sensor and downsamples
    #Average Date
    Avdate = dfs[sensor].set_index('Date').resample(num_samp).mean().reset_index().Date 

    #Calculate standard deviation interval of RH and Temp
    PlusRHSTD=[x+y for x,y in zip(AvRH, Std_RH)]
    MinusRHSTD=[x-y for x,y in zip(AvRH, Std_RH)]
    PlusTSTD=[x+y for x,y in zip(AvT, Std_T)]
    MinusTSTD=[x-y for x,y in zip(AvT, Std_T)]
    
    
    f, ax = plt.subplots(2,1,figsize=(10,9),sharex=True)

    ax[0].axes.get_xaxis().set_visible(False) #set X axis 
    ax[0].axes.get_yaxis().set_visible(True) #set Y axis
    ax[0].scatter(Avdate,AvRH, s = 2, color="None", edgecolor ="blue", alpha =1)
    ax[0].fill_between(Avdate,MinusRHSTD,PlusRHSTD, edgecolor ="None", alpha =0.3 )
    ax[0].set_title(names[sensor], size = 14)
    ax[0].set_ylabel('% RH', size = 14)
    ax[0].axhline(50,linestyle='--', lw=1, color="blue", alpha =0.3)
    ax[0].fill_between(Avdate,45,55, color="blue", edgecolor ="None", alpha =0.05 )
    ax[0].spines['top'].set_visible(False)
    ax[0].spines['right'].set_visible(False)
    ax[0].spines['bottom'].set_visible(False)
    ax[0].spines['left'].set_visible(True)


    ax[1].axes.get_xaxis().set_visible(True) #set X axis 
    ax[1].axes.get_yaxis().set_visible(True) #set Y axis
    ax[1].scatter(Avdate,AvT, s = 2, color="None", edgecolor ="red", alpha =0.8)
    ax[1].fill_between(Avdate,MinusTSTD,PlusTSTD, color="red", edgecolor ="None", alpha =0.3)
    ax[1].axhline(20,linestyle='--', lw=1, color="blue", alpha =0.3)
    ax[1].fill_between(Avdate,18,22, color="blue", edgecolor ="None", alpha =0.05)
    ax[1].set_ylabel('Temp (C)', size = 14)
    ax[1].spines['top'].set_visible(False)
    ax[1].spines['right'].set_visible(False)
    ax[1].spines['bottom'].set_visible(True)
    ax[1].spines['left'].set_visible(True)


    f.tight_layout()
    plt.show()

interactive_plot = interactive(f, sensor=(0, len(names)-1))
output = interactive_plot.children[-1]
interactive_plot

interactive(children=(IntSlider(value=4, description='sensor', max=9), Output()), _dom_classes=('widget-intera…

In [7]:
%matplotlib inline 

num_samp = str('1D')

def g(sensor):
    #Average RH
    AvRH = dfs[sensor].set_index('Date').resample(num_samp).mean().reset_index().RH 
    #Standard error RH
    Std_RH = dfs[sensor].set_index('Date').resample(num_samp).std().reset_index().RH 
    #Average Temp
    AvT = dfs[sensor].set_index('Date').resample(num_samp).mean().reset_index().Temp #average T for Main Gallery 
    #Standard error Temp
    Std_T = dfs[sensor].set_index('Date').resample(num_samp).std().reset_index().Temp #standard error across all sensor and downsamples
    #Average Date
    Avdate = dfs[sensor].set_index('Date').resample(num_samp).mean().reset_index().Date 
    
    counts, xedges, yedges = np.histogram2d(AvT, AvRH, bins=40)
    
    fig, ax = plt.subplots(1,2,figsize=(20,10))
    #sns.despine(left=True, bottom=True, right=True)

    #2D histogram density plot
    ax[0].set_title('Temp-RH ' + names[sensor], size = 24)
    #ax[0].set_facecolor((150/255, 150/255, 150/255)) #set gray background
    ax[0].scatter(AvT,AvRH, s = 30, color="None", edgecolor ="black", alpha =0.8)

    #Color Scatter with sampled pixels
    ax[1].set_title('Density map', size = 24)
    ax[1].imshow(np.rot90(counts))

    plt.show()

interactive_plot2 = interactive(g, sensor=(0, len(names)-1))
output = interactive_plot2.children[-1]
interactive_plot2

interactive(children=(IntSlider(value=22, description='sensor', max=45), Output()), _dom_classes=('widget-inte…

In [17]:
#Find and print longest column
l=[]

for i in range(len(dfs)):
    date = dfs[i][['Date']]
    l.append(len(date)) #find length of vector and append

longest_column = names[l.index(max(l))]

#apply cubic spline interpolation to entire dataset- based on longest column- 
#for even sampling for all galleries or rooms.


for i in range(len(dfs)):
    cs_T = CubicSpline(dfs[i].Date,dfs[i].Temp)
    cs_RH = CubicSpline(dfs[i].Date,dfs[i].RH)
    index = pd.RangeIndex(start=0, stop=max(l))
    dfs[i] = dfs[i].reindex(index).fillna(method='ffill') #reindex dataframe
    dfs[i].Date = dfs[l.index(max(l))].Date
    dfs[i].Temp = cs_T(dfs[l.index(max(l))].Date)
    dfs[i].RH = cs_RH(dfs[l.index(max(l))].Date)

#convert date floats back to date time format   
for i in range(len(dfs)):
    dfs[i][['Date']]=dates.num2date(dfs[i][['Date']])
   

ValueError: `x` must be strictly increasing sequence.