In [219]:
import numpy as np
from scipy import stats
from scipy.fft import fft
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [220]:
# Read the first 1350 data rows of columns B:D from "Sheet1", then continue on
# a copy so the frame as loaded stays untouched.
temphumiDF = pd.read_excel(
    "DSP_Data_with_ID.xlsx",
    sheet_name="Sheet1",
    skiprows=0,
    nrows=1350,
    usecols="B:D",
)
temphumiDF_c = temphumiDF.copy()
temphumiDF_c

Unnamed: 0,Date,Humidity (%),Temperature (°C)
0,10/17/2023_20:26:35,66.1,32.6
1,10/17/2023_20:41:37,66.0,32.6
2,10/17/2023_20:56:40,61.1,34.8
3,10/17/2023_21:11:43,59.5,35.2
4,10/17/2023_21:26:46,61.5,33.7
...,...,...,...
1343,10/31/2023_23:20:31,48.7,33.1
1344,10/31/2023_23:35:34,47.3,34.8
1345,10/31/2023_23:50:37,46.8,34.9
1346,11/01/2023_00:05:40,47.2,35.0


In [221]:
def getIndexOfMissing(data):
    """Return the positions of the missing (NaN) entries in ``data``.

    Parameters
    ----------
    data : array-like
        One-dimensional (or single-column) numeric data: a NumPy array, a
        list, or a pandas Series. ``None`` entries are treated as missing.

    Returns
    -------
    list of int
        Zero-based positions of the missing values, in ascending order.
        Empty when nothing is missing or ``data`` is empty.
    """
    # Converting up front makes the lookup positional, so a pandas Series
    # with a non-default index no longer trips over label-based indexing.
    values = np.asarray(data, dtype=float)
    # One vectorised mask replaces the element-by-element Python loop.
    return np.flatnonzero(np.isnan(values)).tolist()

In [276]:
# Pull the temperature column out of the working frame as a NumPy array.
temperature = temphumiDF_c.loc[:, 'Temperature (°C)'].to_numpy()

temperature

array([32.6, 32.6, 34.8, ..., 34.9, 35. , 34.8])

In [251]:
# Positions of the missing (NaN) temperature readings.
indexes = getIndexOfMissing(data=temperature)

print(indexes)

[9, 10, 11, 12, 13, 14, 15, 16, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854]


In [253]:
# The imputer is fed a 2-D single-column array, so reshape the series.
# reshape() alone returns a view of `temperature`; copying gives `tempre` a
# writable buffer of its own, so filling the gaps in place later cannot
# silently overwrite the raw readings (which would make re-running the
# missing-index cell find nothing), and it does not fail if pandas handed
# back a read-only array.
tempre = temperature.reshape(-1, 1).copy()
print(tempre)


[[32.6]
 [32.6]
 [34.8]
 ...
 [34.9]
 [35. ]
 [34.8]]


In [254]:
# temperature[indexes] = temperature.isna()
# type(temperature)
# temperature


In [255]:
# Imputer used to fill the missing readings; random_state is pinned so
# repeated runs give the same result.
imputer = IterativeImputer(
    max_iter=10,
    random_state=0,
)

In [256]:
# Fit on the temperature column and get it back with the missing entries filled.
imputed_values = imputer.fit_transform(X=tempre)

In [257]:
# Write the imputed numbers into the rows that were missing.
tempre[indexes, :] = imputed_values[indexes, :]

In [314]:
# Print every row of the filled column (no "..." truncation).
print(np.array2string(tempre, threshold=np.inf))


[[32.6       ]
 [32.6       ]
 [34.8       ]
 [35.2       ]
 [33.7       ]
 [35.2       ]
 [34.3       ]
 [34.7       ]
 [34.8       ]
 [32.89423077]
 [32.89423077]
 [32.89423077]
 [32.89423077]
 [32.89423077]
 [32.89423077]
 [32.89423077]
 [32.89423077]
 [30.4       ]
 [30.4       ]
 [32.        ]
 [31.7       ]
 [33.        ]
 [32.9       ]
 [32.7       ]
 [32.5       ]
 [31.2       ]
 [31.3       ]
 [31.9       ]
 [31.8       ]
 [32.8       ]
 [32.7       ]
 [32.9       ]
 [32.8       ]
 [32.3       ]
 [32.1       ]
 [30.9       ]
 [31.1       ]
 [31.2       ]
 [31.7       ]
 [31.7       ]
 [31.7       ]
 [31.7       ]
 [31.7       ]
 [31.9       ]
 [31.9       ]
 [31.9       ]
 [31.9       ]
 [32.2       ]
 [32.3       ]
 [32.5       ]
 [32.8       ]
 [32.3       ]
 [33.3       ]
 [33.1       ]
 [33.4       ]
 [32.3       ]
 [33.7       ]
 [34.9       ]
 [34.7       ]
 [34.1       ]
 [34.2       ]
 [34.3       ]
 [35.8       ]
 [36.2       ]
 [35.4       ]
 [36.2       ]
 [36.6    

In [294]:
date = temphumiDF_c['Date'].to_numpy()

# Put the timestamps and the filled temperatures side by side as two columns.
combi = np.hstack((date[:, np.newaxis], tempre))

combi

array([['10/17/2023_20:26:35', 32.6],
       ['10/17/2023_20:41:37', 32.6],
       ['10/17/2023_20:56:40', 34.8],
       ...,
       ['10/31/2023_23:50:37', 34.9],
       ['11/01/2023_00:05:40', 35.0],
       ['11/01/2023_00:20:42', 34.8]], dtype=object)

In [309]:
# `combi` is an object array (timestamp strings next to floats), so pandas
# would keep both columns as dtype object. Cast the temperature back to float
# so numeric operations on the column behave as expected.
df = pd.DataFrame(combi, columns=['Date', 'Temperature (°C)']).astype(
    {'Temperature (°C)': float}
)

df

Unnamed: 0,Date,Temperature (°C)
0,10/17/2023_20:26:35,32.6
1,10/17/2023_20:41:37,32.6
2,10/17/2023_20:56:40,34.8
3,10/17/2023_21:11:43,35.2
4,10/17/2023_21:26:46,33.7
...,...,...
1343,10/31/2023_23:20:31,33.1
1344,10/31/2023_23:35:34,34.8
1345,10/31/2023_23:50:37,34.9
1346,11/01/2023_00:05:40,35.0


In [268]:
def getMean(x):
    """Return the arithmetic mean of all values in ``x``."""
    average = np.mean(x)
    return average

def getMedian(x):
    """Return the median of all values in ``x``."""
    middle = np.median(x)
    return middle

def getMode(x):
    """Return every value of ``x`` that occurs most often.

    The result is a 1-D array in ascending order; it holds more than one
    entry when several values tie for the highest count.
    """
    distinct, occurrences = np.unique(x, return_counts=True)
    # Boolean mask of the values that reach the top count.
    is_most_frequent = occurrences == occurrences.max()
    return distinct[is_most_frequent]

def getVariance(x):
    """Return the variance of all values in ``x`` (NumPy default: divide by N)."""
    spread = np.var(x)
    return spread

def getStandardDeviation(x):
    """Return the standard deviation of all values in ``x``.

    The standard deviation stands for noise and other interference. It is
    not meaningful by itself, only in comparison to the mean.
    """
    deviation = np.std(x)
    return deviation

def getSignalToNoiseRatio(mean, std):
    """Return the signal-to-noise ratio, i.e. ``mean`` divided by ``std``."""
    ratio = mean / std
    return ratio

def getCoefficientOfVariation(mean, std):
    """Return the coefficient of variation in percent: ``std / mean * 100``.

    A signal (or any other group of measured values) with a CV of 2% has an
    SNR of 50. Better data shows a higher SNR and a lower CV.
    """
    relative_spread = std / mean
    return relative_spread * 100


In [269]:
# Summary statistics of the filled temperature column.
mean = getMean(tempre)
median = getMedian(tempre)
mode = getMode(tempre)
var = getVariance(tempre)
sd = getStandardDeviation(tempre)
# Stored as `snr`, not `str`: binding the name `str` at notebook level would
# shadow the built-in `str` type for every cell that runs afterwards.
snr = getSignalToNoiseRatio(mean, sd)
cv = getCoefficientOfVariation(mean, sd)

print(f'mean = {mean}')
print(f'median = {median}')
print(f'mode = {mode}')
print(f'variance = {var}')
print(f'sd = {sd}')
# The printed label is kept as-is so the cell output does not change.
print(f'str = {snr}')
print(f'cv = {cv}')

mean = 32.894230769230774
median = 32.6
mode = [32.89423077]
variance = 4.448010728144259
sd = 2.109030755618386
str = 15.596847358248175
cv = 6.411552136343528


In [313]:
# 30 evenly spaced timestamps from 17 Oct 2023 to 1 Nov 2023 (both included).
idx = pd.date_range('10/17/2023', '11/01/2023', periods=30)

# Ask pandas which frequency string, if any, matches that spacing.
print(pd.infer_freq(idx))

None
