In [14]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import os
import random
import copy
import matplotlib.pyplot as plt

In [1]:
import numpy as np
import pandas as pd

# This class performs a Fourier transformation on the data to find frequencies that occur
# often and filter noise.
class FourierTransformation:
    
    def __init__(self):
        self.temp_list = []
        self.freqs = None

    def find_fft_transformation(self, series):
        transformation = np.fft.rfft(series, len(series))
        real_ampl = transformation.real
        max_freq = self.freqs[np.argmax(real_ampl[0:len(real_ampl)])]
        freq_weigthed = float(np.sum(self.freqs * real_ampl)) / np.sum(real_ampl)

        PSD = np.divide(np.square(real_ampl), float(len(real_ampl)))
        PSD_pdf = np.divide(PSD, np.sum(PSD))

        if np.count_nonzero(PSD_pdf) == PSD_pdf.size:
            pse = -np.sum(np.log(PSD_pdf) * PSD_pdf)
        else:
            pse = 0

        real_ampl = np.insert(real_ampl, 0, max_freq)
        real_ampl = np.insert(real_ampl, 0, freq_weigthed)
        row = np.insert(real_ampl, 0, pse)

        self.temp_list.append(row)

        return 0

    def abstract_frequency(self, data_table, columns, window_size, sampling_rate):
        self.freqs = (sampling_rate * np.fft.rfftfreq(int(window_size))).round(3)

        for col in columns:
            collist = []
            # prepare column names
            collist.append(col + '_max_freq')
            collist.append(col + '_freq_weighted')
            collist.append(col + '_pse')
            
            collist = collist + [col + '_freq_' +
                    str(freq) + '_Hz_ws_' + str(window_size) for freq in self.freqs]
           
            # rolling statistics to calculate frequencies, per window size. 
            # Pandas Rolling method can only return one aggregation value. 
            # Therefore values are not returned but stored in temp class variable 'temp_list'.

            # note to self! Rolling window_size would be nicer and more logical! In older version windowsize is actually 41. (ws + 1)
            data_table[col].rolling(
                window_size + 1).apply(self.find_fft_transformation)

            # Pad the missing rows with nans
            frequencies = np.pad(np.array(self.temp_list), ((window_size, 0), (0, 0)),
                        'constant', constant_values=np.nan)
            # add new freq columns to frame
            
            data_table[collist] = pd.DataFrame(frequencies, index=data_table.index)

            # reset temp-storage array
            del self.temp_list[:]
            

        
        return data_table

In [18]:
Viable_Cols = ['Acceleration x (m/s^2)_kalman', 'Acceleration y (m/s^2)_kalman', 'Acceleration z (m/s^2)_kalman', 'Gyroscope x (rad/s)_kalman',
              'Gyroscope y (rad/s)_kalman', 'Gyroscope z (rad/s)_kalman', 'Linear Acceleration x (m/s^2)_kalman',
              'Linear Acceleration y (m/s^2)_kalman', 'Linear Acceleration z (m/s^2)_kalman']
files = os.listdir('./Fourrier/')
files2 = []
for i in range(int(len(files)/10)):
    files2.append(files[i*10])
for file in files2:
    df = pd.read_csv('./Fourrier/'+str(file))
    df2 = df[Viable_Cols]
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df2)
    pca = PCA(n_components=6)
    pcaColumns = pca.fit_transform(scaled_data)
    pcaColumns = pd.DataFrame(pcaColumns)
    for col in pcaColumns.columns:
        df[col] = pcaColumns[col]
    df.to_csv('./PCA_STEP/PCA '+str(file))

In [None]:
fourrier = FourierTransformation()
Viable_Cols = ['0', '1', '2', '3', '4', '5']
files = os.listdir('./PCA_STEP')
milliseconds_per_instance = 50
fs = float(1000)/milliseconds_per_instance
ws = int(float(10000)/milliseconds_per_instance)
for file in files:
    print("A")
    frame = pd.read_csv('./PCA_STEP/'+str(file))
    newFrame = copy.deepcopy(frame)
    for column in Viable_Cols:
        fourrier.abstract_frequency(newFrame, [column], ws, fs)
        newCol = str(column+'_max_freq')
        frame[newCol] = newFrame[newCol]
        #plt.figure()
        #plt.plot(range(len(newFrame[newCol])),newFrame[newCol])
        #lt.xlabel(str(file)+" Fourrier "+str(column))
        #plt.savefig("./Fourrier/Fourrier "+str(file)+" "+str(i)+".png")
    frame.to_csv("./PCA Fourrier/Fourrier "+str(file))

In [31]:
#plt.figure()
#plt.plot(frame['0_max_freq'])
#plt.plot(frame['Gyroscope x (rad/s)_kalman_max_freq'])
#plt.show()
PCA_Cols = ['0_max_freq', '1_max_freq', '2_max_freq', '3_max_freq', '4_max_freq', '5_max_freq']
file = pd.read_csv('./PCA Fourrier/Fourrier PCA Fourrier Austin Running 150spm, 10kph.csv')
votes = []
votes2 = []
for i in range(202,len(file['0'])):
    vote = np.mean([file[col].iloc[i] for col in PCA_Cols])
    votes.append(vote)
print(np.mean(votes)*60)
for j in range(202,len(file['0'])):
    vote2 = np.mean([file[str(col)+'_max_freq'].iloc[j] for col in Viable_Cols])
    votes2.append(vote2)
print(np.mean(votes2)*60)

142.79284397904442
142.79284397904442
