In [1]:
import numpy as np
import pandas as pd
from scipy import fftpack
from scipy import signal
import time
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Root directory for this notebook's working files.
# NOTE(review): absolute, user-specific path; none of the visible cells read
# WORKSPACE_PATH -- confirm whether it is still needed.
WORKSPACE_PATH = '/home/takeyama/pywork/ipython/2016-06-13/'

mozartの環境設定

In [3]:
#p_path="/home/takeyama/pywork/ipython/2016-05-30/"

In [4]:
#cd ~/Documents/SyncRecord/cleaning-addingLABEL/

workstationの環境設定

In [4]:
# Change the kernel's working directory so that the bare CSV file names used by
# the loading cells resolve inside this recording folder.
cd ~/Documents/ALTIMA/20160617-103037/

/home/takeyama/Documents/ALTIMA/20160617-103037


In [7]:
class SensorData:
    """One labelled recording of 6-axis sensor data plus FFT / power-spectrum helpers.

    Raw samples live in ``_RawData`` (keys ``AccX..GyrZ`` and ``Time``).
    Spectra are written to / read from ``.npz`` files under
    ``p_path + 'fft/'`` and ``p_path + 'power/'``.

    NOTE(review): ``p_path`` is read as a module-level global and is not
    defined by this class; it must exist before any FFT / power method is
    called -- confirm where it is set.
    """

    def __init__(self):
        print("__class__")
        # raw data, keyed by column name (plus 'Time')
        self._RawData = {}
        # FFT windows, keyed by the names in _fft_col
        self._FFTData = {}
        # power spectra, keyed by the names in _power_col
        self._PowerData = {}
        # "data has been imported" flag.  ImportCSV / ShowFlagExistData use the
        # public spelling, so it is initialised here as well; otherwise the
        # getter raises AttributeError before the first import.
        self._Flag_exist_data = False
        self.Flag_exist_data = False

        self._columns = ['AccX', 'AccY', 'AccZ', 'GyrX', 'GyrY', 'GyrZ']
        self._fft_col = ['fft_AccX', 'fft_AccY', 'fft_AccZ', 'fft_GyrX', 'fft_GyrY', 'fft_GyrZ']
        self._power_col = ['power_AccX', 'power_AccY', 'power_AccZ', 'power_GyrX', 'power_GyrY', 'power_GyrZ']

    def ImportCSV(self, Sclass, csv_file):
        """Load the 'ags' rows of ``csv_file`` into ``_RawData``.

        Sclass   -- label stored as ``ClassName`` and used in cache file names.
        csv_file -- SHIFT-JIS CSV with 8 columns (Type, Time, Acc x3, Gyr x3).
        """
        self.ClassName = Sclass
        # design dataframe
        data = pd.read_csv(csv_file, encoding="SHIFT-JIS")
        data.columns = [u'Type', u'Time', u'AccX', u'AccY', u'AccZ', u'GyrX', u'GyrY', u'GyrZ']
        data = data[data.Type == 'ags']
        # pivot on Time: rows sharing a timestamp are aggregated by
        # pivot_table's default aggregation function
        data = pd.pivot_table(data, values=[u'AccX', u'AccY', u'AccZ', u'GyrX', u'GyrY', u'GyrZ'], index=[u'Time'])

        # convert to numpy arrays; accelerometer columns are scaled by 1e-4 and
        # gyroscope columns by 1e-2
        for name in self._columns:
            scale = 0.0001 if name.startswith('Acc') else 0.01
            self._RawData[name] = data[name].values * scale
        self._RawData['Time'] = data.index

        # only flag the object as populated once loading succeeded
        self._Flag_exist_data = True
        self.Flag_exist_data = True

    def ShowFlagExistData(self):
        """Return True once ImportCSV has loaded data, else False."""
        return self.Flag_exist_data

    def GetColumns(self):
        """Return the list of raw sensor column names."""
        return self._columns

    def GetTime(self):
        """Return the time index of the loaded recording."""
        return self._RawData['Time']

    def ShowAllDf(self):
        """Print every raw sensor column, each preceded by its name."""
        for name in self._columns:
            print(name + ' : ')
            print(self._RawData[name])

    def _Time2Num(self, time):
        """Return the position of ``time`` in the time index (IndexError if absent)."""
        return np.where(self._RawData['Time'] == np.datetime64(time))[0][0]

    def ShowQuery(self, Sname, rng=None):
        """Print ``Sname`` sliced as ``[rng[0]:rng[1]]``; the whole column when ``rng`` is omitted."""
        data = self._RawData[Sname]
        if rng:
            first, last = rng[0], rng[1]
        else:
            first, last = None, None
        print(Sname + ':' + str(data[first:last]))

    def _sliding_window(self, Sname, samp, overlap):
        """Yield consecutive windows of column ``Sname``.

        Each window spans ``samp * 10`` ms on the time index (i.e. ``samp``
        samples if the recording is sampled every 10 ms -- TODO confirm).
        Windows are back-to-back: ``overlap`` is accepted for interface
        compatibility but is not applied.  Iteration ends at the first window
        whose start or end timestamp is missing from the time index.
        """
        times = self._RawData['Time']
        if len(times) == 0:
            return
        values = self._RawData[Sname]
        span = np.timedelta64(samp * 10, 'ms')
        s = times[0]
        while True:
            try:
                start = self._Time2Num(s)
                goal = self._Time2Num(s + span)
            except IndexError:
                # boundary timestamp not present: no further complete window
                print('_sliding_window IndexError')
                break
            yield values[start:goal]
            s = s + span

    # Fast Fourier transform
    def GetFFT(self, Sfft, samp):
        """Load the cached FFT array for ``Sfft`` / ``samp`` from ``p_path + 'fft/'``."""
        return np.load(p_path + 'fft/' + self.ClassName + '_' + Sfft + '_' + str(samp) + '.npz')['arr_0']

    def _fft_windows(self, Sname, samp, overlap=0.5):
        """Return a (windows, samp // 2) array: FFT bins 1..samp//2 of each window of ``Sname``."""
        half = samp // 2
        # drop the DC term (bin 0) and keep the next samp // 2 bins
        rows = [fftpack.fft(w)[1:half + 1] for w in self._sliding_window(Sname, samp, overlap)]
        # the leading empty float array keeps the result well-defined (and the
        # dtype promotion unchanged) when there are no windows at all
        return np.concatenate([np.array([])] + rows).reshape(-1, half)

    def CalcFFT(self, samp, overlap=0.5):
        """Compute the windowed FFT of every sensor column, store and cache it.

        Results go to ``_FFTData`` and to one ``.npz`` file per column.  Each
        column is computed from scratch; windows of previously processed
        columns are no longer carried over into the next column's array.
        """
        start = time.time()
        for n, f in zip(self._columns, self._fft_col):
            print('start' + n + '->' + f)
            self._FFTData[f] = self._fft_windows(n, samp, overlap)
            print('CalcFFTStopIteration')
            np.savez(p_path + 'fft/' + self.ClassName + '_' + str(f) + '_' + str(samp), self._FFTData[f])
        elapsed_time = time.time() - start
        print("elapsed_time:{0}".format(elapsed_time) + "[sec]")

    # Spectral power
    def GetPower(self, Spower, samp):
        """Load the cached power array for ``Spower`` / ``samp`` from ``p_path + 'power/'``."""
        return np.load(p_path + 'power/' + self.ClassName + '_' + Spower + '_' + str(samp) + '.npz')['arr_0']

    def _power(self, fft_array):
        """Return the per-row normalised magnitudes of ``fft_array``, flattened to 1-D.

        Every row is converted to magnitudes sqrt(re^2 + im^2) and divided by
        the row's sum, so each row's contribution sums to 1.
        """
        rows = []
        for vector in fft_array:
            tmp = np.sqrt(np.real(vector) ** 2 + np.imag(vector) ** 2)
            rows.append(tmp / np.sum(tmp))
        # single concatenation instead of repeated np.append; the leading
        # empty array covers the "no rows" case
        return np.concatenate([np.array([])] + rows)

    def CalcPower(self, samp, overlap=0.5):
        """Convert every cached FFT array to a power array, store and cache it.

        ``overlap`` is accepted for interface symmetry with CalcFFT and is not used.
        """
        start = time.time()
        for fft_name, power_name in zip(self._fft_col, self._power_col):
            print('start' + fft_name + '->' + power_name)
            fft_data = self.GetFFT(fft_name, samp)
            power_data = self._power(fft_data)
            self._PowerData[power_name] = power_data
            np.savez(p_path + 'power/' + self.ClassName + '_' + power_name + '_' + str(samp), power_data)
        elapsed_time = time.time() - start
        print("elapsed_time:{0}".format(elapsed_time) + "[sec]")

In [228]:
def _round_times(times, freq, mode):
    """Snap ``times`` to multiples of ``freq`` ('Round', 'Roundup' or 'Rounddown'); returns int64."""
    times = np.asarray(times)
    if mode == 'Roundup':
        # ceiling division expressed through floor division
        ticks = -np.floor_divide(-times, freq)
    elif mode == 'Rounddown':
        ticks = np.floor_divide(times, freq)
    else:
        # round half up: floor((2t + freq) / (2 * freq))
        ticks = np.floor_divide(2 * times + freq, 2 * freq)
    return (ticks * freq).astype(np.int64)


def ImportCSV(csv_file, freq, mode='Round'):
    """Read one sensor CSV and return its 'ags' rows as a dict of numpy arrays.

    csv_file -- file name of a SHIFT-JIS CSV with 8 columns
                (Type, Time, AccX, AccY, AccZ, GyrX, GyrY, GyrZ).
    freq     -- period that timestamps are rounded to.
    mode     -- 'Round'     -> round half up to the nearest multiple of freq
                'Roundup'   -> round up (ceiling)
                'Rounddown' -> round down (floor)

    Returns a dict with keys AccX..GyrZ (scaled floats) and Time (int64), or
    -1 after printing a message when ``mode`` is not recognised.
    """
    if mode not in ('Round', 'Roundup', 'Rounddown'):
        # reject a bad mode before doing any file I/O
        print('check mode and inputed word is caused error')
        return -1

    # design dataframe and import csv
    data = pd.read_csv(csv_file, encoding="SHIFT-JIS")
    data.columns = [u'Type', u'Time', u'AccX', u'AccY', u'AccZ', u'GyrX', u'GyrY', u'GyrZ']
    data = data[data['Type'] == 'ags']

    # convert to numpy arrays
    # Acc data  [0.1mG]   => [G]
    # Gyr data  [0.01dps] => [dps]   ...dps = degrees per second
    RawData = {}
    for name, scale in (('AccX', 0.0001), ('AccY', 0.0001), ('AccZ', 0.0001),
                        ('GyrX', 0.01), ('GyrY', 0.01), ('GyrZ', 0.01)):
        RawData[name] = data[name].values * scale

    # only the time column, snapped to the sampling grid
    RawData['Time'] = _round_times(data.Time.values, freq, mode)
    return RawData
            

In [264]:
# Load the five recordings; timestamps are rounded to a period of 10 with the
# default mode ('Round').  File names are relative to the current directory.
# The fifth result is kept as `rawdata5` because it is NaN-padded into `data5`
# by a later cell.
data1=ImportCSV('left-hand.csv',10)
data2=ImportCSV('left-leg.csv',10)
data3=ImportCSV('right-hand.csv',10)
data4=ImportCSV('right-leg.csv',10)
rawdata5=ImportCSV('west.csv',10)

In [230]:
def CalcStartTime(array):
    """Return the largest of the per-series minima of ``array``.

    For a list of timestamp series this is the first instant that every
    series has reached.  Raises IndexError when ``array`` is empty.
    """
    # seed with the first series so that an empty input fails on array[0]
    lower_bounds = [min(array[0])]
    lower_bounds.extend(min(series) for series in array)
    return max(lower_bounds)

In [231]:
def CalcGoalTime(array):
    """Return the smallest of the per-series maxima of ``array``.

    For a list of timestamp series this is the last instant that every
    series still covers.  Raises IndexError when ``array`` is empty.
    """
    # seed with the first series so that an empty input fails on array[0]
    upper_bounds = [max(array[0])]
    upper_bounds.extend(max(series) for series in array)
    return min(upper_bounds)

**2016-06-14**  
もう１度、前処理について考える。特に、入力データの特徴、出力データの仕様を改める。  
*Features of the input data (preconditions)*
1. センサデータは{time, accx, accy, accz, gyrx, gyry, gyrz}が揃っている
2. また、センサデータはすべて整数型となっている。
3. nanデータはない

*output Data*
1. accは[0.1mG]から[G]に、gyrは[0.01dps]から[dps]に単位変換する（dps = degree per second）。
2. timeはサンプリング周期で丸める。
3. Nanデータは存在する。


In [282]:
def NanPating(DicData,freq):
    """Pad the gaps of one recording so that timestamps advance in steps of ``freq``.

    DicData -- dict with the keys 'Time', 'AccX', 'AccY', 'AccZ', 'GyrX',
               'GyrY', 'GyrZ', each an equally long 1-D array.
    freq    -- measurement period, in the same unit as 'Time'.

    For every pair of neighbouring timestamps let k = int(difference / freq):
      * k >= 1 : the earlier sample is kept and k - 1 filler rows follow it
                 (time = previous time + freq, every sensor value = NaN);
      * k <  1 : the earlier sample is dropped (duplicate or out-of-order
                 timestamp).
    Real samples are never overwritten by NaN, and a gap directly after the
    first sample is handled like any other gap.

    Returns a new dict with the same seven keys; all arrays are float64.
    Prints the elapsed wall-clock time.
    """
    import time
    start_time = time.time()

    times = np.asarray(DicData['Time'])
    n = len(times)
    if n == 0:
        src = np.array([], dtype=int)
        offset = np.array([], dtype=int)
    else:
        # number of output rows produced by each input sample
        steps = np.trunc(np.diff(times) / float(freq)).astype(int)
        counts = np.append(np.clip(steps, 0, None), 1)   # last sample: always kept
        # src[j]    : index of the input sample that output row j belongs to
        # offset[j] : 0 for the real sample, 1, 2, ... for the fillers after it
        src = np.repeat(np.arange(n), counts)
        run_start = np.cumsum(counts) - counts
        offset = np.arange(len(src)) - np.repeat(run_start, counts)
    is_real = offset == 0

    Array = {}
    for name in ('AccX', 'AccY', 'AccZ', 'GyrX', 'GyrY', 'GyrZ'):
        values = np.asarray(DicData[name], dtype=float)
        Array[name] = np.where(is_real, values[src], np.nan)
    # filler timestamps continue from the preceding real sample
    Array['Time'] = times[src].astype(float) + offset * freq

    elapsed_time = time.time() - start_time
    print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
    return Array

In [283]:
# NaN-pad the fifth recording (west.csv) onto a regular grid of period 10.
data5=NanPating(rawdata5,10)

elapsed_time:0.0606710910797[sec]


In [284]:
# Inspect the padded recording (dict of equally long arrays).
data5

{'AccX': array([ 0.1782,  0.1667,  0.1418, ..., -0.0311, -0.0291, -0.0401]),
 'AccY': array([-1.0457, -1.0386, -1.0523, ..., -0.9976, -1.0147, -1.0189]),
 'AccZ': array([ 0.0745,  0.0806,  0.0784, ..., -0.259 , -0.2599, -0.2702]),
 'GyrX': array([-12.92, -13.85, -14.16, ...,   5.07,   3.58,   1.69]),
 'GyrY': array([ -2.79,  -5.73,  -5.75, ..., -50.27, -52.05, -54.74]),
 'GyrZ': array([ 0.33, -0.22,  0.01, ...,  0.49,  1.73,  4.9 ]),
 'Time': array([ 65175760.,  65175770.,  65175780., ...,  65730600.,  65730610.,
         65730620.])}

In [285]:
# NOTE(review): in the visible cells `time5` is only assigned further down
# (`time5= data5['Time']`); on a fresh top-to-bottom run this cell would raise
# NameError -- move it below that assignment or display data5['Time'] instead.
time5

array([ 65175760.,  65175770.,  65175780., ...,  65730600.,  65730610.,
        65730620.])

**2016-06-14 & 2016-06-15**  
[ AIM ]NANパティングをされたデータ群を作成する。  
[ ERROR ] なぜか、四捨五入などの丸め処理が効いておらず、データが丸められていない。

In [286]:
# Pull the timestamp array out of every recording.
# Note: data1..data4 come straight from ImportCSV, while data5 is the
# NaN-padded version produced by NanPating.
time1= data1['Time']
time2= data2['Time']
time3= data3['Time']
time4= data4['Time']
time5= data5['Time']

In [287]:
# All timestamp series in one list; the position in this list (0..4) is the
# index later used into StartIndex / GoalIndex.
timeArray=[time1,time2,time3,time4,time5]

In [288]:
# Start of the interval common to all series: the largest of their minima.
StartTime = CalcStartTime(timeArray)
StartTime

65175890

In [289]:
# End of the interval common to all series: the smallest of their maxima.
GoalTime = CalcGoalTime(timeArray)
GoalTime

65730150

In [290]:
def CalcSearchIndexFromTime(checkData,CompareTime):
    """Return the index of the first element of ``checkData`` equal to ``CompareTime``.

    checkData   -- sequence or array of timestamps.
    CompareTime -- timestamp to look for.

    Prints a one-line report and returns the index as a plain int when a
    match exists; returns None (and prints nothing) when there is none.
    """
    hits = np.flatnonzero(np.asarray(checkData) == CompareTime)
    if hits.size == 0:
        return None
    i = int(hits[0])
    print(str(CompareTime)+' is much in the index  whose number is '+str(i))
    return i
        

In [291]:
# Position of the common start time inside every series.  The computed
# StartTime is used instead of a copied literal so the cell stays correct when
# the input files change.
StartIndex = [CalcSearchIndexFromTime(d, StartTime) for d in timeArray]
if None in StartIndex:
    raise ValueError('StartTime ' + str(StartTime) + ' is missing from series '
                     + str(StartIndex.index(None)))
StartIndex = np.array(StartIndex).astype(int)

65175890 is much in the index  whose number is 18
65175890 is much in the index  whose number is 0
65175890 is much in the index  whose number is 22
65175890 is much in the index  whose number is 8
65175890 is much in the index  whose number is 13


In [292]:
# Position of the common end time inside every series.  The computed GoalTime
# is used instead of a copied literal so the cell stays correct when the input
# files change.
GoalIndex = [CalcSearchIndexFromTime(d, GoalTime) for d in timeArray]
if None in GoalIndex:
    raise ValueError('GoalTime ' + str(GoalTime) + ' is missing from series '
                     + str(GoalIndex.index(None)))
GoalIndex = np.array(GoalIndex).astype(int)

65730150 is much in the index  whose number is 55444
65730150 is much in the index  whose number is 55426
65730150 is much in the index  whose number is 55448
65730150 is much in the index  whose number is 55434
65730150 is much in the index  whose number is 55439


In [293]:
# Show the per-series start positions.
print(StartIndex)

[18  0 22  8 13]


In [294]:
# Cut every series down to the common interval [StartIndex, GoalIndex) and
# stack the pieces into one (series, samples) float array.
common_pieces = []
for i, d in enumerate(timeArray):
    print(StartIndex[i])
    print(GoalIndex[i])
    piece = d[StartIndex[i]:GoalIndex[i]]
    print(str(i)+'s length='+str(len(piece)))
    common_pieces.append(piece)

# A flat append followed by reshape can silently misalign rows when the pieces
# differ in length but the total happens to be divisible; fail loudly instead.
piece_lengths = sorted(set(len(piece) for piece in common_pieces))
if len(piece_lengths) > 1:
    raise ValueError('common interval lengths differ between series: ' + str(piece_lengths))

NanpatingTimeArray = np.array(common_pieces, dtype=float)
print(NanpatingTimeArray.shape)

18
55444
0s length=55426
0
55426
1s length=55426
22
55448
2s length=55426
8
55434
3s length=55426
13
55439
4s length=55426
(5, 55426)


In [308]:
# Peek at one key of the padded recording.
# NOTE(review): indexing .keys() relies on keys() returning a list, and which
# key comes first depends on the dict's internal ordering.
data5.keys()[0]

'Time'

In [321]:
def Extraction(dic,start,goal):
    """Return a new dict holding ``values[start:goal]`` for every entry of ``dic``.

    dic         -- dict of equally indexed arrays (one per sensor column plus 'Time').
    start, goal -- slice bounds; ``goal`` itself is excluded.

    The 'Time' slice is additionally converted to integers.  The input dict
    is not modified.
    """
    output = {}
    # iterate over the items directly instead of indexing dic.keys(), which
    # only works where keys() returns a list
    for key, values in dic.items():
        piece = values[start:goal]
        if key == 'Time':
            piece = piece.astype(int)
        output[key] = piece
    return output
    

In [324]:
# Extract the common interval of the fifth recording and inspect the result.
# The prints now refer to `edata5`; the name `test` used before is not defined
# by any visible cell.
edata5 = Extraction(data5,StartIndex[4],GoalIndex[4])
print(edata5)
print(edata5.keys())
print(edata5['Time'])

{'Time': array([65175890, 65175900, 65175910, ..., 65730120, 65730130, 65730140]), 'AccY': array([-0.9727, -0.9737, -0.9857, ..., -0.9742, -0.971 , -0.8929]), 'AccX': array([ 0.1064,  0.1179,  0.1337, ...,  0.0124,  0.0624, -0.0225]), 'AccZ': array([ 0.055 ,  0.0599,  0.055 , ..., -0.2216, -0.2753, -0.3469]), 'GyrZ': array([ 11.96,  11.  ,  10.75, ..., -14.48, -11.45,  -9.12]), 'GyrX': array([-14.3 , -14.3 , -14.33, ...,  -7.18,  -3.  ,  -0.3 ]), 'GyrY': array([  0.41,   1.95,   2.21, ..., -71.92, -74.72, -72.04])}
['Time', 'AccY', 'AccX', 'AccZ', 'GyrZ', 'GyrX', 'GyrY']
[65175890 65175900 65175910 ..., 65730120 65730130 65730140]


In [325]:
# Cut every recording down to the common interval [StartIndex, GoalIndex).
# NOTE(review): only data5 went through NanPating; data1..data4 are the raw
# ImportCSV results -- confirm they contain no timestamp gaps, otherwise the
# extracted rows are not aligned across recordings.
edata1 = Extraction(data1,StartIndex[0],GoalIndex[0])
edata2 = Extraction(data2,StartIndex[1],GoalIndex[1])
edata3 = Extraction(data3,StartIndex[2],GoalIndex[2])
edata4 = Extraction(data4,StartIndex[3],GoalIndex[3])
edata5 = Extraction(data5,StartIndex[4],GoalIndex[4])

In [303]:
# Sanity check: the padded columns must all have the same length.
for column in ('AccX', 'AccY', 'AccZ', 'Time'):
    print(len(data5[column]))

55487
55487
55487
55487


とりあえず、前処理について大方終わった.
ここで、なぜできたのかを振り返るためにアルゴリズムや処理の流れについて考える。
まず、対象とする入力データからNANデータを加えてから共通区間を導くという考えである。
では、NANデータを加えるまでの流れを述べていく。
入力データは
* センサデータは{time, accx, accy, accz, gyrx, gyry, gyrz}が揃っている
* また、センサデータはすべて整数型となっている。
* nanデータはない

よって、まずはtimeで前後の差がサンプリング周期と異なるもの(今回はサンプリング周期は１０msとしている。）
インデックスを探す。そして、そのときのインデックスと前後の差を算出する。
つぎに、その算出したものを使って加速度、角速度データにはNANを加える。時刻（time）には差の分だけインデックスを追加する。
そして、{timeData,nan,nan,nan,nan,nan,nan}というような行を加える。
次に、共通区間のスタート、ゴールを探す。このやり方は今までできたやり方で行ったのでここでは省略。
そして、共通区間のスタート、ゴールのデータを抽出できた。
