In [1]:
import numpy as np
import pandas as pd
from scipy import fftpack
from scipy import signal
import time
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Absolute, machine-specific working directory for this notebook.
# NOTE(review): none of the visible cells read this constant; SensorData builds
# its file paths from a global named `p_path` instead — confirm which is intended.
WORKSPACE_PATH = '/home/takeyama/pywork/ipython/2016-06-13/'

mozartの環境設定

In [3]:
#p_path="/home/takeyama/pywork/ipython/2016-05-30/"

In [4]:
#cd ~/Documents/SyncRecord/cleaning-addingLABEL/

workstationの環境設定

In [5]:
cd ~/Documents/ALTIMA/20160617-103037/

/home/takeyama/Documents/ALTIMA/20160617-103037


In [6]:
class SensorData:
    """One 6-axis (AccX..GyrZ) sensor recording plus its FFT / spectrum files.

    Raw samples are kept in memory in ``_RawData``; FFT windows and normalised
    amplitude spectra are written to and read back from ``<base>/fft/`` and
    ``<base>/power/`` as ``.npz`` files named ``<ClassName>_<column>_<samp>``.
    """

    # Scale factors applied to the raw integer columns by ImportCSV.
    _ACC_SCALE = 0.0001
    _GYR_SCALE = 0.01
    # Window lengths are converted to time as ``samp * 10`` milliseconds.
    _SAMPLE_PERIOD_MS = 10

    def __init__(self, base_path=None):
        """Create an empty container.

        base_path -- directory holding the ``fft/`` and ``power/`` folders.
                     When None (the default, as before) the module-level global
                     ``p_path`` is looked up each time a file path is needed.
        """
        print("__class__")
        # raw data, keyed by column name plus 'Time'
        self._RawData = {}
        # FFT data, keyed by the names in _fft_col
        self._FFTData = {}
        # power spectrum data, keyed by the names in _power_col
        self._PowerData = {}
        # flag: has data been imported?
        self._Flag_exist_data = False
        # Legacy public spelling of the same flag; ImportCSV used to create it
        # on the fly, which made ShowFlagExistData fail before the first import.
        self.Flag_exist_data = False
        self._base_path = base_path

        self._columns = ['AccX', 'AccY', 'AccZ', 'GyrX', 'GyrY', 'GyrZ']
        self._fft_col = ['fft_AccX', 'fft_AccY', 'fft_AccZ', 'fft_GyrX', 'fft_GyrY', 'fft_GyrZ']
        self._power_col = ['power_AccX', 'power_AccY', 'power_AccZ', 'power_GyrX', 'power_GyrY', 'power_GyrZ']

    def ImportCSV(self, Sclass, csv_file):
        """Read ``csv_file``, keep the 'ags' rows and store the scaled columns.

        Sclass   -- label stored as ``ClassName`` and used in output file names.
        csv_file -- anything ``pandas.read_csv`` accepts; its eight columns are
                    renamed to Type, Time, AccX..AccZ, GyrX..GyrZ.
        """
        self.ClassName = Sclass
        self._Flag_exist_data = True
        self.Flag_exist_data = True

        data = pd.read_csv(csv_file)
        data.columns = [u'Type', u'Time', u'AccX', u'AccY', u'AccZ', u'GyrX', u'GyrY', u'GyrZ']
        data = data[data.Type == 'ags']
        # pivot_table's default aggregation averages rows that share a Time value
        data = pd.pivot_table(data,
                              values=[u'AccX', u'AccY', u'AccZ', u'GyrX', u'GyrY', u'GyrZ'],
                              index=[u'Time'])

        for name in self._columns:
            scale = self._ACC_SCALE if name.startswith('Acc') else self._GYR_SCALE
            self._RawData[name] = data[name].values * scale
        self._RawData['Time'] = data.index

    def ShowFlagExistData(self):
        """Return True once ImportCSV has been called, False before."""
        return self._Flag_exist_data

    def GetColumns(self):
        """Return the list of sensor column names."""
        return self._columns

    def GetTime(self):
        """Return the stored time axis."""
        return self._RawData['Time']

    def ShowAllDf(self):
        """Print every raw sensor column."""
        for name in self._columns:
            print(name + ' : ')
            print(self._RawData[name])

    def _Time2Num(self, time):
        """Return the position of ``time`` on the time axis (IndexError if absent)."""
        return np.where(self._RawData['Time'] == np.datetime64(time))[0][0]

    def ShowQuery(self, Sname, rng=None):
        """Print column ``Sname``; ``rng=[start, stop]`` restricts it to a slice.

        With no ``rng`` the whole column is printed (the old ``[]`` default
        always raised IndexError).
        """
        data = self._RawData[Sname]
        if rng:
            data = data[rng[0]:rng[1]]
        print(Sname + ':' + str(data))

    def _sliding_window(self, Sname, samp, overlap):
        """Yield consecutive windows of column ``Sname`` spanning ``samp*10`` ms each.

        NOTE(review): ``overlap`` is accepted but not used — windows advance by
        a full window length, so they never overlap. Confirm the intended
        semantics before implementing it.
        """
        span = np.timedelta64(samp * self._SAMPLE_PERIOD_MS, 'ms')
        series = self._RawData[Sname]
        s = self._RawData['Time'][0]
        # First window: if its end time stamp is missing, the IndexError
        # propagates to the caller, exactly as before.
        yield series[self._Time2Num(s):self._Time2Num(s + span)]

        while True:
            s = s + span
            try:
                start = self._Time2Num(s)
                goal = self._Time2Num(s + span)
            except IndexError:
                # a window boundary is not on the time axis: end of usable data
                print('_sliding_window IndexError')
                break
            yield series[start:goal]

    def _cache_file(self, kind, name, samp):
        """Return the path (without '.npz') of one cached array; kind is 'fft' or 'power'."""
        base = self._base_path if self._base_path is not None else p_path
        return os.path.join(base, kind, self.ClassName + '_' + str(name) + '_' + str(samp))

    def _save_cache(self, kind, name, samp, array):
        """Write ``array`` with np.savez, creating the target folder if needed."""
        target = self._cache_file(kind, name, samp)
        folder = os.path.dirname(target)
        if not os.path.isdir(folder):
            os.makedirs(folder)
        np.savez(target, array)

    # Fast Fourier transform
    def GetFFT(self, Sfft, samp):
        """Load the FFT array saved by CalcFFT for column ``Sfft`` and window ``samp``."""
        return np.load(self._cache_file('fft', Sfft, samp) + '.npz')['arr_0']

    def CalcFFT(self, samp, overlap=0.5):
        """Compute and save the windowed FFT of every sensor column.

        For each window the DC term is dropped and the next ``samp // 2`` bins
        are kept, giving one ``(n_windows, samp // 2)`` complex array per column.
        Each column now gets its own array; previously the accumulator was shared,
        so every column after the first also contained the earlier columns' data.
        """
        start = time.time()
        half = samp // 2

        for n, f in zip(self._columns, self._fft_col):
            print('start' + n + '->' + f)
            # fresh list per column; bins 1..half skip the DC component
            windows = [fftpack.fft(d)[1:half + 1]
                       for d in self._sliding_window(n, samp, overlap)]
            print('CalcFFTStopIteration')
            fft_data = np.array(windows, dtype=complex).reshape(len(windows), half)
            self._FFTData[f] = fft_data
            self._save_cache('fft', f, samp, fft_data)
        elapsed_time = time.time() - start
        print("elapsed_time:{0}".format(elapsed_time) + "[sec]")

    # Spectral power
    def GetPower(self, Spower, samp):
        """Load the spectrum saved by CalcPower for column ``Spower`` and window ``samp``."""
        return np.load(self._cache_file('power', Spower, samp) + '.npz')['arr_0']

    def _power(self, fft_array):
        """Return the per-window amplitude spectrum, each window normalised to sum 1.

        The result is flattened to 1-D (window after window), as before.
        """
        rows = []
        for vector in fft_array:
            amplitude = np.sqrt(np.real(vector) ** 2 + np.imag(vector) ** 2)
            rows.append(amplitude / np.sum(amplitude))
        return np.array(rows, dtype=float).ravel()

    def CalcPower(self, samp, overlap=0.5):
        """Turn every saved FFT array into a normalised spectrum and save it.

        ``overlap`` is unused; it is kept so the signature matches CalcFFT.
        """
        start = time.time()
        for fft_name, power_name in zip(self._fft_col, self._power_col):
            print('start' + fft_name + '->' + power_name)
            power_data = self._power(self.GetFFT(fft_name, samp))
            self._PowerData[power_name] = power_data
            self._save_cache('power', power_name, samp, power_data)
        elapsed_time = time.time() - start
        print("elapsed_time:{0}".format(elapsed_time) + "[sec]")

In [7]:
def ImportCSV(csv_file,freq,mode='Round'):
    """Load one sensor CSV into a dict of NumPy arrays, snapping time to a grid.

    csv_file -- file name (anything ``pandas.read_csv`` accepts); its eight
                columns are renamed to Type, Time, AccX..AccZ, GyrX..GyrZ and
                only rows whose Type is 'ags' are kept.
    freq     -- grid step the time stamps are snapped to.
    mode     -- 'Round'     : round to the nearest multiple of freq
                'Roundup'   : round up to the next multiple of freq
                'Rounddown' : round down to the previous multiple of freq

    Returns a dict with keys AccX, AccY, AccZ, GyrX, GyrY, GyrZ and Time.
    For an unknown ``mode`` a message is printed and -1 is returned.
    """
    if mode not in ('Round', 'Roundup', 'Rounddown'):
        print('check mode and inputed word is caused error')
        return -1

    # design dataframe and import csv
    data = pd.read_csv(csv_file)
    data.columns = [u'Type', u'Time', u'AccX', u'AccY', u'AccZ', u'GyrX', u'GyrY', u'GyrZ']
    data = data[data['Type'] == 'ags']

    RawData = {}
    # Acc data  [0.1mG]   => [G]
    for axis in ('AccX', 'AccY', 'AccZ'):
        RawData[axis] = data[axis].values * 0.0001
    # Gyr data  [0.01dps] => [dps]   ...dps = degrees per second
    for axis in ('GyrX', 'GyrY', 'GyrZ'):
        RawData[axis] = data[axis].values * 0.01

    # Snap the time column with floor-division arithmetic on the whole array.
    stamps = data['Time'].values
    if mode == 'Roundup':
        # ceiling via negated floor division (previously this branch floored,
        # making it identical to 'Rounddown')
        snapped = (-((-stamps) // freq)) * freq
    elif mode == 'Rounddown':
        snapped = (stamps // freq) * freq
    else:  # 'Round': add half a step, then floor
        snapped = ((stamps + freq // 2) // freq) * freq

    RawData['Time'] = snapped
    return RawData
            

In [8]:
def CalcStartTime(array):
    """Return the largest of the per-series minima, i.e. the latest first value.

    ``array`` is an indexable collection of series; every series must be
    non-empty and ``array`` must hold at least one series.
    """
    latest_start = min(array[0])

    position = 1
    while position < len(array):
        candidate = min(array[position])
        if candidate > latest_start:
            latest_start = candidate
        position += 1

    return latest_start

In [9]:
def CalcGoalTime(array):
    """Return the smallest of the per-series maxima, i.e. the earliest last value.

    ``array`` is an indexable collection of series; every series must be
    non-empty and ``array`` must hold at least one series.
    """
    earliest_goal = max(array[0])

    position = 1
    while position < len(array):
        candidate = max(array[position])
        if candidate < earliest_goal:
            earliest_goal = candidate
        position += 1

    return earliest_goal

**2016-06-14**  
もう１度、前処理について考える。特に、入力データの特徴、出力データの仕様を改める。  
*Features of the input data (preconditions)*
1. センサデータは{time, accx, accy, accz, gyrx, gyry, gyrz}が揃っている
2. また、センサデータはすべて整数型となっている。
3. nanデータはない

*output Data*
1. accは[0.1mG]から[G]に、gyrは[0.01dps]から[dps]に単位変換する（dps = degree per second）。
2. timeはサンプリング周期で丸める。
3. Nanデータは存在する。


In [10]:
def NanPating(DicData,freq):
    """Put a recording on a regular time grid, padding missing samples with NaN.

    DicData -- dict with the keys AccX, AccY, AccZ, GyrX, GyrY, GyrZ and Time,
               each an equally long 1-D array.
    freq    -- sampling period, in the same unit as DicData['Time'].

    Wherever two neighbouring time stamps differ by something other than
    ``freq``, the missing grid points are inserted: the sensor columns receive
    NaN and the time column receives the missing stamps. Real samples on both
    sides of a gap are kept (the sample just before each gap used to be
    overwritten with NaN). When the step is smaller than ``freq`` (for example
    a duplicated stamp) the earlier of the two samples is dropped, as before.

    Returns a new dict with the same seven keys; all arrays are float.
    The elapsed time is printed.
    """
    start_time = time.time()

    # locate the irregular steps on the time axis
    stamps = np.asarray(DicData['Time'])
    steps = np.diff(stamps)
    gap_index = np.flatnonzero(steps != freq)
    # number of grid steps spanned by each irregular step (truncated)
    gap_slots = [int(int(steps[i]) / float(freq)) for i in gap_index]

    def regrid(values, make_fill):
        """Rebuild one column, calling make_fill(last_value, count) at each gap."""
        values = np.asarray(values, dtype=float)
        pieces = []
        cursor = 0
        for idx, slots in zip(gap_index, gap_slots):
            if slots >= 1:
                # keep the sample before the gap, then add the missing points
                pieces.append(values[cursor:idx + 1])
                pieces.append(make_fill(values[idx], slots - 1))
            else:
                # step shorter than one period: drop the earlier sample
                pieces.append(values[cursor:idx])
            cursor = idx + 1
        pieces.append(values[cursor:])
        return np.concatenate(pieces)

    def nan_fill(last, count):
        return np.full(count, np.nan)

    def time_fill(last, count):
        return last + freq * np.arange(1, count + 1)

    Array = {}
    for axis in ('AccX', 'AccY', 'AccZ', 'GyrX', 'GyrY', 'GyrZ'):
        Array[axis] = regrid(DicData[axis], nan_fill)
    Array['Time'] = regrid(stamps, time_fill)

    elapsed_time = time.time() - start_time
    print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
    return Array

In [11]:
def CalcSearchIndexFromTime(checkData,CompareTime):
    """Return the index of the first element of checkData equal to CompareTime.

    Prints a message naming the index when a match is found.
    Returns None when no element matches.
    """
    for i, stamp in enumerate(checkData):
        if CompareTime == stamp:
            print(str(CompareTime)+' is much in the index  whose number is '+str(i))
            return i
    return None
        

In [12]:
def Extraction(dic,start,goal):
    """Return a new dict holding the slice [start:goal] of every array in dic.

    The 'Time' entry is additionally converted with ``astype(int)``.
    Iterates over the dict items directly, so it no longer depends on
    ``dict.keys()`` being an indexable list.
    """
    output = {}
    for key, values in dic.items():
        window = values[start:goal]
        output[key] = window.astype(int) if key == 'Time' else window
    return output
    

とりあえず、前処理について大方終わった.
ここで、なぜできたのかを振り返るためにアルゴリズムや処理の流れについて考える。
まず、対象とする入力データからNANデータを加えてから共通区間を導くという考えである。
では、NANデータを加えるまでの流れを述べていく。
入力データは
* センサデータは{time, accx, accy, accz, gyrx, gyry, gyrz}が揃っている
* また、センサデータはすべて整数型となっている。
* nanデータはない

よって、まずはtimeで前後の差がサンプリング周期と異なるもの（今回はサンプリング周期は10msとしている）のインデックスを探す。
そして、そのときのインデックスと前後の差を算出する。
つぎに、その算出したものを使って加速度、角速度データにはNANを加える。時刻には差の分だけインデックスを追加する。
そして、{timeData,nan,nan,nan,nan,nan,nan}というような行を加える。
次に、共通区間のスタート、ゴールを探す。このやり方は今までできたやり方で行ったのでここでは省略。
そして、共通区間のスタート、ゴールのデータを抽出できた。


In [13]:
ls

11411715-20160617-103037714.csv  14011714-20160617-103037714.csv  settings.sgu
14011711-20160617-103037714.csv  14011716-20160617-103037714.csv  settings.sse
14011712-20160617-103037714.csv  14011717-20160617-103037714.csv  [0m[01;35mvideo1.wmv[0m
14011713-20160617-103037714.csv  [01;34mmem[0m/                             [01;35mvideo2.wmv[0m


In [14]:
# Load the seven CSV recordings shown in the `ls` listing above, one dict per
# file, with freq=1 and the default 'Round' mode.
# NOTE(review): the fifth file name starts with 11411715 while the others start
# with 140117xx; this matches the directory listing.
DataCsv1= ImportCSV('14011711-20160617-103037714.csv',1)
DataCsv2= ImportCSV('14011712-20160617-103037714.csv',1)
DataCsv3= ImportCSV('14011713-20160617-103037714.csv',1)
DataCsv4= ImportCSV('14011714-20160617-103037714.csv',1)
DataCsv5= ImportCSV('11411715-20160617-103037714.csv',1)
DataCsv6= ImportCSV('14011716-20160617-103037714.csv',1)
DataCsv7= ImportCSV('14011717-20160617-103037714.csv',1)

In [15]:
DataCsv1

{'AccX': array([ 0.0043,  0.0021, -0.0027, ...,  0.0493,  0.0568,  0.0534]),
 'AccY': array([-0.0268, -0.0263, -0.0342, ...,  0.0286,  0.0333,  0.0345]),
 'AccZ': array([ 0.9306,  0.8986,  0.9028, ...,  0.9223,  0.9221,  0.9248]),
 'GyrX': array([ 1.11,  0.91,  0.92, ...,  0.59,  0.5 ,  0.45]),
 'GyrY': array([-0.6 , -0.57, -0.69, ..., -0.85, -0.89, -0.86]),
 'GyrZ': array([-0.31, -0.31, -0.34, ..., -0.35, -0.29, -0.45]),
 'Time': array([37838721, 37838722, 37838723, ..., 37969461, 37969462, 37969463])}

In [16]:
def SearchNAN(array,freq):
    """Print every irregular step found in a sequence of time stamps.

    array -- 1-D sequence of time stamps.
    freq  -- expected difference between neighbouring stamps.

    Prints ``diffNum`` (the size of each step that differs from ``freq``) and
    ``diffIndex`` (the index of the sample just before that step). Both are
    printed as float arrays, matching the earlier output format.
    Returns None.
    """
    steps = np.diff(np.asarray(array))
    is_gap = steps != freq
    # truncate to whole numbers, then show as floats like the original arrays
    diffNum = steps[is_gap].astype(int).astype(float)
    diffIndex = np.flatnonzero(is_gap).astype(float)
    print('diffNum='+str(diffNum))
    print('diffIndex'+str(diffIndex))

In [17]:
SearchNAN(DataCsv1['Time'],1)

diffNum=[   38.   874.     4.  1014.    26.   963.   554.   326.   663.   200.
   707.   255.   616.   338.   736.   290.   843.   346.   616.   341.
   553.   341.   605.   358.   719.   414.   603.   377.   667.   427.
   687.   396.   609.   349.   919.   305.   831.   513.  1085.   780.
  1175.   445.   978.   382.   762.   376.   911.   343.   792.   510.
   857.   378.  1266.  1033.  1038.   783.   751.   740.   981.   400.
   733.   524.   545.   541.  1051.   276.   853.   560.   834.   373.
   762.   533.   573.   631.   691.   548.   423.   566.   764.   518.
   724.   332.   596.   613.   509.   505.   432.   512.   644.  1237.
   915.   899.  1394.   859.   877.   931.  1177.   962.  1112.   774.
  1271.  1134.  1209.  1143.   857.  1033.  1473.  1140.   829.   847.
   928.   883.  1403.   902.   810.   935.  1268.  1169.   906.  1143.
  2187.  1395.   826.   531.   599.   503.   564.   517.   111.  1040.
     4.  1068.    89.  1277.    15.  1106.    64.  1060.  1177.   156

In [18]:
DataCsv1['Time'][309]

37839030

In [19]:
DataCsv1['Time'][310]

37839068

In [20]:
# Load the seven CSV files from the mem/ sub-directory the same way as the
# DataCsv* recordings (freq=1, default 'Round' mode).
MemDataCsv1= ImportCSV('mem/mem-14011711-20160617-103038606.csv',1)
MemDataCsv2= ImportCSV('mem/mem-14011712-20160617-103038627.csv',1)
MemDataCsv3= ImportCSV('mem/mem-14011713-20160617-103038690.csv',1)
MemDataCsv4= ImportCSV('mem/mem-14011714-20160617-103038731.csv',1)
MemDataCsv5= ImportCSV('mem/mem-11411715-20160617-103038767.csv',1)
MemDataCsv6= ImportCSV('mem/mem-14011716-20160617-103038807.csv',1)
MemDataCsv7= ImportCSV('mem/mem-14011717-20160617-103037850.csv',1)

In [21]:
# NOTE(review): this repeats the earlier SearchNAN call on DataCsv1, directly
# after the MemDataCsv* files were loaded. MemDataCsv1 is never passed to
# SearchNAN in the visible cells — confirm whether MemDataCsv1 was intended.
SearchNAN(DataCsv1['Time'],1)

diffNum=[   38.   874.     4.  1014.    26.   963.   554.   326.   663.   200.
   707.   255.   616.   338.   736.   290.   843.   346.   616.   341.
   553.   341.   605.   358.   719.   414.   603.   377.   667.   427.
   687.   396.   609.   349.   919.   305.   831.   513.  1085.   780.
  1175.   445.   978.   382.   762.   376.   911.   343.   792.   510.
   857.   378.  1266.  1033.  1038.   783.   751.   740.   981.   400.
   733.   524.   545.   541.  1051.   276.   853.   560.   834.   373.
   762.   533.   573.   631.   691.   548.   423.   566.   764.   518.
   724.   332.   596.   613.   509.   505.   432.   512.   644.  1237.
   915.   899.  1394.   859.   877.   931.  1177.   962.  1112.   774.
  1271.  1134.  1209.  1143.   857.  1033.  1473.  1140.   829.   847.
   928.   883.  1403.   902.   810.   935.  1268.  1169.   906.  1143.
  2187.  1395.   826.   531.   599.   503.   564.   517.   111.  1040.
     4.  1068.    89.  1277.    15.  1106.    64.  1060.  1177.   156

In [22]:
SearchNAN(MemDataCsv2['Time'],1)

diffNum=[]
diffIndex[]


In [23]:
SearchNAN(MemDataCsv3['Time'],1)

diffNum=[]
diffIndex[]


In [24]:
SearchNAN(MemDataCsv4['Time'],1)

diffNum=[]
diffIndex[]


In [25]:
SearchNAN(MemDataCsv5['Time'],1)

diffNum=[]
diffIndex[]


In [26]:
# NOTE(review): identical to the previous cell — MemDataCsv5 is checked twice.
SearchNAN(MemDataCsv5['Time'],1)

diffNum=[]
diffIndex[]


In [27]:
SearchNAN(MemDataCsv6['Time'],1)

diffNum=[]
diffIndex[]


In [28]:
SearchNAN(MemDataCsv7['Time'],1)

diffNum=[]
diffIndex[]


**2016-06-18**  
必要な前処理を見極める

In [29]:
# Print the first time stamp of each mem/ recording.
print MemDataCsv1['Time'][0]
print MemDataCsv2['Time'][0]
print MemDataCsv3['Time'][0]
print MemDataCsv4['Time'][0]
print MemDataCsv5['Time'][0]
print MemDataCsv6['Time'][0]
print MemDataCsv7['Time'][0]

37838721
37838742
37838805
37838846
37838882
37838922
37837965


In [33]:
# Print the first time stamp of each DataCsv* recording, for comparison with
# the mem/ recordings printed in the previous cell.
# NOTE(review): this cell's execution count is 33 while the previous cell is 29;
# re-run the notebook top to bottom to confirm the recorded output.
print DataCsv1['Time'][0]
print DataCsv2['Time'][0]
print DataCsv3['Time'][0]
print DataCsv4['Time'][0]
print DataCsv5['Time'][0]
print DataCsv6['Time'][0]
print DataCsv7['Time'][0]

37838721
37838742
37838805
37838846
37838882
37838922
37837965
