In [6]:
import glob
import os
import pickle
import time
from datetime import datetime

import numpy as np
import pandas as pd
import tsfresh
from sklearn.preprocessing import StandardScaler
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute

In [7]:
def dynamic_tsfresh (output_id):
    """Extract, from the newest processed CSV, the features selected on the seed data-set.

    The list of features to compute is read from
    ``<cwd>/.Kernel/features_filtered_<output_id>.csv``; only its column names
    are used. The time series are read from the most recently created
    ``<cwd>/Input/Processed_Data/new_data_*.csv`` file, whose first two columns
    are taken as the series id and the time index and whose remaining columns
    (up to eight columns in total) are named ``Sensor_1``, ``Sensor_2``, ...

    Parameters
    ----------
    output_id
        Identifier used to build the name of the filtered-features CSV.

    Returns
    -------
    pandas.DataFrame
        One row per series id, columns in the same order as the
        filtered-features CSV, imputed with tsfresh's ``impute`` and sorted
        by index.

    Raises
    ------
    FileNotFoundError
        If no ``new_data_*.csv`` file exists in the processed-data folder.
    """
    # paths (all relative to the current working directory)
    raspberry_path = os.getcwd()
    kernel_path = raspberry_path + '/.Kernel'
    input_path = raspberry_path + '/Input'
    processed_path = input_path + '/Processed_Data'

    # Load the filtered features from the seed data-set; only the header matters here
    features_filtered = pd.read_csv('{}/features_filtered_{}.csv'.format(kernel_path, output_id))
    columns = features_filtered.columns.tolist()
    kind_to_fc_parameters = tsfresh.feature_extraction.settings.from_columns(features_filtered.columns)

    # A feature column is named '<sensor>__<feature>...'; the sensor is the text
    # before the first '__'. Sorted to keep a deterministic extraction order.
    unique_sensors_names = sorted({name.split('__')[0] for name in columns})

    # Load the incoming data: newest processed file by creation time
    all_files = glob.glob(processed_path + '/new_data_*.csv')
    if not all_files:
        raise FileNotFoundError('No new_data_*.csv file found in {}'.format(processed_path))
    latest_csv = max(all_files, key=os.path.getctime)
    Data = np.genfromtxt(latest_csv, delimiter=',')

    # Name the columns from the slice that is actually kept, so a file with
    # more than eight columns no longer causes a column-count mismatch.
    values = Data[:, 0:8]
    sensor_columns = ['Sensor_' + str(x) for x in range(1, values.shape[1] - 1)]
    data_frame = pd.DataFrame(values, columns=['id', 'time'] + sensor_columns)

    # Feature extraction guided by the seed data-set, one sensor at a time.
    # Selecting the three columns by name works for any sensor; the previous
    # label slice 'id':<first sensor> only produced three columns when the
    # first selected sensor happened to be Sensor_1.
    arrayList = []
    for sensor in unique_sensors_names:
        sensor_frame = data_frame[['id', 'time', sensor]]
        extracted_features = extract_features(
            sensor_frame,
            column_id="id",
            column_sort="time",
            default_fc_parameters=kind_to_fc_parameters[sensor],
        )
        arrayList.append(extracted_features)

    original_space_features = pd.concat(arrayList, axis=1)

    # Sort the features in accordance with the seed data-set
    original_space_features = original_space_features[columns]
    impute(original_space_features)  # called for its in-place effect, as before
    original_space_features = original_space_features.sort_index()

    return original_space_features

In [8]:
def PCA_projection (features):
    """Standardise ``features`` and project them with the PCA saved on disk.

    The PCA object is unpickled from ``<cwd>/.Kernel/pca.sav``.

    Parameters
    ----------
    features
        Feature table accepted by ``StandardScaler.fit`` / ``transform``
        (the caller in this notebook passes the DataFrame returned by
        ``dynamic_tsfresh``).

    Returns
    -------
    The output of ``loaded_pca.transform`` applied to the standardised features.
    """
    # paths
    raspberry_path = os.getcwd()
    kernel_path = raspberry_path + '/.Kernel'

    # Load the model from disk; the context manager closes the file handle,
    # which the previous bare open() never did.
    # NOTE: pickle.load executes arbitrary code from the file - only load
    # artefacts produced by the trusted training notebook.
    with open('{}/pca.sav'.format(kernel_path), 'rb') as pca_file:
        loaded_pca = pickle.load(pca_file)

    # NOTE(review): the scaler is fitted on the incoming batch itself, not on
    # the data the PCA was trained with. With a single row every standardised
    # feature becomes 0. Presumably the scaler fitted at training time should
    # be saved next to pca.sav and reused here - confirm against the training
    # notebook before changing.
    scaler = StandardScaler().fit(features)
    features_padronizadas = scaler.transform(features)

    features_reduzidas = loaded_pca.transform(features_padronizadas)

    return features_reduzidas

In [None]:
def Model_Predict (projected_data):
    """Classify every row of ``projected_data`` with the model saved on disk.

    The model is unpickled from ``<cwd>/.Kernel/model.sav``. Each row is
    predicted on its own and the verdict is printed ('Ferramenta Boa' when the
    label equals 0, 'Ferramenta Ruim' otherwise).

    Parameters
    ----------
    projected_data
        2-D array; one prediction is made per row.

    Returns
    -------
    numpy.ndarray
        One predicted label per input row, in row order (empty when
        ``projected_data`` has no rows). The previous implementation returned
        only the prediction of the last row and failed on empty input.
    """
    # paths
    raspberry_path = os.getcwd()
    kernel_path = raspberry_path + '/.Kernel'

    # NOTE: pickle.load executes arbitrary code from the file - only load
    # artefacts produced by the trusted training notebook.
    with open('{}/model.sav'.format(kernel_path), 'rb') as model_file:
        model = pickle.load(model_file)

    target = []

    for i in range(projected_data.shape[0]):

        # predict() is fed one sample at a time, reshaped to (1, n_features)
        y_predict = model.predict(projected_data[i, :].reshape(1, -1))

        if y_predict[0] == 0:
            print('Ferramenta Boa')
        else:
            print('Ferramenta Ruim')

        target.append(y_predict[0])
        print('Label dado pela NN: %d' % int(y_predict[0]))
        print('___________________')
        print('                   ')

    # Return every label, not just the last one, so the caller saves them all
    return np.array(target)

In [10]:
#### REAL-TIME CODE ####
# Run 'Unified_Code.ipynb' (same page) first: it trains and saves the model used below.

# id of the data-set used as seed
output_id = 50

# paths, all derived from the current working directory
raspberry_path = os.getcwd()
# where the trained model is stored
kernel_path = '{}/.Kernel'.format(raspberry_path)
# where the raw time series are picked up (Marcos' code saves them here)
input_path = '{}/Input'.format(raspberry_path)
# where the processed time series that feed the model are written
processed_path = '{}/Processed_Data'.format(input_path)
# where a CSV with the classifications is written
classification_path = '{}/Classification'.format(raspberry_path)

# number of readings in each time series
N = 250
# sequential time index 1..N for one time series
time_id = np.arange(N) + 1

In [12]:
# Real-time loop: watch the input folder, and whenever a new newest CSV shows
# up, format it, save the processed copy and run it through the model.
# Relies on names defined in the cells above: input_path, processed_path,
# classification_path, output_id, N, time_id and the three pipeline functions.

old_csv = None  # path of the last CSV already handled (None until the first one is seen)
header = 0  # header rows to skip; the test CSVs currently arrive without a header
POLL_INTERVAL_S = 0.5  # pause between folder scans when there is nothing new, to avoid a busy loop
N_CHANNELS = 6  # only the first 6 channels of the raw file are used

try:
    while True:
        # look for the most recent .csv file
        all_files = glob.glob(input_path + '/*.csv')
        if not all_files:
            # nothing in the folder yet
            time.sleep(POLL_INTERVAL_S)
            continue
        try:
            latest_csv = max(all_files, key=os.path.getctime)
        except OSError:
            # a file disappeared between the listing and the ctime lookup; rescan
            time.sleep(POLL_INTERVAL_S)
            continue

        # skip if this newest file has already been handled
        if latest_csv == old_csv:
            time.sleep(POLL_INTERVAL_S)
            continue
        old_csv = latest_csv

        # raw readings as a numpy array, restricted to the channels in use
        raw_data = np.genfromtxt(latest_csv, skip_header=header, delimiter=',')
        raw_data = raw_data[:, 0:N_CHANNELS]

        # Every series must have exactly N readings. Rows that do not fill a
        # whole series are dropped; they used to be left with id 0 / time 0
        # and ended up as a spurious extra series.
        n_series = raw_data.shape[0] // N
        if n_series == 0:
            print('Skipping {}: fewer than {} rows'.format(latest_csv, N))
            continue
        raw_data = raw_data[:n_series * N]

        rows = raw_data.shape[0]
        data = np.zeros((rows, N_CHANNELS + 2))  # id, time, then the normalised channels

        # first two columns: series id (1-based) and the time index within the series
        for i in range(n_series):
            data[i*N:(i+1)*N, 0] = i + 1
            data[i*N:(i+1)*N, 1] = time_id

        # min-max normalise each channel to [-1, 1]
        for i in range(N_CHANNELS):
            channel = raw_data[:, i]
            low = channel.min()
            span = channel.max() - low
            if span == 0:
                # constant channel: avoid 0/0 (NaN); use the middle of the range
                data[:, i+2] = 0.0
            else:
                data[:, i+2] = 2*(channel - low)/span - 1

        # save the formatted array in the processed-data folder
        now = datetime.now()
        timestr = now.strftime("%Y-%m-%d__%H-%M-%S")
        np.savetxt(processed_path+'/new_data_{}.csv'.format(timestr), data, delimiter=',')

        ### -------- end of formatting -------- ###

        ### REAL-TIME MODEL ###
        # dynamic_tsfresh reads the newest file in processed_path, i.e. the one just written
        features = dynamic_tsfresh(output_id)
        projected_data = PCA_projection(features)
        target = Model_Predict(projected_data)

        # save the targets under the same timestamp so they can be compared later
        np.savetxt(classification_path+'/target_{}.csv'.format(timestr), target, delimiter=',')

except KeyboardInterrupt:  # Ctrl+C, or "Interrupt kernel" in Jupyter, ends the loop
    print("EOF")

c:\Users\mathe\Documents\GitHub\Lathes_Tool_Project\RaspberryPi\Model/Input\test.csv
Feature Extraction: 100%|██████████| 20/20 [00:03<00:00,  5.24it/s]
Feature Extraction: 100%|██████████| 20/20 [00:04<00:00,  4.36it/s]
Feature Extraction: 100%|██████████| 20/20 [00:05<00:00,  3.90it/s]
Feature Extraction: 100%|██████████| 20/20 [00:05<00:00,  3.51it/s]
Feature Extraction: 100%|██████████| 20/20 [00:06<00:00,  3.23it/s]
Feature Extraction: 100%|██████████| 20/20 [00:06<00:00,  2.95it/s]
Ferramenta Boa
Label dado pela NN: 0
___________________
                   
Ferramenta Boa
Label dado pela NN: 0
___________________
                   
Ferramenta Boa
Label dado pela NN: 0
___________________
                   
Ferramenta Boa
Label dado pela NN: 0
___________________
                   
Ferramenta Boa
Label dado pela NN: 0
___________________
                   
Ferramenta Boa
Label dado pela NN: 0
___________________
                   
Ferramenta Boa
Label dado pela NN: 0
________