# Importing necessary libraries

In [4]:
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
import glob
import pandas as pd
import utils

# Data info

In [5]:
# Load one pickled database and preview it as a labelled DataFrame.
# The context manager closes the file handle as soon as the payload is read.
# NOTE(security): pickle.load can execute arbitrary code -- only open trusted files.
with open('separeted_db/inadequate_tools.pkl', "rb") as db_file:
    data = pickle.load(db_file)

# Column names for the 8 fields of each row (series ID, sample index,
# three voltage channels and three current channels).
df = pd.DataFrame(
    data,
    columns=[
        "ID",
        "Time",
        "Voltage A",
        "Voltage B",
        "Voltage C",
        "current A",
        "current B",
        "current C",
    ],
)

df.head()


Unnamed: 0,ID,Time,Voltage A,Voltage B,Voltage C,current A,current B,current C
0,16777.0,1.0,3.135681,2.657776,2.406921,1.834717,2.068481,1.734314
1,16777.0,2.0,3.135681,2.657776,2.406921,1.834717,2.068481,1.734314
2,16777.0,3.0,2.376404,2.355042,2.488403,1.972961,1.911316,1.852112
3,16777.0,4.0,2.064514,1.598511,2.168274,2.68158,1.673279,3.349304
4,16777.0,5.0,2.450256,2.393799,2.546692,2.945557,2.971497,2.659302


In [41]:
class db_reduction:
    """Build reduced, labelled datasets from a folder of pickled time-series arrays.

    Every path matched by ``input_folder + '*'`` is unpickled and must yield a
    2-D NumPy array with 8 columns whose rows are grouped by series: column 0
    holds the series ID, column 1 the sample index, and every series has the
    same number of samples (the array is reshaped to
    ``(n_ids, n_measures, 8)``).

    For each iteration, a fraction of the series of every file is kept, the
    kept rows of all files are stacked, and two pickles are written:

    * ``<data_output_folder>data_mainteined_percent__<p>__it__<i>.pkl``
    * ``<target_output_folder>target_mainteined_percent__<p>__it__<i>.pkl``

    (The spelling of the file names is kept as-is because other code may
    already depend on it.)

    Labels are assigned from the position ``j`` of a file in the *sorted*
    file list: the label is the number of entries of ``class_boundaries``
    that are ``<= j``. With the default ``(12, 14)`` this means files 0-11
    get label 0, files 12-13 get label 1 and files 14+ get label 2.

    Parameters
    ----------
    input_folder : str
        Prefix used for globbing; include the trailing path separator
        (e.g. ``'pickle/'``) because ``'*'`` is appended verbatim.
    data_output_folder, target_output_folder : str
        Prefixes (normally existing folders with a trailing separator) to
        which the output file names are appended verbatim.
    n_iterations : int
        Number of data/target file pairs to write.
    maintained_percent : float
        Forwarded as ``test_size`` to ``train_test_split``; the "test" part
        of the split is the part that is kept.
    class_boundaries : sequence of int, optional
        File-index thresholds separating the classes (default ``(12, 14)``).
    """

    # Number of columns each input array is expected to have.
    _N_COLUMNS = 8

    def __init__(self, input_folder, data_output_folder='iteration_data/',
                 target_output_folder='iteration_target/', n_iterations=33,
                 maintained_percent=0.3, class_boundaries=(12, 14)):
        # Sorted so that the index-based labelling in transform() is deterministic.
        self.file_list = sorted(glob.glob(input_folder + '*'))

        self.input_folder = input_folder
        self.target_output_folder = target_output_folder
        self.data_output_folder = data_output_folder
        self.maintained_percent = maintained_percent
        self.n_iterations = n_iterations
        self.class_boundaries = tuple(class_boundaries)

    def _label_for_index(self, file_index):
        """Return the class label (as float) for the file at ``file_index``."""
        return float(sum(file_index >= bound for bound in self.class_boundaries))

    def _load_reduced(self, path):
        """Load one pickled array and return the kept series as a 2-D array."""
        # NOTE(security): pickle.load can execute arbitrary code -- only point
        # this class at folders whose contents you trust.
        with open(path, "rb") as source:
            data = pickle.load(source)

        n_measures = int(np.max(data[:, 1]))
        n_ids = len(np.unique(data[:, 0]))

        # One slab per series so that whole series are kept or dropped together.
        expanded = data.reshape(n_ids, n_measures, self._N_COLUMNS)
        del data

        # shuffle=False keeps the split deterministic and in file order.
        _, kept = train_test_split(expanded,
                                   test_size=self.maintained_percent,
                                   shuffle=False)

        return kept.reshape(len(kept) * n_measures, self._N_COLUMNS)

    @staticmethod
    def _dump(obj, path):
        """Pickle ``obj`` to ``path``, closing the file even if dumping fails."""
        with open(path, "wb") as sink:
            pickle.dump(obj, sink)

    def transform(self):
        """Write one reduced data pickle and one target pickle per iteration.

        Raises
        ------
        ValueError
            If at least one iteration is requested but no input file matched
            ``input_folder + '*'``.
        """
        bar = utils.ProgBar(len(self.file_list) * self.n_iterations,
                            "Reducing number of time series...")

        # NOTE(review): the split is deterministic (shuffle=False, fixed
        # test_size), so every iteration appears to produce identical output;
        # confirm whether a per-iteration shuffle/seed was intended.
        for i in range(self.n_iterations):
            data_chunks = []
            target_chunks = []

            for j, file in enumerate(self.file_list):
                reduced_data = self._load_reduced(file)

                data_chunks.append(reduced_data)
                target_chunks.append(
                    np.full(len(reduced_data), self._label_for_index(j)))

                bar.update()

            if not data_chunks:
                raise ValueError(
                    "No input files matched '{}*'".format(self.input_folder))

            # Stack once per iteration instead of re-copying the growing
            # array after every file.
            it_data = np.vstack(data_chunks)
            it_target = np.hstack(target_chunks)
            del data_chunks, target_chunks

            self._dump(
                it_data,
                self.data_output_folder + "data_mainteined_percent__{}__it__{}.pkl".format(
                    self.maintained_percent, i))

            self._dump(
                it_target,
                self.target_output_folder + "target_mainteined_percent__{}__it__{}.pkl".format(
                    self.maintained_percent, i))



In [42]:
# Configure the reduction over everything matched by 'pickle/*':
# 10 iterations, with 0.2 passed as the fraction of series to keep per file.
tool = db_reduction(
    'pickle/',
    n_iterations=10,
    maintained_percent=0.2,
)

In [43]:
# Run the reduction: writes one data pickle and one target pickle per iteration.
tool.transform()

Reducing number of time series...
[-----                                             ] 10.00% - 16 of 160(12898816, 8)
(12898816,)
[----------                                        ] 20.00% - 32 of 160(12898816, 8)
(12898816,)
[---------------                                   ] 30.00% - 48 of 160(12898816, 8)
(12898816,)
[--------------------                              ] 40.00% - 64 of 160(12898816, 8)
(12898816,)
[-------------------------                         ] 50.00% - 80 of 160(12898816, 8)
(12898816,)
[------------------------------                    ] 60.00% - 96 of 160(12898816, 8)
(12898816,)
[-----------------------------------               ] 70.00% - 112 of 160(12898816, 8)
(12898816,)
[----------------------------------------          ] 80.00% - 128 of 160(12898816, 8)
(12898816,)
[---------------------------------------------     ] 90.00% - 144 of 160(12898816, 8)
(12898816,)
[--------------------------------------------------] 100.00% - 160 of 160(12898816, 8)
(12