In [1]:
## INTERACTIVE DATA VISUALIZATION OF ELECTRONIC NOSE: data preparation part
    # Data from:       [1] R. Huerta et al., Chemom. Intell. Lab. Syst. 157, 169-176 (2016).
    # Downloaded from: [2] UCI Machine Learning Repository:
    #                      https://archive.ics.uci.edu/ml/datasets/Gas+sensors+for+home+activity+monitoring 


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
## PREPARATION OF METADATA FILE

# Only columns 0, 2, 3 and 4 of the metadata file are read. The labels pandas
# parses for them are off by one relative to their content, so they are renamed
# to match what each column actually holds.
metadata = (
    pd.read_table('HT_Sensor_metadata.dat', usecols=[0, 2, 3, 4])
    .rename(columns={'Unnamed: 2': 'class', 'class': 't0', 't0': 'dt [h]'})
)
    # Metadata consists of the following columns [2]:
        # id: identification of the induction, matching the id column in the dataset
        # class: label of the induction (e.g. 'banana', 'wine' in the output below)
        # t0: time in hours at which the induction started (represents time zero in file HT_Sensor_dataset.dat)
        # dt [h]: interval that this induction lasted

# Creating metadata_sub with the inductions shorter than the threshold = 10 min removed (i.e. ind_33, 76, 95)
# NOTE(review): allow_pickle=True executes arbitrary code if the .npy file is untrusted;
# only load files produced by this project.
inductions_sub = np.load('Dataset_Split10min_hashtable.npy', allow_pickle=True)

# One boolean per metadata row: True when the induction id is present in inductions_sub.
# The membership test is the same `in` check on the loaded array as before; dtype/count
# are given explicitly so that an empty metadata table still yields a valid boolean mask.
keep_mask = np.fromiter(
    (int(induction_id) in inductions_sub for induction_id in metadata.iloc[:, 0]),
    dtype=bool,
    count=len(metadata),
)

# Build the subsampled table as a new frame (no in-place mutation): keep the selected
# rows, drop the id column and renumber the rows from 0.
metadata_sub = (
    metadata.loc[keep_mask]
    .drop(columns=['id'])
    .reset_index(drop=True)
)

metadata_sub.to_pickle('metadata_sub.pkl')
display(metadata_sub.head())

Unnamed: 0,class,t0,dt [h]
0,banana,13.49,1.64
1,wine,19.61,0.54
2,wine,19.99,0.66
3,banana,6.49,0.72
4,wine,20.07,0.53


In [3]:
## PREPARATION OF DATASET FILE

# Loading dataset with moving window
# NOTE(review): allow_pickle=True executes arbitrary code if the .npy file is untrusted;
# only load files produced by this project.
dataset_mv = np.load('Dataset_Split10min.npy', allow_pickle=True)

# Names given to columns 0..9 of every per-induction array, in order.
SENSOR_COLUMNS = ['R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7', 'R8', 'Temp.', 'Humidity']


def _induction_frame(ind, data):
    """Return one induction's 2-D array as a DataFrame.

    Parameters
    ----------
    ind : int
        Position of the induction in ``dataset_mv``; stored in the ``id`` column.
    data : numpy.ndarray
        2-D array whose first ten columns are mapped to ``SENSOR_COLUMNS``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``time`` (row counter 0..n-1 within the induction) and
        the ten sensor columns.
    """
    columns = {'id': ind, 'time': np.arange(0, data.shape[0], 1)}
    columns.update({name: data[:, col] for col, name in enumerate(SENSOR_COLUMNS)})
    return pd.DataFrame(columns)


# Transforming dataset_mv numpy array to pandas dataframe.
# All per-induction frames are collected first and concatenated once; concatenating
# inside the loop would re-copy the accumulated frame on every iteration.
induction_frames = [
    _induction_frame(ind, dataset_mv[ind]) for ind in range(dataset_mv.shape[0])
]

if induction_frames:
    dataset_pd = pd.concat(induction_frames, axis=0, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty, correctly labelled frame.
    dataset_pd = pd.DataFrame(columns=['id', 'time'] + SENSOR_COLUMNS)

dataset_pd.to_pickle('dataset_pd.pkl')

print(dataset_pd.shape)
display(dataset_pd.head())
display(dataset_pd.tail())

(3401, 12)


Unnamed: 0,id,time,R1,R2,R3,R4,R5,R6,R7,R8,Temp.,Humidity
0,0,0,12.810492,10.3664,10.453442,11.674575,13.494583,13.275242,8.305328,9.045543,26.422667,59.470167
1,0,1,12.354258,9.678809,8.329226,8.973556,7.940278,10.751592,4.836595,5.932837,26.524783,60.481617
2,0,2,12.393392,9.731926,8.536201,9.042302,8.126959,10.784767,4.80277,5.906617,26.525275,60.077433
3,0,3,12.432925,9.77114,8.39775,9.08899,7.429536,10.764858,4.788543,5.896372,26.506642,59.916733
4,0,4,12.460583,9.778931,7.932641,9.022253,6.724887,10.728892,4.773467,5.890961,26.545908,59.960725


Unnamed: 0,id,time,R1,R2,R3,R4,R5,R6,R7,R8,Temp.,Humidity
3396,96,25,12.580592,10.246433,10.4092,11.774558,12.193242,17.23205,6.185085,7.171213,27.516567,55.202433
3397,96,26,12.574267,10.257442,10.4224,11.788508,12.208475,17.225883,6.180369,7.168371,27.511092,55.095692
3398,96,27,12.562667,10.249067,10.416692,11.777117,12.198383,17.2312,6.170956,7.160426,27.588042,54.975267
3399,96,28,12.544067,10.247525,10.417825,11.788858,12.1979,17.23065,6.172762,7.159166,27.41545,55.3992
3400,96,29,12.552458,10.254075,10.432167,11.7939,12.210792,17.231167,6.172204,7.153398,27.5715,54.942858
