<a href="https://colab.research.google.com/github/JohnTaco93/Foxconn-industrial-AI-Data-Challenge/blob/master/2.Feature_processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#In this kernel the vibration-current data is processed for cutting tool #1. There are 48 files in total that have to be joined.
#Also, for each file, features will be created (mean, standard deviation, peak, Root Mean Square, Kurtosis, and Crest Factor).
#Conclusions made in the Exploratory Data Analysis are taken into account.

In [2]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so the data files can be read.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import warnings
warnings.filterwarnings('ignore')
import datetime
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt 
import seaborn as sns


In [4]:
# Root folder of the original challenge data on the mounted Drive; the loading cell appends
# the per-tool sub-path to it. Note the value already ends with '/'.
path_data = "/content/gdrive/My Drive/Foxconn2020-tool-wear/Data-Original/"

In [5]:
# Reading and joining all 48 vibration-current datasets of cutting tool #1 into one frame.
# There are signals that are outliers (10^3, 10^4, 10^5, 10^(-5)), so the absolute value of
# vibration_2 and vibration_3 is limited to 30: samples with |value| > 30 are replaced by the
# mean of that file's samples with |value| < 30.
n_files = 48
vibration_limit = 30
clipped_columns = ['vibration_2', 'vibration_3']

sensor_frames = []
for i in range(n_files):
  data_sensor=pd.read_csv(path_data+'/01-Training Data/01/Sensor/'+str(i+1)+'.csv')
  for column in clipped_columns:
    magnitude = data_sensor[column].abs()
    # The replacement mean is taken from the in-range samples before anything is overwritten.
    in_range_mean = data_sensor.loc[magnitude<vibration_limit, column].mean()
    data_sensor.loc[magnitude>vibration_limit, column] = in_range_mean
  #csv_no is the order of the dataset. There are 48 datasets in total
  data_sensor['csv_no']=i+1
  sensor_frames.append(data_sensor)

# Concatenate once at the end: DataFrame.append inside the loop re-copied every accumulated
# row on each iteration and is no longer available in pandas >= 2.0.
data01_sensor=pd.concat(sensor_frames, ignore_index=True)


In [6]:
# Preview the first rows of the combined sensor frame.
data01_sensor.head()

Unnamed: 0,vibration_1,vibration_2,vibration_3,current,csv_no
0,0.024189,-0.013899,0.045651,-31.837665,1
1,0.043586,-0.012801,0.027327,-32.089322,1
2,0.038096,-0.000727,0.02977,-31.975896,1
3,0.032973,-0.014143,0.050782,-32.031778,1
4,0.037608,-0.027193,0.064097,-31.94601,1


In [7]:
# Column dtypes, non-null counts and memory usage.
# NOTE(review): this inspects data_sensor (the frame left over from the last loop iteration),
# not the combined data01_sensor — confirm which one was intended.
data_sensor.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1548800 entries, 0 to 1548799
Data columns (total 5 columns):
 #   Column       Non-Null Count    Dtype  
---  ------       --------------    -----  
 0   vibration_1  1548800 non-null  float64
 1   vibration_2  1548800 non-null  float64
 2   vibration_3  1548800 non-null  float64
 3   current      1548800 non-null  float64
 4   csv_no       1548800 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 59.1 MB


In [8]:
def agg_vibr(df,aggs):
  """Aggregate `df` per `csv_no` and return a flat-column summary frame.

  Parameters
  ----------
  df : DataFrame containing a `csv_no` column plus the columns named in `aggs`.
  aggs : mapping of column name -> list of aggregation names, passed to `groupby().agg`.

  Returns
  -------
  DataFrame with one row per `csv_no`; `csv_no` is a regular column and every
  aggregated column is named `<column>_<aggregation>`.
  """
  per_file = df.reset_index().groupby('csv_no')
  summary = per_file.agg(aggs)
  # Flatten the (column, aggregation) pairs into single "column_aggregation" labels.
  summary.columns = ['_'.join(label) for label in summary.columns]
  return summary.reset_index()

In [9]:
#Features computed per csv_no for every signal: mean, standard deviation and peak (max) value
signal_columns = ['vibration_1', 'vibration_2', 'vibration_3', 'current']
aggs = {column: ['mean', 'std', 'max'] for column in signal_columns}

In [10]:
#Data Aggregation: one row per csv_no holding the mean/std/max columns requested in aggs
data01_sensor_agg=agg_vibr(data01_sensor,aggs)

In [11]:
# First rows of the per-file feature table.
data01_sensor_agg.head()

Unnamed: 0,csv_no,vibration_1_mean,vibration_1_std,vibration_1_max,vibration_2_mean,vibration_2_std,vibration_2_max,vibration_3_mean,vibration_3_std,vibration_3_max,current_mean,current_std,current_max
0,1,0.032055,1.257466,10.526876,-0.013657,1.023135,9.440342,-0.008531,1.807261,29.979248,-11.138611,33.050992,68.091055
1,2,0.03386,1.815351,13.055378,-0.0092,1.575724,12.979081,0.012159,2.029509,28.497853,-0.319054,36.35598,59.412173
2,3,0.030849,1.904693,15.540936,-0.012517,1.649458,15.523182,0.013061,1.992761,15.933651,-0.330611,35.652936,58.783423
3,4,0.029016,1.872942,15.947795,-0.013855,1.653137,14.723691,-0.229712,3.973331,29.999038,-0.322389,35.504967,58.372149
4,5,0.03049,1.928085,14.67781,-0.012181,1.715352,16.262056,-0.346211,5.426306,29.999771,-0.316911,35.359442,58.205663


In [12]:
# Last rows of the per-file feature table.
# NOTE(review): in the recorded output the last file (csv_no 48) has far smaller std/max
# values than the other files — worth confirming whether it holds a real cutting signal.
data01_sensor_agg.tail()

Unnamed: 0,csv_no,vibration_1_mean,vibration_1_std,vibration_1_max,vibration_2_mean,vibration_2_std,vibration_2_max,vibration_3_mean,vibration_3_std,vibration_3_max,current_mean,current_std,current_max
43,44,0.030124,2.614379,19.496066,-0.008376,2.424867,23.448816,0.003938,2.565022,29.838643,-0.390831,35.311493,59.556551
44,45,0.029789,2.67636,20.792646,-0.010998,2.395521,22.401367,-0.011678,2.602493,29.998916,-0.393054,35.290951,59.476836
45,46,0.03139,2.806505,22.521704,-0.008335,2.702773,22.102061,0.004186,2.785188,29.968132,-0.393852,35.387117,60.273169
46,47,0.032388,2.797463,21.481683,-0.007612,2.549905,27.342478,-0.022015,2.80383,29.97424,-0.390759,35.287985,59.855842
47,48,0.041799,0.022109,0.165339,0.003637,0.016938,0.082333,0.021226,0.02818,0.15816,-0.356437,0.07288,-0.014952


In [13]:
#Creating features for vibrational signals for each csv_no
vibration_columns = ['vibration_1', 'vibration_2', 'vibration_3']

# The squared signals are stored on data01_sensor so the per-file mean square can be taken.
for column in vibration_columns:
  data01_sensor[column+'_sqr']=data01_sensor[column]**2

samples_by_file = data01_sensor.groupby('csv_no')
# Each per-file result is indexed by csv_no and looked up through the csv_no column of the
# feature table, so rows are matched by key instead of relying on identical row order.
file_ids = data01_sensor_agg['csv_no']

#Root Mean Square: sqrt(mean(x^2)) per file
for column in vibration_columns:
  rms_by_file = np.sqrt(samples_by_file[column+'_sqr'].mean())
  data01_sensor_agg[column+'_rms']=file_ids.map(rms_by_file)

#Kurtosis
# Each group is a Series, so the Series method is applied (the unbound DataFrame.kurt is
# not meant to receive a Series).
for column in vibration_columns:
  kurt_by_file = samples_by_file[column].apply(pd.Series.kurt)
  data01_sensor_agg[column+'_kurt']=file_ids.map(kurt_by_file)

#Crest Factor: peak value divided by RMS
# The '_cest_fct' column names are kept unchanged so existing consumers of the CSV still work.
# NOTE(review): the peak used here is the signed maximum ('_max'), not max(|x|) — confirm
# this is the intended definition.
for column in vibration_columns:
  data01_sensor_agg[column+'_cest_fct']=data01_sensor_agg[column+'_max']/data01_sensor_agg[column+'_rms']

In [14]:
# First rows of the feature table, now including the RMS, kurtosis and crest-factor columns.
data01_sensor_agg.head()

Unnamed: 0,csv_no,vibration_1_mean,vibration_1_std,vibration_1_max,vibration_2_mean,vibration_2_std,vibration_2_max,vibration_3_mean,vibration_3_std,vibration_3_max,current_mean,current_std,current_max,vibration_1_rms,vibration_2_rms,vibration_3_rms,vibration_1_kurt,vibration_2_kurt,vibration_3_kurt,vibration_1_cest_fct,vibration_2_cest_fct,vibration_3_cest_fct
0,1,0.032055,1.257466,10.526876,-0.013657,1.023135,9.440342,-0.008531,1.807261,29.979248,-11.138611,33.050992,68.091055,1.257874,1.023226,1.807281,2.660817,3.224526,19.469639,8.368782,9.226058,16.58804
1,2,0.03386,1.815351,13.055378,-0.0092,1.575724,12.979081,0.012159,2.029509,28.497853,-0.319054,36.35598,59.412173,1.815666,1.575751,2.029545,1.305299,2.013373,0.776188,7.190407,8.23676,14.0415
2,3,0.030849,1.904693,15.540936,-0.012517,1.649458,15.523182,0.013061,1.992761,15.933651,-0.330611,35.652936,58.783423,1.904942,1.649505,1.992803,1.631363,2.612785,0.783387,8.158221,9.410812,7.995598
3,4,0.029016,1.872942,15.947795,-0.013855,1.653137,14.723691,-0.229712,3.973331,29.999038,-0.322389,35.504967,58.372149,1.873166,1.653195,3.979965,1.587242,2.599197,20.588895,8.513819,8.906205,7.537514
4,5,0.03049,1.928085,14.67781,-0.012181,1.715352,16.262056,-0.346211,5.426306,29.999771,-0.316911,35.359442,58.205663,1.928326,1.715395,5.437338,1.68632,2.740558,11.22365,7.611686,9.480066,5.517364


In [15]:
#Write the per-file feature table to Drive as CSV (row index omitted)
output_csv = '/content/gdrive/My Drive/Foxconn2020-tool-wear/Notebooks/John/data01_sensor_agg.csv'
data01_sensor_agg.to_csv(output_csv, index=False)