In [1]:
import tensorflow as tf
# Cap how much memory TensorFlow may claim on the first visible GPU (if any).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # A single virtual device limited to `memory_limit` (TensorFlow documents
        # this value as megabytes) is created on gpus[0].
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2024)],
        )
    except RuntimeError as e:
        # Report the failure instead of aborting the notebook run.
        print(e)

In [1]:
import pandas as pd
import matplotlib.pyplot as plot
import numpy as np
import seaborn as sns
import warnings
# Suppress every warning for the rest of the session; note this also hides
# deprecation and performance warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

#### Denoising the data:

In [2]:
# following code snippets were taken from https://www.kaggle.com/theoviel/fast-fourier-transform-denoising
from numpy.fft import *

def filter_signal(signal, threshold=1e3):
    """Low-pass filter a 1-D signal by zeroing its high-frequency FFT bins.

    The signal is transformed with a real FFT, every bin whose frequency is
    above ``threshold`` is set to zero, and the result is transformed back.
    Frequencies are computed with a sample spacing of ``20e-3 / n``, i.e. the
    whole signal is treated as one 20 ms window.

    Parameters
    ----------
    signal : array-like
        Samples to filter (NumPy array, pandas Series, list, ...).
    threshold : float, optional
        Cut-off frequency; bins strictly above it are removed.

    Returns
    -------
    numpy.ndarray
        The filtered signal, with the same number of samples as the input.
    """
    signal = np.asarray(signal)
    n_samples = signal.size
    fourier = rfft(signal)
    frequencies = rfftfreq(n_samples, d=20e-3/n_samples)
    fourier[frequencies > threshold] = 0
    # Pass n explicitly: without it irfft returns 2*(m-1) samples, which is one
    # sample short whenever the input length is odd.
    return irfft(fourier, n=n_samples)

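The function above uses NumPy's FFT module: it takes the real Fourier transform of the signal, computes the frequency of each component, zeroes every component whose frequency is above the threshold, and then applies the inverse Fourier transform to recover the denoised signal.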

# Feature Engineering:

In [3]:
from pandas import concat

In [4]:
def new_features(dataset):
    """Add resultant-magnitude features for each sensor group to ``dataset``.

    For orientation (X, Y, Z, W), angular velocity (X, Y, Z) and linear
    acceleration (X, Y, Z) two columns are written:

    * ``resultant_<sensor>``: square root of the sum of squared coordinates.
    * ``resultant_<sensor>_sum``: that magnitude divided by the plain sum of
      the coordinates (no guard against a zero denominator).

    The columns are added to ``dataset`` itself, and the same object is
    returned.
    """
    sensor_axes = (
        ('orientation', 'XYZW'),
        ('angular_velocity', 'XYZ'),
        ('linear_acceleration', 'XYZ'),
    )

    # First pass: the magnitude of every sensor vector.
    magnitudes = {}
    for sensor, axes in sensor_axes:
        squared_total = dataset[sensor + '_' + axes[0]] ** 2
        for axis in axes[1:]:
            squared_total = squared_total + dataset[sensor + '_' + axis] ** 2
        magnitudes[sensor] = np.sqrt(squared_total)
        dataset['resultant_' + sensor] = magnitudes[sensor]

    # Second pass: magnitude relative to the signed sum of the coordinates.
    for sensor, axes in sensor_axes:
        coordinate_total = dataset[sensor + '_' + axes[0]]
        for axis in axes[1:]:
            coordinate_total = coordinate_total + dataset[sensor + '_' + axis]
        dataset['resultant_' + sensor + '_sum'] = magnitudes[sensor] / coordinate_total

    return dataset

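In the cell above we compute the resultant (magnitude) of each sensor measurement by taking the square root of the sum of the squares of its coordinates. We also compute the ratio of that resultant to the plain sum of the coordinates (the `*_sum` features).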

In [5]:
def compute_statistical_features(X_data):
    """Aggregate every feature column into per-series summary statistics.

    Rows are grouped by ``series_id``. For each column other than
    ``series_id``, ``measurement_number`` and ``row_id`` the following
    columns are produced, named ``<col>_<suffix>``:

    ``mean``, ``median``, ``max``, ``min``, ``var``, ``std``, ``quant``
    (``quantile()`` with its default ``q=0.5``), ``skew``, ``mad`` (median of
    the absolute differences between consecutive values), ``abs_max`` and
    ``abs_min`` (largest / smallest absolute value).

    Parameters
    ----------
    X_data : pandas.DataFrame
        Row-level data containing a ``series_id`` column.

    Returns
    -------
    pandas.DataFrame
        One row per ``series_id`` (used as the index) and eleven columns per
        feature column, in the order listed above.
    """
    excluded = ('series_id', 'measurement_number', 'row_id')
    feature_cols = [col for col in X_data.columns if col not in excluded]
    if not feature_cols:
        return pd.DataFrame()

    # Group once; the grouping does not depend on the column being summarised.
    grouped = X_data.groupby('series_id')

    # Collect the columns first and build the frame in one go, which avoids
    # growing (and fragmenting) a DataFrame one column at a time.
    stat_columns = {}
    for col in feature_cols:
        per_series = grouped[col]
        stat_columns[col+'_mean'] = per_series.mean()
        stat_columns[col+'_median'] = per_series.median()
        stat_columns[col+'_max'] = per_series.max()
        stat_columns[col+'_min'] = per_series.min()
        stat_columns[col+'_var'] = per_series.var()
        stat_columns[col+'_std'] = per_series.std()
        stat_columns[col+'_quant'] = per_series.quantile()
        stat_columns[col+'_skew'] = per_series.skew()
        stat_columns[col + '_mad'] = per_series.apply(lambda x: np.median(np.abs(np.diff(x))))
        stat_columns[col + '_abs_max'] = per_series.apply(lambda x: np.max(np.abs(x)))
        stat_columns[col + '_abs_min'] = per_series.apply(lambda x: np.min(np.abs(x)))

    return pd.DataFrame(stat_columns)

In the cell above we apply aggregate functions (mean, median, max, min, variance, standard deviation, quantile, skew and others) per series to the orientation, angular velocity and linear acceleration features.

The cell above yields the denoised data with the original, statistical, and manually computed features.

In [6]:
from sklearn.metrics import f1_score

### Final Pipeline

In [7]:
import joblib

In [133]:
# Load the test-set CSV; the path is relative, so the file must be in the
# current working directory of the kernel.
test_data_set=pd.read_csv('X_test_kaggle.csv')

In [134]:
# Work on a copy so the raw readings in test_data_set stay untouched.
X_test_denoised = test_data_set.copy()
for col in test_data_set.columns:
    # Only columns whose names start with 'ang' or 'lin' are denoised
    # (the angular_velocity_* and linear_acceleration_* columns).
    if col.startswith(('ang', 'lin')):
        # transform() applies filter_signal to each series and writes every
        # result back onto the rows it came from, so the denoised values stay
        # aligned with the frame even if rows are not sorted by series_id.
        X_test_denoised[col] = test_data_set.groupby('series_id')[col].transform(filter_signal)

In [135]:
def Final_Pipeline(test_df, model_path='lgbm_model_new.pkl'):
    """Build the model features from sensor rows and predict with a saved model.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Rows with the columns required by ``new_features`` and
        ``compute_statistical_features`` (including ``series_id``).
    model_path : str, optional
        Path of the joblib-serialised model to load. Defaults to the file the
        notebook originally hard-coded.

    Returns
    -------
    The value returned by the loaded model's ``predict`` for the per-series
    feature table (one feature row per ``series_id`` in ``test_df``).
    """
    # new_features adds columns to the frame it is given; hand it a copy so the
    # caller's DataFrame is left unchanged.
    engineered = new_features(test_df.copy())
    series_features = compute_statistical_features(engineered)
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model files that come from a trusted source.
    final_model = joblib.load(model_path)
    return final_model.predict(series_features)

In [153]:
# Pick one series at random and pass all of its rows. The pipeline aggregates
# per series_id, so a single row would leave var/std/skew undefined (NaN).
sample_series_id = X_test_denoised['series_id'].sample(n=1).iloc[0]
sample_data = X_test_denoised[X_test_denoised['series_id'] == sample_series_id].copy()
y_pred = Final_Pipeline(sample_data)