In [1]:
import numpy as np
import  pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.utils import pad_sequences
from sklearn.metrics import classification_report
from plot_keras_history import plot_history
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib notebook

In [2]:
data=pd.read_csv('DATA.csv')

In [3]:
data.drop(columns=['Unnamed: 0'],axis=1,inplace=True)

In [4]:
data['label']=data['label'].replace(2,1)

In [5]:
X=data['combined_result']
y=data['label']
time_series=data['t_dist']

In [6]:
std_X = X.std()
desired_snr = 0.1
std_noise = std_X * desired_snr
noise = np.random.normal(0, std_noise, X.shape)

In [7]:
data['noisy_feature_column'] = X + noise
noisy_signal=data['noisy_feature_column']

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [15]:
noisy_signal=noisy_signal.to_frame()

In [16]:
trf1=ColumnTransformer(transformers=[
    ('Scaling',MinMaxScaler(feature_range=(0.0, 10.789)),[0])
],remainder='passthrough')

In [17]:
pipe = Pipeline([
    ('trf1', trf1)
])

In [18]:
X_transformed=pipe.fit_transform(noisy_signal)

In [21]:
print(X_transformed.shape)

(299995, 1)


In [22]:
X_transformed_series = pd.Series(X_transformed[:, 0], name='Transformed Data')

In [31]:
X_transformed_series.min()

0.0

In [28]:
# Combining X_transformed and y to Data Frame for Illustration
df=pd.DataFrame({"Transformed-Data":X_transformed_series,"Labels":y})
df.head(10)

Unnamed: 0,Transformed-Data,Labels
0,0.058865,1.0
1,0.039261,1.0
2,0.06365,1.0
3,0.072294,1.0
4,0.046934,1.0
5,0.051019,1.0
6,0.051612,1.0
7,0.041134,1.0
8,0.062973,1.0
9,0.070713,1.0


In [29]:
df.to_csv('Transformed-Data.csv')

In [32]:
# Extracting sequences from the dataset
sequences=[]
labels=[]
temp=[]

for index,value in y.items():
    if value==1:
        temp.append(X_transformed_series[index])
        if (index+1)<len(y) and y[index+1]==0:
            sequences.append(temp)
            labels.append(1)
            temp=[]
    if value==0:
        temp.append(X_transformed_series[index])
        if (index+1)<len(y) and y[index+1]==1:
            sequences.append(temp)
            labels.append(0)
            temp=[]

In [33]:
# Padding and reshaping
X_train=pad_sequences(sequences,padding='post',truncating='post',dtype=float,maxlen=50)

In [34]:
y_train = np.array(labels)
y_train = y_train.reshape(-1, 1)

In [35]:
X_train=X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_train.shape

(1449, 50, 1)

In [50]:
print(X_train[90])

[[0.08125645]
 [0.0358537 ]
 [0.0388303 ]
 [0.05880582]
 [0.10365053]
 [0.06793716]
 [0.04521443]
 [0.0408228 ]
 [0.07437759]
 [0.04560736]
 [0.05141161]
 [0.05392243]
 [0.05161588]
 [0.04900899]
 [0.04284856]
 [0.06524419]
 [0.05475102]
 [0.05840395]
 [0.0535378 ]
 [0.04324242]
 [0.04984779]
 [0.04974488]
 [0.06637587]
 [0.06025826]
 [0.04266726]
 [0.05787961]
 [0.05824339]
 [0.04649168]
 [0.03594983]
 [0.08872858]
 [0.04454339]
 [0.05573022]
 [0.0573265 ]
 [0.08281195]
 [0.05781499]
 [0.05177692]
 [0.07175849]
 [0.08091755]
 [0.05370321]
 [0.0574853 ]
 [0.06051083]
 [0.07675408]
 [0.05575863]
 [0.04899016]
 [0.0652386 ]
 [0.08198168]
 [0.05288592]
 [0.06027841]
 [0.04917005]
 [0.04905803]]


In [59]:
# It is because Our each  sequence has max laength of 50 and in output we want to predict a label
Tx=50
Ty=1
repeator = tf.keras.layers.RepeatVector(Tx)
concatenator = tf.keras.layers.Concatenate(axis=-1)
densor1 =tf.keras.layers. Dense(10, activation = "tanh")
densor2 = tf.keras.layers.Dense(1, activation = "relu")
activator = tf.keras.layers.Activation('softmax', name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
dotor =tf.keras.layers. Dot(axes = 1)

In [60]:
def one_step_attention(a,s_prev):
    # We done this to change s_prev to shape of(m,Tx,n_s) for cocatination with a, because concatenation requires all dimension same accept concat axis in this 
    # case is last one so a and s_prev have same first two dims (m,Tx) but last one is changed
    s_prev=repeator(s_prev)
    # We will here concatenate a and s_prev
    concat=concatenator([a,s_prev])
    # here i will calculate energies with 2 dense layers
    e=densor1(concat)
    energies=densor2(e)
    # we know alpha is softmax of this energy
    alpha=activator(energies)
    # to calculate context vector we take dot product of alpha and a
    context_vector=dotor([alpha,a])
    return context_vector

In [61]:
n_a = 32 # number of units for the pre-attention, bi-directional LSTM's hidden state 'a'
n_s = 64 # number of units for the post-attention LSTM's hidden state "s"

# Please note, this is the post attention LSTM cell.
post_activation_LSTM_cell = tf.keras.layers.LSTM(n_s, return_state = True) # Please do not modify this global variable.
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')

In [62]:
def modelf(Tx,Ty,n_a, n_s):
    X=tf.keras.layers.Input(shape=(X_train.shape[1],1)) # because embedding layer only demands the sequence length if i give full shape like (m,Tx) the
    # ouput of embedding layer will be 4D which can not be fed into BILSTM
    # hidden state for post LSTM
    s0 = tf.keras.layers.Input(shape=(n_s,), name='s0')
    # cell state for post lstm
    # because we know From CampusX that shape of hidden and cell state of lstm are equal
    c0 = tf.keras.layers.Input(shape=(n_s,), name='c0')
    s=s0
    c=c0
    outputs = []

    a = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(n_a,return_sequences=True),input_shape=(X_train.shape[1],1))(X)

    for t in range(Ty):
        context=one_step_attention(a,s)
        _,s,c=post_activation_LSTM_cell(context,initial_state = [s,c] )
        out = output_layer(s)
        outputs.append(out)



    print(outputs)
    model=tf.keras.models.Model(inputs=[X,s0,c0],outputs=outputs)

    return model

In [63]:
model = modelf(Tx, Ty, n_a, n_s)

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'dense_8')>]


In [64]:
opt = tf.keras.optimizers.Adam(learning_rate=0.00001,beta_1=0.9,beta_2=0.999)#0.00001
model.compile(loss = tf.keras.losses.BinaryCrossentropy(), optimizer = opt, metrics = ['accuracy'])

In [65]:
# For Training Set
m=X_train.shape[0]
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))

In [66]:
model_history=model.fit([X_train, s0, c0], y_train, epochs=500, batch_size=100)#350

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [67]:
plot_history(model_history,show_standard_deviation=False,show_average=True)

<IPython.core.display.Javascript object>

(<Figure size 1000x500 with 2 Axes>,
 array([<Axes: title={'center': 'Loss'}, xlabel='Epochs', ylabel='Loss'>,
        <Axes: title={'center': 'Accuracy'}, xlabel='Epochs', ylabel='Accuracy'>],
       dtype=object))

In [68]:
#model.save_weights('Scikit-Learn-PipeLine-Model.h5')
model.load_weights('Scikit-Learn-PipeLine-Model.h5')

**Testing On 10 Min Set**

In [90]:
min_10_data=pd.read_csv('10_minutes_final_processed_gsmfloor_data5.csv')

In [91]:
min_10_data.drop(columns=['Unnamed: 0'],axis=1,inplace=True)
min_10_data.shape

(59999, 3)

In [92]:
min_10_data['label']=min_10_data['label'].replace(2,1)

In [93]:
x_values=min_10_data['combined_result']
y_values=min_10_data['label']

In [94]:
x_values.max()

3.148

In [95]:
std_X = x_values.std()
desired_snr = 0.1
std_noise = std_X * desired_snr
noise = np.random.normal(0, std_noise, x_values.shape)

In [96]:
min_10_data['noisy_feature_column'] = x_values + noise
noisy_signal=min_10_data['noisy_feature_column']

In [97]:
noisy_signal=noisy_signal.to_frame()

In [98]:
trf1=ColumnTransformer(transformers=[
    ('Scaling',MinMaxScaler(feature_range=(0.0, 3.14)),[0])
],remainder='passthrough')

In [99]:
pipe = Pipeline([
    ('trf1', trf1)
])

In [100]:
X_transformed=pipe.fit_transform(noisy_signal)

In [101]:
X_transformed_series = pd.Series(X_transformed[:, 0], name='Transformed Data')

In [102]:
event=[]
labe=[]
temp=[]
for index,value in y_values.items():
    if value==1:
        temp.append(X_transformed_series[index])
        if (index+1)<len(y_values) and y_values[index+1]==0:
            event.append(temp)
            labe.append(1)
            temp=[]
    if value==0:
        temp.append(X_transformed_series[index])
        if (index+1)<len(y_values) and y_values[index+1]==1:
            event.append(temp)
            labe.append(0)
            temp=[]

In [103]:
event=pad_sequences(event,padding='post',truncating='post',dtype=float,maxlen=50)
labe=np.array(labe)
labe=labe.reshape(-1,1)
event=event.reshape(event.shape[0], event.shape[1], 1)

In [104]:
# For 10 Min Data set
m_10=event.shape[0]
s0_10 = np.zeros((m_10, n_s))
c0_10 = np.zeros((m_10, n_s))

In [105]:
predictions_10 = model.predict([event, s0_10, c0_10])
predictions_10 = np.round(predictions_10)



In [106]:
predictions_10_count=np.count_nonzero(predictions_10==1)
actual_10_count=np.count_nonzero(labe==1)
print(predictions_10_count/actual_10_count)

1.986206896551724


In [107]:
report = classification_report(predictions_10,labe)
print(report)

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         1
         1.0       0.99      0.50      0.67       288

    accuracy                           0.50       289
   macro avg       0.50      0.25      0.33       289
weighted avg       0.99      0.50      0.66       289
