In [None]:
import tensorflow as tf
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from keras.layers import Layer
from keras.layers import Dense
from keras.models import Model, Sequential
from keras import regularizers
import subprocess
import sys
import os
import re
import atexit
import ipywidgets as widgets
import datetime as dt
from sklearn.preprocessing import OneHotEncoder

# Small dense autoencoder fitted on random Gaussian vectors (input == target).
num_features = 10
input_shape = (num_features,)

# Symbolic Keras input. It has its own name so that it is not shadowed by the
# NumPy training array `input_data` created further down.
model_input = tf.keras.Input(shape=input_shape)

hidden_layer = tf.keras.layers.Dense(units=64, activation=tf.nn.relu)(model_input)
output_layer = tf.keras.layers.Dense(units=num_features, activation=None)(hidden_layer)

model = tf.keras.Model(inputs=model_input, outputs=output_layer)

model.compile(optimizer='adam', loss='mean_squared_error')

num_samples = 1000
input_data = np.random.randn(num_samples, num_features)
# The reconstruction target is the input itself.
output_data = input_data

model.fit(input_data, output_data, epochs=100, batch_size=32)




: 

In [None]:
# Identifier / code columns are read as text.
dtype = {'DeviceID':str, 'Fault_Code_Type_1':str, 'Fault_Code_Type_2':str, 'Fault_Code_Type_3':str, 'Fault_Code_Type_4':str}

df = pd.read_csv('sampledata.csv', sep=',', dtype=dtype, parse_dates=['Date'], infer_datetime_format=True)

# Keep one row per (device, date) and discard rows missing either key.
df = df.drop_duplicates(['DeviceID', 'Date'])
df = df.dropna(how='any', subset=['DeviceID', 'Date'])

# Calendar parts of the timestamp.
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

# Default bounds for the date pickers and date filters in the exploration
# cells. Those cells read FIRST_DATE / LAST_DATE, but no cell assigned them.
# NOTE(review): presumably meant to be the span of the loaded data - confirm.
FIRST_DATE = df['Date'].min().date()
LAST_DATE = df['Date'].max().date()

In [None]:
# Column groups used throughout the notebook.
datetime_features = ['Date', 'Day', 'Month', 'Year']
categorical_features = ['DeviceID', 'Categorical_1', 'Categorical_2', 'Categorical_3', 'Categorical_4',
                        'Fault_Code_Type_1', 'Fault_Code_Type_2', 'Fault_Code_Type_3', 'Fault_Code_Type_4']
warning_type1_features = [feature for feature in df.columns if feature.startswith('Warning_1')]
warning_type2_features = [feature for feature in df.columns if feature.startswith('Warning_2')]
warning_features = warning_type1_features + warning_type2_features

# Everything that is neither a date part nor categorical counts as numeric.
# The list follows the DataFrame's own column order; a set difference would
# yield an arbitrary order that can change from one kernel start to the next.
non_numeric_features = set(datetime_features) | set(categorical_features)
numeric_features = [feature for feature in df.columns if feature not in non_numeric_features]

# Missing numeric values become 0.
values = dict.fromkeys(numeric_features, 0.0)
df = df.fillna(value=values)

# Missing categorical values become the literal 'Unknown'.
values = dict.fromkeys(categorical_features, 'Unknown')
df = df.fillna(value=values)

df.head()


In [None]:
# Summary statistics of the numeric columns.
df.loc[:, numeric_features].describe()

In [None]:
# Overwrite every negative entry of the numeric columns with 0.
for feature in numeric_features:
    is_negative = df[feature] < 0
    df.loc[is_negative, feature] = 0

# Re-check the summary statistics after the replacement.
df.loc[:, numeric_features].describe()


In [None]:
def ExploratoryDataAnalysis(df, device, feature, start_date, end_date):
    """Plot one feature of one device against 'Date'.

    Args:
        df: DataFrame with 'DeviceID' and 'Date' columns plus `feature`.
        device: value matched against 'DeviceID'; nothing is drawn when None.
        feature: column plotted on the y axis; nothing is drawn when None.
        start_date: first date to include, or None for the earliest date in `df`.
        end_date: last date to include, or None for the latest date in `df`.

    Returns:
        None. The plot is produced as a side effect.
    """
    if device is None or feature is None:
        return

    eda = df.loc[df['DeviceID'] == device]

    # An open bound falls back to the span of the data itself, so the function
    # does not depend on notebook-level globals.
    from_date = start_date if start_date is not None else df['Date'].min()
    to_date = end_date if end_date is not None else df['Date'].max()

    # An inverted range yields no dates; in that case draw nothing.
    window = pd.date_range(from_date, to_date)
    if window.size == 0:
        return

    eda = eda[eda['Date'].isin(window)]
    eda.plot(x='Date', y=feature)

def f(device, feature, start_date, end_date):
    """Widget callback: draw the time-series plot for the current selection."""
    ExploratoryDataAnalysis(df=df, device=device, feature=feature,
                            start_date=start_date, end_date=end_date)

# Input controls.
deviceID_selector = widgets.Dropdown(description='Device ID:', options=df.DeviceID.unique(), value=None, disabled=False)
feature_selector = widgets.Dropdown(description='Feature:', options=numeric_features, value='Usage_Count_1', disabled=False)
start_date_selector = widgets.DatePicker(description='From:', value=FIRST_DATE, disabled=False)
end_date_selector = widgets.DatePicker(description='To:', value=LAST_DATE, disabled=False)

w = widgets.interactive(
    f,
    device=deviceID_selector,
    feature=feature_selector,
    start_date=start_date_selector,
    end_date=end_date_selector,
)

# `w.children` holds the four controls followed by the output area.
device_control, feature_control, start_control, end_control = w.children[:4]
left_box = widgets.VBox([device_control, feature_control])
right_box = widgets.VBox([start_control, end_control])
controls = widgets.HBox([left_box, right_box])
output = w.children[-1]
display(widgets.VBox([controls, output]))



In [None]:
def ExploratoryDataAnalysis2(df, device, feature_x, feature_y, start_date, end_date):
    """Scatter-plot two features of one device within a date window.

    Args:
        df: DataFrame with 'DeviceID' and 'Date' columns plus both features.
        device: value matched against 'DeviceID'; nothing is drawn when None.
        feature_x: column for the x axis; nothing is drawn when None.
        feature_y: column for the y axis; nothing is drawn when None.
        start_date: first date to include, or None for the earliest date in `df`.
        end_date: last date to include, or None for the latest date in `df`.

    Returns:
        None. The plot is produced as a side effect.
    """
    if device is None or feature_x is None or feature_y is None:
        return

    eda = df.loc[df['DeviceID'] == device]

    # An open bound falls back to the span of the data itself, so the function
    # does not depend on notebook-level globals.
    from_date = start_date if start_date is not None else df['Date'].min()
    to_date = end_date if end_date is not None else df['Date'].max()

    # An inverted range yields no dates; in that case draw nothing.
    window = pd.date_range(from_date, to_date)
    if window.size == 0:
        return

    eda = eda[eda['Date'].isin(window)]
    eda.plot.scatter(x=feature_x, y=feature_y)

def f2(device, feature_x, feature_y, start_date, end_date):
    """Widget callback: draw the scatter plot for the current selection."""
    ExploratoryDataAnalysis2(df=df, device=device, feature_x=feature_x, feature_y=feature_y,
                             start_date=start_date, end_date=end_date)

# Input controls.
deviceID_selector = widgets.Dropdown(description='Device ID:', options=df.DeviceID.unique(), value=None, disabled=False)
feature_x_selector = widgets.Dropdown(description='X-Feature:', options=numeric_features, value='Usage_Count_1', disabled=False)
feature_y_selector = widgets.Dropdown(description='Y-Feature:', options=numeric_features, value='Usage_Count_2', disabled=False)
start_date_selector = widgets.DatePicker(description='From:', value=FIRST_DATE, disabled=False)
end_date_selector = widgets.DatePicker(description='To:', value=LAST_DATE, disabled=False)

w = widgets.interactive(
    f2,
    device=deviceID_selector,
    feature_x=feature_x_selector,
    feature_y=feature_y_selector,
    start_date=start_date_selector,
    end_date=end_date_selector,
)

# `w.children` holds the five controls followed by the output area.
device_control, x_control, y_control, start_control, end_control = w.children[:5]
left_box = widgets.VBox([device_control])
center_box = widgets.VBox([x_control, y_control])
right_box = widgets.VBox([start_control, end_control])
controls = widgets.HBox([left_box, center_box, right_box])
output = w.children[-1]
display(widgets.VBox([controls, output]))

In [None]:
def ExploratoryDataAnalysis3(df, feature):
    """Draw a Year x Month heatmap of the summed values of `feature`.

    Args:
        df: DataFrame with 'Year' and 'Month' columns plus `feature`.
        feature: column whose values are summed per (Year, Month) cell.

    Returns:
        None. The heatmap is drawn on a new 16x4 figure.
    """
    # The string name 'sum' replaces the NumPy callable, which newer pandas
    # releases deprecate as an aggregation argument.
    eda = pd.pivot_table(df, values=feature, index='Year', columns='Month', aggfunc='sum')
    plt.figure(figsize = (16,4))
    plt.title('Cumulative Feature by Month and Year', fontsize=20)
    sns.heatmap(eda, cmap="YlGnBu", annot=True, fmt='.0f')

def f3(feature):
    """Widget callback: redraw the month/year heatmap for `feature`."""
    ExploratoryDataAnalysis3(df=df, feature=feature)

feature_selector = widgets.Dropdown(description='Feature:', options=numeric_features, value='ProblemReported', disabled=False)

w = widgets.interactive(f3, feature=feature_selector)
display(w)

In [None]:
# Columns included in the correlation matrix.
correlation_columns = [
    'Error_Count_1', 'Error_Count_2', 'Error_Count_3', 'Error_Count_4',
    'Error_Count_5', 'Error_Count_6', 'Error_Count_7', 'Error_Count_8',
    'ProblemReported',
    'Problem_Type_1', 'Problem_Type_2', 'Problem_Type_3', 'Problem_Type_4',
    'Usage_Count_1', 'Usage_Count_2',
]
df_corr = df[correlation_columns]
corr = df_corr.corr()

# Annotated heatmap labelled with the column names on both axes.
tick_labels = corr.columns.values
plt.figure(figsize = (16,4))
plt.title('Correlation matrix between numerical features', fontsize=20)
sns.heatmap(corr, cmap="YlGnBu", annot=True, fmt='.2f',
            xticklabels=tick_labels, yticklabels=tick_labels)




In [None]:
# Row-wise total of the individual warning columns, one total per warning type.
for total_column, source_columns in (('Warning_Type_1', warning_type1_features),
                                     ('Warning_Type_2', warning_type2_features)):
    df[total_column] = sum(df[column] for column in source_columns)


In [None]:
#pt1 = pd.pivot_table(df, values=['Categorical_1', 'Categorical_2', 'Categorical_3', 'Categorical_4'],
#                     columns='DeviceID', aggfunc=np.std)
#plt.figure(figsize = (16,4))
#sns.heatmap(pt1)


In [None]:

# Summary statistics of the four Categorical_* columns, rounded for display.
categorical_value_columns = ['Categorical_1', 'Categorical_2', 'Categorical_3', 'Categorical_4']
df_cat = df[categorical_value_columns].describe()
df_cat.round(1)


In [None]:
# Quantile-bin each Categorical_* column into labelled groups.
# Categorical_3 gets three bins; the others get four.
four_level_labels = ['LL', 'L', 'H', 'HH']
three_level_labels = ['LL', 'L', 'H']

for source_column, bin_labels in (('Categorical_1', four_level_labels),
                                  ('Categorical_2', four_level_labels),
                                  ('Categorical_3', three_level_labels),
                                  ('Categorical_4', four_level_labels)):
    df[source_column + '_Grouped'] = pd.qcut(df[source_column], len(bin_labels), labels=bin_labels)



In [None]:
# Summary of the binned Categorical_* columns.
grouped_categorical_columns = ['Categorical_1_Grouped', 'Categorical_2_Grouped',
                               'Categorical_3_Grouped', 'Categorical_4_Grouped']
df_cat = df[grouped_categorical_columns].describe()
df_cat.round(1)


In [None]:
# Summary statistics of the Problem_Type_* columns, rounded for display.
problem_columns = ['Problem_Type_1', 'Problem_Type_2', 'Problem_Type_3', 'Problem_Type_4']
df_pbm = df[problem_columns].describe()
df_pbm.round(1)

In [None]:
# Summary statistics of the Error_Count_* columns, rounded for display.
error_columns = ['Error_Count_1', 'Error_Count_2', 'Error_Count_3', 'Error_Count_4',
                 'Error_Count_5', 'Error_Count_6', 'Error_Count_7', 'Error_Count_8']
df_err = df[error_columns].describe()
df_err.round(1)


In [None]:
# Cap Error_Count_5 at 1: any value above 1 is replaced by 1.
exceeds_one = df['Error_Count_5'] > 1
df.loc[exceeds_one, 'Error_Count_5'] = 1
df.loc[:, ['Error_Count_5']].describe().round(1)

In [None]:
# Summary of the Fault_Code_Type_* columns.
fault_code_columns = ['Fault_Code_Type_1', 'Fault_Code_Type_2', 'Fault_Code_Type_3', 'Fault_Code_Type_4']
df_flt = df[fault_code_columns].describe()
df_flt


In [None]:
# 0/1 indicator per fault-code column: 0 where the code is the 'Unknown'
# placeholder, 1 otherwise.
for fault_column in ('Fault_Code_Type_1', 'Fault_Code_Type_2', 'Fault_Code_Type_3', 'Fault_Code_Type_4'):
    df[fault_column + '_Count'] = [0 if code == 'Unknown' else 1 for code in df[fault_column]]


In [None]:
# Problems per unit of usage; the ratio is defined as 0 where usage is 0.
# Usage is the outer loop so the new columns are created in the same order
# as before (all *_Per_Usage_1 first, then all *_Per_Usage_2).
for usage_column, ratio_suffix in (('Usage_Count_1', '_Per_Usage_1'), ('Usage_Count_2', '_Per_Usage_2')):
    for problem_column in ('Problem_Type_1', 'Problem_Type_2', 'Problem_Type_3', 'Problem_Type_4'):
        df[problem_column + ratio_suffix] = np.where(
            df[usage_column] == 0, 0, df[problem_column] / df[usage_column])


In [None]:
# Fault indicators per unit of usage; the ratio is defined as 0 where usage is 0.
# Usage is the outer loop so the column creation order stays unchanged.
for usage_column, ratio_suffix in (('Usage_Count_1', '_Per_Usage_1'), ('Usage_Count_2', '_Per_Usage_2')):
    for fault_column in ('Fault_Code_Type_1', 'Fault_Code_Type_2', 'Fault_Code_Type_3', 'Fault_Code_Type_4'):
        df[fault_column + ratio_suffix] = np.where(
            df[usage_column] == 0, 0, df[fault_column + '_Count'] / df[usage_column])



In [None]:

# Warning totals per unit of usage; the ratio is defined as 0 where usage is 0.
# Usage is the outer loop so the column creation order stays unchanged.
for usage_column, ratio_suffix in (('Usage_Count_1', '_Per_Usage_1'), ('Usage_Count_2', '_Per_Usage_2')):
    for warning_column in ('Warning_Type_1', 'Warning_Type_2'):
        df[warning_column + ratio_suffix] = np.where(
            df[usage_column] == 0, 0, df[warning_column] / df[usage_column])



In [None]:
def _ratio_bucket(value):
    """Label a ratio as '>1', '>0' or 'Zero' (anything not above 0)."""
    if value > 1:
        return '>1'
    if value > 0:
        return '>0'
    return 'Zero'

# Every per-usage ratio column gets a three-level categorical companion.
ratio_features = [feature for feature in df.columns if 'Per_Usage' in feature]

for feature in ratio_features:
    df[feature + '_Grouped'] = [_ratio_bucket(value) for value in df[feature]]

# Columns that go into one-hot encoding: all grouped columns plus the raw error counts.
features_grouped = [feature for feature in df.columns if '_Grouped' in feature] + [
    'Error_Count_1', 'Error_Count_2', 'Error_Count_3', 'Error_Count_4',
    'Error_Count_5', 'Error_Count_6', 'Error_Count_7', 'Error_Count_8',
]
df_grp = df[features_grouped]
df_grp.head()



In [None]:
# One-hot encode every column of df_grp, prefixing each dummy column with its
# source column name. The per-column dummy frames are concatenated once
# instead of being inserted into a growing DataFrame inside the loop.
df_grp_ohe = pd.concat(
    [pd.get_dummies(df_grp[feature], prefix=feature) for feature in df_grp.columns],
    axis=1,
)

df_grp_ohe.head()



In [None]:
# Swap the '_Grouped' label columns for their one-hot encoded counterparts.
grouped_columns = [feature for feature in df.columns if '_Grouped' in feature]
df = pd.concat([df.drop(columns=grouped_columns), df_grp_ohe], axis=1)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale each ratio column on its own; the scaler is refitted per column.
scaler = MinMaxScaler()

for feature in ratio_features:
    column_values = df[feature].to_numpy().reshape(-1, 1)
    df[feature] = scaler.fit_transform(column_values)

df[ratio_features].describe()

In [None]:
# Autoencoder inputs: each individual warning column divided by a usage
# counter (0 where usage is 0), one frame per (warning type, usage counter).
usage_1 = df['Usage_Count_1']
usage_2 = df['Usage_Count_2']

df_ae_t1_u1 = pd.DataFrame({
    feature + '_Per_Usage_1': np.where(usage_1 == 0, 0, df[feature] / usage_1)
    for feature in warning_type1_features
})
df_ae_t1_u2 = pd.DataFrame({
    feature + '_Per_Usage_2': np.where(usage_2 == 0, 0, df[feature] / usage_2)
    for feature in warning_type1_features
})
df_ae_t2_u1 = pd.DataFrame({
    feature + '_Per_Usage_1': np.where(usage_1 == 0, 0, df[feature] / usage_1)
    for feature in warning_type2_features
})
df_ae_t2_u2 = pd.DataFrame({
    feature + '_Per_Usage_2': np.where(usage_2 == 0, 0, df[feature] / usage_2)
    for feature in warning_type2_features
})



In [None]:
# Min-max scale every column of the four autoencoder input frames in place,
# refitting the scaler for each column.
scaler = MinMaxScaler()
for ae_frame in (df_ae_t1_u1, df_ae_t1_u2, df_ae_t2_u1, df_ae_t2_u2):
    for column in ae_frame.columns:
        column_values = ae_frame[column].to_numpy().reshape(-1, 1)
        ae_frame[column] = scaler.fit_transform(column_values)


In [None]:
from keras.models import Model
from keras.layers import Input, Dense

seed = 2019
# NOTE(review): this seeds NumPy only; whether it also fixes the Keras weight
# initialisation depends on the backend - confirm if reproducibility matters.
np.random.seed(seed)
# Width of the bottleneck layer, i.e. number of encoded features per frame.
AE_FEATURES_NUM = 5

# Single-hidden-layer autoencoder over the type-1 warnings per Usage_Count_1.
entry = Input(shape=(df_ae_t1_u1.shape[1],))
coder = Dense(AE_FEATURES_NUM, activation='relu', name='intermediate')(entry)
decoder = Dense(df_ae_t1_u1.shape[1], activation='relu')(coder)
autoencoder = Model(entry, decoder)

# The encoder shares the bottleneck layer (and its weights) with the autoencoder.
encoder1 = Model(entry, coder)

# Reconstruction is a regression target, so only the MSE loss is tracked;
# classification accuracy carries no meaning here.
autoencoder.compile(optimizer='nadam', loss='mse')
autoencoder.fit(df_ae_t1_u1, df_ae_t1_u1, epochs=3)

In [None]:
# Single-hidden-layer autoencoder over the type-1 warnings per Usage_Count_2.
entry = Input(shape=(df_ae_t1_u2.shape[1],))
coder = Dense(AE_FEATURES_NUM, activation='relu', name='intermediate')(entry)
decoder = Dense(df_ae_t1_u2.shape[1], activation='relu')(coder)
autoencoder = Model(entry, decoder)

# The encoder shares the bottleneck layer (and its weights) with the autoencoder.
encoder2 = Model(entry, coder)

# Reconstruction is a regression target, so only the MSE loss is tracked;
# classification accuracy carries no meaning here.
autoencoder.compile(optimizer='nadam', loss='mse')
autoencoder.fit(df_ae_t1_u2, df_ae_t1_u2, epochs=3)

In [None]:
# Single-hidden-layer autoencoder over the type-2 warnings per Usage_Count_1.
entry = Input(shape=(df_ae_t2_u1.shape[1],))
coder = Dense(AE_FEATURES_NUM, activation='relu', name='intermediate')(entry)
decoder = Dense(df_ae_t2_u1.shape[1], activation='relu')(coder)
autoencoder = Model(entry, decoder)

# The encoder shares the bottleneck layer (and its weights) with the autoencoder.
encoder3 = Model(entry, coder)

# Reconstruction is a regression target, so only the MSE loss is tracked;
# classification accuracy carries no meaning here.
autoencoder.compile(optimizer='nadam', loss='mse')
autoencoder.fit(df_ae_t2_u1, df_ae_t2_u1, epochs=3)



In [None]:
# Single-hidden-layer autoencoder over the type-2 warnings per Usage_Count_2.
entry = Input(shape=(df_ae_t2_u2.shape[1],))
coder = Dense(AE_FEATURES_NUM, activation='relu', name='intermediate')(entry)
decoder = Dense(df_ae_t2_u2.shape[1], activation='relu')(coder)
autoencoder = Model(entry, decoder)

# The encoder shares the bottleneck layer (and its weights) with the autoencoder.
encoder4 = Model(entry, coder)

# Reconstruction is a regression target, so only the MSE loss is tracked;
# classification accuracy carries no meaning here.
autoencoder.compile(optimizer='nadam', loss='mse')
autoencoder.fit(df_ae_t2_u2, df_ae_t2_u2, epochs=3)



In [None]:
def _encoded_frame(encoder, inputs, prefix):
    """Run `encoder` on `inputs` and return its output as a DataFrame.

    The result reuses `df.index`. `df` keeps the row labels left over after
    duplicate and incomplete rows were dropped, whereas a frame built from a
    bare array would be labelled 0..n-1; concatenating the two along axis=1
    would then pair up the wrong rows and introduce NaN rows.
    """
    return pd.DataFrame(
        encoder.predict(inputs),
        index=df.index,
        columns=[prefix + str(i + 1) for i in range(AE_FEATURES_NUM)],
    )

# The raw warning columns are replaced by their encoded representation.
df = df.drop(columns=warning_features)

df_ae_t1_u1 = _encoded_frame(encoder1, df_ae_t1_u1, 'AE_T1U1_Warning_')
df_ae_t1_u2 = _encoded_frame(encoder2, df_ae_t1_u2, 'AE_T1U2_Warning_')
df_ae_t2_u1 = _encoded_frame(encoder3, df_ae_t2_u1, 'AE_T2U1_Warning_')
df_ae_t2_u2 = _encoded_frame(encoder4, df_ae_t2_u2, 'AE_T2U2_Warning_')

# Min-max scale each encoded column on its own.
scaler = MinMaxScaler()
for encoded in (df_ae_t1_u1, df_ae_t1_u2, df_ae_t2_u1, df_ae_t2_u2):
    for column in encoded.columns:
        encoded[column] = scaler.fit_transform(np.array(encoded[column]).reshape(-1, 1))

df = pd.concat([df, df_ae_t1_u1, df_ae_t1_u2, df_ae_t2_u1, df_ae_t2_u2], axis=1)


df.to_pickle('sampledata2.pkl')


In [None]:

# Save `model` to 'my_trained_model.keras'.
# NOTE(review): at this point `model` is still the dense network built and
# fitted on random data in the first cell; the `autoencoder` / `encoderN`
# models trained on the warning features are separate variables and are not
# saved here - confirm this is the intended model.
model.save('my_trained_model.keras')

In [None]:
# Score the test file with the saved model and flag rows it reconstructs poorly.
threshold = 0.2

model = tf.keras.models.load_model('my_trained_model.keras')

test_data = pd.read_csv('Testingdata01.csv')
test_data_np = test_data.to_numpy()

predictions = model.predict(test_data_np)

# Mean squared difference between each row and its reconstruction.
squared_residuals = np.square(test_data_np - predictions)
reconstruction_errors = np.mean(squared_residuals, axis=1)

# Row positions whose error exceeds the threshold.
anomalies_indices = np.where(reconstruction_errors > threshold)[0]
print("Anomalies detected at indices:", anomalies_indices)




In [None]:
# Reconstruction error per row as unconnected markers, with the threshold line.
plt.figure(figsize=(10, 6))
plt.title('Reconstruction Errors vs Data Indices')
plt.xlabel('Data Indices')
plt.ylabel('Reconstruction Errors')
plt.plot(reconstruction_errors, label='Reconstruction Errors', color='r', marker='o', linestyle='')
plt.axhline(y=threshold, label='Threshold', color='b', linestyle='--')
plt.legend()
plt.show()



In [None]:
# Reconstruction error per row as a connected line, with the threshold line.
plt.figure(figsize=(10, 6))
plt.title('Reconstruction Errors Over Data Samples')
plt.xlabel('Data Samples')
plt.ylabel('Reconstruction Errors')
plt.plot(reconstruction_errors, label='Reconstruction Errors', color='r')
plt.axhline(y=threshold, label='Threshold', color='b', linestyle='--')
plt.legend()
plt.show()


In [None]:
# Number of rows (the reporting row included) flagged as 'Problem' by the
# labelling loop that follows.
ROLLING_DAYS = 3

# Reload the feature table written out by the encoding step.
df = pd.read_pickle('sampledata2.pkl')

In [None]:
# For every device, derive two per-row labels from 'ProblemReported':
#   'Problem' - 1 on each row where a problem was reported and on the
#               ROLLING_DAYS - 1 rows stored immediately before it, else 0.
#   'DaysTo'  - number of rows until the next reported problem of that device
#               (0 on the reporting row itself), or -1 when no report follows.
# NOTE(review): both labels count rows, so they equal days only if each device
# has one row per day stored in chronological order - presumably the case;
# verify, since the frame is not sorted in this notebook.
for device in df.DeviceID.unique():
    reported_array = df.loc[(df.DeviceID == device), 'ProblemReported'].values
    # Walk the rows back to front so each report can label the rows before it.
    reported_array = reported_array[::-1]
    problem_array = np.zeros(len(reported_array))
    daysto_array = np.zeros(len(reported_array))
    problem_found = False

    for i in range(len(reported_array)):

        if not problem_found:
            # No report seen yet in the reversed walk, i.e. none follows this row.
            daysto_array[i] = -1
        else:
            # One row further away from the most recently seen report.
            daysto_array[i] = daysto_array[i - 1] + 1

        if reported_array[i] == 1:
            problem_found = True
            problem_array[i] = 1
            # A reporting row restarts the distance count.
            daysto_array[i] = 0

            # Flag the next ROLLING_DAYS - 1 rows of the reversed walk,
            # stopping at the end of the array.
            for j in range(i + 1, i + ROLLING_DAYS):
                if j < len(reported_array):
                    problem_array[j] = 1

    # Restore the stored row order before writing the labels back.
    problem_array = problem_array[::-1]
    df.loc[(df.DeviceID == device), 'Problem'] = problem_array

    daysto_array = daysto_array[::-1]
    df.loc[(df.DeviceID == device), 'DaysTo'] = daysto_array



In [None]:
# Split on the distance to the next reported problem.
days_to = df.DaysTo

# Rows at least 540 rows away from the next report.
train_no_anomalies = df.loc[days_to >= 540]
print(train_no_anomalies.shape)

# Rows between 530 (inclusive) and 540 (exclusive) rows away.
test_no_anomalies = df.loc[(days_to >= 530) & (days_to < 540)]
print(test_no_anomalies.shape)

# Rows on which a problem was reported.
test_anomalies = df.loc[df.ProblemReported == 1]
print(test_anomalies.shape)


In [None]:
df_A = pd.concat([train_no_anomalies, test_no_anomalies, test_anomalies], ignore_index=True)

# Record the columns whose standard deviation is below 0.01, then drop them.
low_std = {}

for column in df_A.columns:
    try:
        column_std = df_A[column].std()
        if column_std < 0.01:
            low_std[column] = column_std
    except (TypeError, ValueError):
        # Columns for which a standard deviation cannot be computed or
        # compared with a number (e.g. text or dates) are kept as they are.
        continue
df_A = df_A.drop(columns=list(low_std))

# Model inputs are picked by name fragment plus exact name length. The lengths
# match names such as 'Error_Count_1' (13), 'Fault_Code_Type_1_Count' (23),
# 'Problem_Type_1_Per_Usage_1' / 'Warning_Type_1_Per_Usage_1' (26) and
# 'AE_T1U1_Warning_1' (17), and thereby skip the longer one-hot dummy names.
features = ['ProblemReported', 'DaysTo']
for name_fragment, name_length in (('Error_', 13), ('Fault_', 23), ('Problem_', 26),
                                   ('Warning_', 26), ('AE_', 17)):
    features.extend(feature for feature in df_A.columns
                    if name_fragment in feature and len(feature) == name_length)

In [None]:
# Restrict to the selected features, then rebuild the three subsets without
# the two label columns.
df_A = df_A[features]
print(df_A.shape)

label_columns = ['ProblemReported', 'DaysTo']

train_no_anomalies = df_A.loc[df_A.DaysTo >= 540].drop(columns=label_columns)
print(train_no_anomalies.shape)

test_no_anomalies = df_A.loc[(df_A.DaysTo >= 530) & (df_A.DaysTo < 540)].drop(columns=label_columns)
print(test_no_anomalies.shape)

test_anomalies = df_A.loc[df_A.ProblemReported == 1].drop(columns=label_columns)
print(test_anomalies.shape)