In [None]:
!pip install --upgrade pip
!pip install ibm-cos-sdk
!pip install matplotlib
!pip install pandas
!pip install numpy
!pip install scikit-learn


In [None]:
from os import listdir
from os.path import join
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from scipy import stats
from sklearn.decomposition import PCA

In [None]:
# Print versions
!python --version
print('Numpy ' + np.__version__)
print('Pandas ' + pd.__version__)

In [None]:
# Enable interactive matplotlib plots.
# Alternative backend (disabled): %matplotlib notebook
# The widget backend selected below is the one to use if you are getting
# "Javascript Error: IPython is not defined".
# NOTE(review): the widget backend is presumably provided by the ipympl package,
# which the pip cell of this notebook does not install - confirm it is available.
%matplotlib widget

In [None]:
# Directory where raw accelerometer data is stored
dataset_path = 'ceiling-fan-dataset'

# Sub-folders of dataset_path whose samples are treated as normal operation
normal_op_list = ['fan_0_low-deploy']

# Sub-folders of dataset_path whose samples are treated as anomalies
anomaly_op_list = [
    'fan_0_med_0_weight',
    'fan_0_high_0_weight',
    'fan_0_low_1_weight',
    'fan_0_med_1_weight',
    'fan_0_high_1_weight',
]

In [None]:
import os
import ibm_boto3
from ibm_botocore.client import Config, ClientError

# Constants for IBM COS values, read from environment variables so that no
# credentials are stored in the notebook.
COS_ENDPOINT = os.getenv('COS_ENDPOINT') # Current list available at https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints
COS_API_KEY_ID = os.getenv('COS_API_KEY_ID') # eg "W00YixxxxxxxxxxMB-odB-2ySfTrFBIQQWanc--P3byk"
COS_INSTANCE_CRN = os.getenv('COS_INSTANCE_CRN') # eg "crn:v1:bluemix:public:cloud-object-storage:global:a/3bf0d9003xxxxxxxxxx1c3e97696b71c:d6f04d83-6c4f-4a62-a165-696756d63903::"
COS_AUTH_ENDPOINT = os.getenv('COS_AUTH_ENDPOINT') #eg "https://iam.cloud.ibm.com/identity/token"
REGION = os.getenv('REGION')

# Create the COS resource used below to list and download the bucket objects
cos = ibm_boto3.resource("s3",
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_INSTANCE_CRN,
    ibm_auth_endpoint=COS_AUTH_ENDPOINT,
    config=Config(signature_version="oauth"),
    endpoint_url=COS_ENDPOINT,
    region_name=REGION
)

total_list = normal_op_list + anomaly_op_list

# Make sure every target folder exists. exist_ok=True also repairs a partially
# created dataset directory; creating the folders only when the dataset root
# was missing left downloads into absent sub-folders to fail.
for folder in total_list:
    os.makedirs(os.path.join(dataset_path, folder), exist_ok=True)


bucket = 'tead-bucket'

files = cos.Bucket(bucket).objects.all()
i = 0              # number of files downloaded successfully
failed = 0         # number of files whose download raised an error
displayerrors = 0  # set to 1 to print every individual download error
for file in files:
    # Expected key layout: ceiling-fan/fan_0_low_0_weight/0171.csv
    sample = file.key.split("/")
    if len(sample) != 3 or sample[0] != "ceiling-fan":
        continue
    if not sample[2].endswith('.csv') or sample[1] not in total_list:
        continue

    # Build the destination from dataset_path so that it always matches the
    # folders created above.
    local_file_name = os.path.join(dataset_path, sample[1], sample[2])
    try:
        cos.meta.client.download_file(bucket, file.key, local_file_name)
    except Exception as e:
        # Best effort: keep downloading the remaining files, but count the
        # failure so that it is reported instead of being silently dropped.
        failed += 1
        if displayerrors:
            print(type(e).__name__, e)
        continue
    i = i + 1
    if i % 50 == 0:
        print("Downloaded: ", format(i))

if failed:
    print("Failed downloads: ", format(failed), "(set displayerrors = 1 for details)")

In [None]:
from os import listdir
from os.path import join

# Create list of filenames
def createFilenameList(op_list, base_dir=None):
    """Return the paths of all files found in the given sub-folders.

    Parameters
    ----------
    op_list : iterable of str
        Names of the sub-folders (relative to ``base_dir``) to scan.
    base_dir : str, optional
        Directory that contains the sub-folders. Defaults to the notebook-level
        ``dataset_path``.

    Returns
    -------
    list of str
        One flat list with ``base_dir/<folder>/<entry>`` for every directory
        entry, grouped in the order of ``op_list`` and sorted by name within
        each folder. An empty ``op_list`` yields an empty list.

    Raises
    ------
    OSError
        If one of the sub-folders cannot be listed (raised by ``listdir``).
    """
    if base_dir is None:
        base_dir = dataset_path

    op_filenames = []
    for target in op_list:
        target_dir = join(base_dir, target)
        # listdir returns entries in arbitrary order; sorting makes index-based
        # access to the result (e.g. the first sample) reproducible.
        op_filenames.extend(join(target_dir, sample) for sample in sorted(listdir(target_dir)))
    return op_filenames

In [None]:
# Collect the sample file paths of the normal class and report how many there are
normal_op_filenames = createFilenameList(normal_op_list)
print('Number of normal samples:', len(normal_op_filenames))

# Same for the anomaly class
anomaly_op_filenames = createFilenameList(anomaly_op_list)
print('Number of anomaly samples:', len(anomaly_op_filenames))

In [None]:
# Function to plot a normal and an anomaly sample in two stacked panels
def plotTimeSeriesSample(normal_sample, anomaly_sample):
    """Draw the time series of a normal and an anomaly sample.

    A figure with two rows is created: the upper panel shows ``normal_sample``
    and the lower panel ``anomaly_sample``. In each panel the first three rows
    of the transposed sample are plotted as the curves 'x', 'y' and 'z'.
    """
    fig, axs = plt.subplots(2, 1, figsize=(6, 6))
    fig.tight_layout(pad=3.0)

    panels = (
        (axs[0], normal_sample, 'Normal sample'),
        (axs[1], anomaly_sample, 'Anomaly sample'),
    )
    for panel, sample, heading in panels:
        # One curve per accelerometer axis
        for row, axis_name in enumerate(('x', 'y', 'z')):
            panel.plot(sample.T[row], label=axis_name)
        panel.set_title(heading)
        panel.set_xlabel('sample')
        panel.set_ylabel('G-force')
        panel.legend()

In [None]:
# Function to plot 3D scatterplot of normal and anomaly samples
def plotScatterSamples(normal_samples, anomaly_samples, num_samples, title=''):
    """Scatter the first ``num_samples`` normal and anomaly samples in 3D.

    For every index below ``num_samples`` the first three rows of the
    transposed sample are used as x, y and z coordinates. Normal samples are
    drawn in blue ('b') and anomaly samples in red ('r') on a new figure.
    """
    plt.figure()
    ax = plt.axes(projection='3d')

    for idx in range(num_samples):
        # Normal first, then anomaly, for each sample index
        for group, colour in ((normal_samples, 'b'), (anomaly_samples, 'r')):
            coords = group[idx].T
            ax.scatter(coords[0], coords[1], coords[2], c=colour)

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    ax.set_title(title)

In [None]:
# Load the first normal and the first anomalous recording (comma-separated files)
normal_sample, anomaly_sample = (
    np.genfromtxt(sample_path, delimiter=',')
    for sample_path in (normal_op_filenames[0], anomaly_op_filenames[0])
)

# Compare the two accelerometer time series visually
plotTimeSeriesSample(normal_sample, anomaly_sample)

In [None]:
# Print the version of the ipython executable found on the shell PATH
!ipython --version

In [None]:
!pip uninstall ipywidgets -y

In [None]:
!pip install --upgrade ipython

In [None]:
jupyterlab --version

In [None]:
!pip install ipywidgets==7.7.2