In [None]:
# STEP 1
# Run this cell when working locally (Anaconda or any other Jupyter install).
### Load the dataset into a NumPy array
import numpy as np

Data = np.genfromtxt(fname="synthetic_control.data", dtype=None)
type(Data)

In [None]:
# Run this cell when working in Google Colab to upload the data file.
# Outside Colab the google.colab package does not exist; the import is guarded
# so that "Restart & Run All" does not abort here on a local install.
import io

import numpy as np

try:
    from google.colab import files
except ImportError:
    files = None  # not running on Colab: skip the upload step

if files is not None:
    uploaded = files.upload()

    # files.upload() returns a dict keyed by the uploaded file name.
    if 'synthetic_control.data' not in uploaded:
        raise FileNotFoundError(
            "Expected an upload named 'synthetic_control.data', got: "
            + ", ".join(uploaded)
        )

    Data = np.genfromtxt(io.BytesIO(uploaded['synthetic_control.data']))

In [None]:
## Import pandas and load the dataset into a pandas DataFrame.
## A DataFrame is a table (much like an Excel sheet) and is another
## representation of the same data.
import pandas as pd

# sep=r"\s+" splits on any run of whitespace. It replaces the deprecated
# delim_whitespace=True flag and parses the file the same way.
data_df = pd.read_csv("synthetic_control.data", sep=r"\s+", header=None)
type(data_df)

In [None]:
# STEP 2

## Generate labels based on the documented row layout (1-based row numbers):
## 1-100   Normal            -> label 0
## 101-200 Cyclic            -> label 1
## 201-300 Increasing trend  -> label 2
## 301-400 Decreasing trend  -> label 3
## 401-500 Upward shift      -> label 4
## 501-600 Downward shift    -> label 5
##
## The DataFrame index is 0-based, so row 101 is index 100, i.e. "index > 99".
## Each assignment overwrites the tail of the previous one, so the order
## (ascending thresholds) matters.
data_df['label'] = 0
data_df.loc[(data_df.index > 99), 'label'] = 1
data_df.loc[(data_df.index > 199), 'label'] = 2
data_df.loc[(data_df.index > 299), 'label'] = 3
data_df.loc[(data_df.index > 399), 'label'] = 4
data_df.loc[(data_df.index > 499), 'label'] = 5

In [None]:
# Number of rows assigned to each label.
data_df['label'].value_counts()

In [None]:
# STEP 3

# X keeps every column except the label; Y is the label column on its own.
X = data_df.drop('label', axis=1)
Y = data_df['label']

In [None]:
# STEP 4

## N is the number of samples in the dataset
# np.asarray yields the same array as DataFrame.to_numpy() but also accepts an
# ndarray, so re-running this cell (when X is already an array) does not fail.
X = np.asarray(X)
N = X.shape[0]
N

In [None]:
## T is the number of timestamps in a single time series (second axis of X)
T = np.shape(X)[1]
T

In [None]:
## Dimensionality of the time series:
## M = 1 is a univariate series, M > 1 is a multivariate series.
M = 1

In [None]:
## Allocate an empty (all-zero) dataset of shape (N, T, M).
## The explicit last axis is especially useful when M > 1.
Dataset = np.zeros(shape=(N, T, M))
Dataset.shape

In [None]:
# Add the trailing feature axis: (N, T) -> (N, T, M).
# N, T and M are used instead of the literals 600/60/1 so the cell follows the
# data that was actually loaded. The target shape is passed positionally
# because the `newshape=` keyword is deprecated in recent NumPy releases.
X = np.reshape(X, (N, T, M))
X.shape

In [None]:
## Fill the dataset: copy every series of X into the matching slot of Dataset
for i in range(len(Dataset)):
    Dataset[i, ...] = X[i]

Dataset.shape

In [None]:
# STEP 5

## Plot a single time series before standardization
# matplotlib.pyplot is the supported plotting interface; matplotlib.pylab is
# discouraged, and the rest of this notebook already uses pyplot.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(Dataset[210, :, 0])  # series at index 210, first (only) feature
ax.set_xlabel("Time")
ax.set_ylabel("Control Signal")

plt.show()

In [None]:
## Standard normalization
## StdDataset is a copy of Dataset in which every series has been
## standardized on its own (the scaler is re-fitted for each series).
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler(with_mean=True, with_std=True)
StdDataset = np.copy(Dataset)
for i in range(len(Dataset)):
    # Dataset[i] has shape (T, M): the T timestamps act as samples for the scaler.
    StdDataset[i] = scaler.fit_transform(Dataset[i])

In [None]:
## Plot a single time series after standardization
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(StdDataset[210, :, 0])
ax.set_xlabel("Time")
ax.set_ylabel("Control Signal")

plt.show()

In [None]:
# STEP 6

## Resample every series to 600 points along the time axis (axis 1)
from scipy import signal

StdDataset_r = signal.resample(StdDataset, num=600, axis=1)

In [None]:
## Plot a single time series after resampling
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(StdDataset_r[210, :, 0])
ax.set_xlabel("Time")
ax.set_ylabel("Control Signal")

plt.show()

In [None]:
# STEP 7

## Add noise to the data
# Gaussian noise (mean 0, std 1) with the same shape as the resampled dataset.
# A seeded generator is used so the noise, and therefore every downstream
# result, is the same on each Restart & Run All.
noise = np.random.default_rng(42).normal(0, 1, StdDataset_r.shape)
new_StdDataset_r = StdDataset_r + noise

In [None]:
## Plot a single time series after adding noise
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(new_StdDataset_r[210, :, 0])
ax.set_xlabel("Time")
ax.set_ylabel("Control Signal")

plt.show()

In [None]:
## Randomly cut (zero out) the first 30 observations of 10% of the series
import random

# Dedicated seeded generator: the selection is reproducible and the global
# `random` state is left alone.
cut_rng = random.Random(42)
n_series = new_StdDataset_r.shape[0]
n_cut = n_series // 10  # 10% of the series

# sample() draws WITHOUT replacement, so exactly n_cut distinct series are cut.
# randrange() in a loop could pick the same series twice and cut fewer than 10%.
for num in cut_rng.sample(range(n_series), n_cut):
    new_StdDataset_r[num, 0:30, :] = 0

## Randomly cut (zero out) the last 30 observations of 10% of the series
for num in cut_rng.sample(range(n_series), n_cut):
    new_StdDataset_r[num, -30:, :] = 0

In [None]:
## Plot a single time series after the random cuts
## (series 122 is only zeroed at an end if it happened to be selected above)
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(new_StdDataset_r[122, :, 0])
ax.set_xlabel("Time")
ax.set_ylabel("Control Signal")

plt.show()

In [None]:
# STEP 8

## Plot one example of every class for visual comparison
## class 1 Normal
## class 2 Cyclic
## class 3 Increasing trend
## class 4 Decreasing trend
## class 5 Upward shift
## class 6 Downward shift

# Original data, one panel per class
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(15, 8))
for i in range(6):
    ax = plt.subplot(2, 3, i + 1)
    ax.set_title('class_num ' + str(i + 1))
    # position of the first sample whose label equals i
    loc = np.flatnonzero(Y == i)[0]
    ax.plot(Dataset[loc, :], label=Y[loc])
    ax.legend()

In [None]:
# Standardized data, one panel per class
fig = plt.figure(figsize=(15, 8))
for i in range(6):
    ax = plt.subplot(2, 3, i + 1)
    ax.set_title('class_num ' + str(i + 1))
    # position of the first sample whose label equals i
    loc = np.flatnonzero(Y == i)[0]
    ax.plot(StdDataset[loc, :], label=Y[loc])
    ax.legend()

In [None]:
# Noisy (resampled + noise + cuts) data, one panel per class
fig = plt.figure(figsize=(15, 8))
for i in range(6):
    ax = plt.subplot(2, 3, i + 1)
    ax.set_title('class_num ' + str(i + 1))
    # position of the first sample whose label equals i
    loc = np.flatnonzero(Y == i)[0]
    ax.plot(new_StdDataset_r[loc, :], label=Y[loc])
    ax.legend()

In [None]:
# STEP 9

## Generate labels array
# The former np.zeros((N, 1)) placeholder was overwritten on the very next
# line, so it is dropped. Converting Y gives a plain 1-D NumPy array of labels.
Labels = np.asarray(Y)
Labels.shape

In [None]:
## Generate the initial training and test sets to feed to the model
## Original (standardized) data
from sklearn.model_selection import train_test_split

## Generate train and test arrays (80% train / 20% test).
# random_state pins the shuffle so results are reproducible;
# stratify keeps all classes equally represented in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    StdDataset, Labels, train_size=0.8, shuffle=True,
    random_state=42, stratify=Labels,
)


# perform the classification on the noisy data

In [None]:
# Noisy data
## Generate train and test arrays (80% train / 20% test), shuffled.
# random_state pins the shuffle so results are reproducible;
# stratify keeps all classes equally represented in both parts.
X_train_rn, X_test__rn, y_train_rn, y_test_rn = train_test_split(
    new_StdDataset_r, Labels, train_size=0.8, shuffle=True,
    random_state=42, stratify=Labels,
)

In [None]:
# Install sktime, which the classifier cells below import.
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
%pip install sktime

In [None]:
# STEP 10

## 1-nearest neighbor classifier with Euclidean distance as the distance measure
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sklearn.metrics import confusion_matrix, classification_report

classifier1 = KNeighborsTimeSeriesClassifier(distance="euclidean")

# The labels must come from the SAME split as the features: X_train_rn and
# X_test__rn pair with y_train_rn and y_test_rn. y_train and y_test belong to a
# different shuffle, so using them misaligns samples and labels.
classifier1.fit(X_train_rn.squeeze(), y_train_rn)
y_pred = classifier1.predict(X_test__rn.squeeze())

# confusion matrix and classification report
cm = confusion_matrix(y_test_rn, y_pred)
print(cm)
print(classification_report(y_test_rn, y_pred))

In [None]:

## 1-nearest neighbor classifier with DTW (Dynamic Time Warping) as the distance measure
classifier2 = KNeighborsTimeSeriesClassifier(distance="dtw")  ## Generate a classifier instance

# sktime reads a 3-D array as (instances, variables, time points), while the
# arrays here are laid out (N, T, M). Swap the last two axes so the time axis
# is not mistaken for 600 one-point variables.
# Labels must come from the same split as the features (the *_rn pair).
classifier2.fit(np.transpose(X_train_rn, (0, 2, 1)), y_train_rn)
y_pred = classifier2.predict(np.transpose(X_test__rn, (0, 2, 1)))  ## Predict the unseen test labels

# confusion matrix and classification report
cm = confusion_matrix(y_test_rn, y_pred)
print(cm)
print(classification_report(y_test_rn, y_pred))

In [None]:
## 1-nearest neighbor classifier with ERP as the distance measure
classifier3 = KNeighborsTimeSeriesClassifier(distance="erp")

# sktime reads a 3-D array as (instances, variables, time points), while the
# arrays here are laid out (N, T, M). Swap the last two axes so the time axis
# is not mistaken for 600 one-point variables.
# Labels must come from the same split as the features (the *_rn pair).
classifier3.fit(np.transpose(X_train_rn, (0, 2, 1)), y_train_rn)
y_pred = classifier3.predict(np.transpose(X_test__rn, (0, 2, 1)))

# confusion matrix and classification report
cm = confusion_matrix(y_test_rn, y_pred)
print(cm)
print(classification_report(y_test_rn, y_pred))

In [None]:
## Multiclass support vector machine (SVM) with a linear kernel
from sklearn.svm import SVC

classifier4 = SVC(kernel='linear', verbose=True)
# Size-1 axes are squeezed away before the arrays are handed to the SVM.
classifier4.fit(np.squeeze(X_train_rn), y_train_rn)
y_pred_rn = classifier4.predict(np.squeeze(X_test__rn))  ## Predict the unseen test labels

# confusion matrix and classification report
cm = confusion_matrix(y_true=y_test_rn, y_pred=y_pred_rn)
print(cm)
print(classification_report(y_true=y_test_rn, y_pred=y_pred_rn))

In [None]:
## Multiclass support vector machine (SVM) with an RBF kernel
from sklearn.svm import SVC

classifier5 = SVC(kernel='rbf', verbose=True)
# Size-1 axes are squeezed away before the arrays are handed to the SVM.
classifier5.fit(np.squeeze(X_train_rn), y_train_rn)
y_pred_rn = classifier5.predict(np.squeeze(X_test__rn))  ## Predict the unseen test labels

# confusion matrix and classification report
cm = confusion_matrix(y_true=y_test_rn, y_pred=y_pred_rn)
print(cm)
print(classification_report(y_true=y_test_rn, y_pred=y_pred_rn))

In [None]:
## Time-series forest classifier (5 estimators)
from sktime.classification.interval_based import TimeSeriesForestClassifier

classifier6 = TimeSeriesForestClassifier(n_estimators=5)
# Size-1 axes are squeezed away before fitting and predicting.
classifier6.fit(np.squeeze(X_train_rn), y_train_rn)
y_pred_rn = classifier6.predict(np.squeeze(X_test__rn))  ## Predict the unseen test labels

# confusion matrix and classification report
cm = confusion_matrix(y_true=y_test_rn, y_pred=y_pred_rn)
print(cm)
print(classification_report(y_true=y_test_rn, y_pred=y_pred_rn))

In [None]:
# STEP 11

## Try a deep learning algorithm: sktime's FCNClassifier, trained for 20 epochs
from sktime.classification.deep_learning.fcn import FCNClassifier

fcn = FCNClassifier(n_epochs=20, verbose=True)
# Size-1 axes are squeezed away before fitting and predicting.
fcn.fit(np.squeeze(X_train_rn), y_train_rn)
y_pred_rn = fcn.predict(np.squeeze(X_test__rn))  ## Predict the unseen test labels

# confusion matrix and classification report
cm = confusion_matrix(y_true=y_test_rn, y_pred=y_pred_rn)
print(cm)
print(classification_report(y_true=y_test_rn, y_pred=y_pred_rn))