In [140]:
import os
from sktime.transformations.series.fourier import FourierTransform
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sktime.classification.kernel_based import RocketClassifier
from sktime.transformations.panel.rocket import MiniRocket
from sktime.transformations.panel.rocket import MultiRocketMultivariate
from sktime.transformations.panel.rocket import MultiRocket
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import LogisticRegression
from sktime.transformations.panel.rocket import MiniRocketMultivariate
from sktime.classification.deep_learning import InceptionTimeClassifier
from sktime.classification.hybrid import HIVECOTEV2
from sktime.classification.compose import ColumnEnsembleClassifier
from sklearn import preprocessing
import torch.nn as nn
from sktime.regression.kernel_based import RocketRegressor
from sktime.regression.compose import ComposableTimeSeriesForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.linear_model import RidgeClassifierCV
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_curve,roc_auc_score
import sys
from sklearn import preprocessing
from torch.utils.data import TensorDataset, DataLoader
from scipy import stats, signal
import scipy.interpolate as interp
import joblib
import math
import csv
from torch.optim import LBFGS
import xgboost as xgb
import torch
from xgboost import XGBRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
from sklearn.metrics import precision_recall_curve, auc

In [124]:
def from_2d_array_to_nested(X, index=None, columns=None, time_index=None, cells_as_numpy=False):
    """Convert 2D dataframe to nested dataframe.
    Convert tabular pandas DataFrame with only primitives in cells into
    nested pandas DataFrame with a single column.
    Parameters
    ----------
    X : pd.DataFrame
    cells_as_numpy : bool, default = False
        If True, then nested cells contain NumPy array
        If False, then nested cells contain pandas Series
    index : array-like, shape=[n_samples], optional (default = None)
        Sample (row) index of transformed DataFrame
    time_index : array-like, shape=[n_obs], optional (default = None)
        Time series index of transformed DataFrame
    Returns
    -------
    Xt : pd.DataFrame
        Transformed DataFrame in nested format
    """
    if (time_index is not None) and cells_as_numpy:
        raise ValueError(
            "`Time_index` cannot be specified when `return_arrays` is True, "
            "time index can only be set to "
            "pandas Series"
        )
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()

    container = np.array if cells_as_numpy else pd.Series

    # for 2d numpy array, rows represent instances, columns represent time points
    n_instances, n_timepoints = X.shape

    if time_index is None:
        time_index = np.arange(n_timepoints)
    kwargs = {"index": time_index}

    Xt = pd.DataFrame(
        pd.Series([container(X[i, :], **kwargs) for i in range(n_instances)])
    )
    if index is not None:
        Xt.index = index
    if columns is not None:
        Xt.columns = columns
    return Xt

In [141]:
def dividedf(df,labels="test",splits=3):
    
    num_columns = df.shape[1]
    columns_per_third = num_columns // splits
    # Split each column into thirds
    thirds = []
    if labels !="test":
        labels = labels * splits

    for i in range(splits):

        start_col = i * columns_per_third
        end_col = (i + 1) * columns_per_third
        third_df = df.iloc[:, start_col:end_col]
        thirds.append(np.array(third_df))

    # Concatenate the thirds vertically
    result_df = np.concatenate(thirds, axis=0)
    if labels =="test":
        return result_df
    return result_df, labels

In [197]:
k=3
labels = []
dataframes=[]

for i in [0,1,2,3,4,6,8]:
    path = f"D:/Projects/PHM_2023/Data_Challenge_PHM2023_training_data/Pitting_degradation_level_{i}"
    file_list = os.listdir(path)
    filtered_list = [file_name for file_name in file_list if file_name.startswith("V100_500N")]
    
    for file_name in filtered_list:
        complete_path = os.path.join(path,file_name)
        df = pd.read_csv(complete_path, sep=' ', header=None)
        dataframes.append(df.iloc[:-1,0].transpose())

        labels.append(i)

df = pd.concat(dataframes, axis=1).transpose()
fft=FourierTransform()
df = df.dropna(axis=1)
df.reset_index(inplace=True, drop=True)
prev_label = labels[0]
plt.figure(figsize=(12, 10))
# Iterate through the DataFrame rows and labels
value_list=[]
indices=[]

df, labels = dividedf(df.iloc[:,100:],labels,5)
df = pd.DataFrame(df)
df_fft = pd.DataFrame(df)
label_list=[]
for i in range(len(df)):
    label = labels[i]
    
    # Check if the label has changed
    
    # plt.figure(figsize=(12, 3))
    # plt.plot(transformed)
    # plt.show()
    if label not in label_list:

        transformed = np.array(fft.fit_transform(df.iloc[i])[100:])
        label_list.append(label)
        for j in range(k):
            idx = np.argmax(transformed)
            indices.append(idx)
            transformed[max(0,idx-10):min(len(transformed),idx+10)]=0

        transformed = np.array(fft.fit_transform(df.iloc[i])[100:])
indices = list(set(indices))
for i in range(len(df)):
    label = labels[i]
    
    # Check if the label has changed
    transformed = np.array(fft.fit_transform(df.iloc[i])[100:])
    # plt.figure(figsize=(12, 3))
    # plt.plot(transformed)
    # plt.show()

            
    for idx in indices:
        value_list.append(np.max(transformed[max(0,idx-10):min(len(transformed),idx+10)]))
        #print(np.argmax(transformed[idx-10:idx+10]))
value_list = np.array(value_list).reshape(-1,len(indices))

df = pd.DataFrame(value_list)
df_averages=df
data = {"indices":indices,
        "values":df}


<Figure size 1200x1000 with 0 Axes>

In [199]:
labels

[0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 3,
 3,
 4,
 4,
 4,
 4,
 6,
 6,
 6,
 8,
 8,
 8,
 8,
 8,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 3,
 3,
 4,
 4,
 4,
 4,
 6,
 6,
 6,
 8,
 8,
 8,
 8,
 8,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 3,
 3,
 4,
 4,
 4,
 4,
 6,
 6,
 6,
 8,
 8,
 8,
 8,
 8,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 3,
 3,
 4,
 4,
 4,
 4,
 6,
 6,
 6,
 8,
 8,
 8,
 8,
 8,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 3,
 3,
 4,
 4,
 4,
 4,
 6,
 6,
 6,
 8,
 8,
 8,
 8,
 8]

In [198]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,172.769196,1421.799554,162.166870,162.166870,153.377077,172.769196,153.377077,8.504708,9.413069,161.374596,161.374596,231.357597,231.357597
1,185.830734,1397.528600,157.621500,157.621500,185.802770,176.630174,185.802770,9.684713,12.474007,151.330257,151.330257,222.168789,222.168789
2,194.804803,1388.338349,156.483428,156.483428,207.699433,194.804803,207.699433,10.768107,11.278132,148.925053,148.925053,276.461993,276.461993
3,244.124115,1350.651651,152.569639,152.569639,267.658748,208.499287,267.658748,10.057764,10.375375,171.877903,171.877903,266.056251,266.056251
4,243.424940,5638.612268,577.634746,577.634746,201.953396,279.017028,201.953396,599.912405,579.086956,544.987459,544.987459,274.703321,274.703321
...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,150.675736,892.731831,189.006679,189.006679,247.944862,238.870248,247.944862,8.897747,8.841587,199.545109,199.545109,310.193034,310.193034
126,177.309648,893.288472,188.228189,188.228189,245.050392,247.098421,245.050392,7.229006,9.368254,201.576147,201.576147,356.735835,356.735835
127,252.903176,894.205061,189.688562,189.688562,242.925270,210.144966,242.925270,5.682657,6.487874,200.610067,200.610067,320.912106,320.912106
128,280.572771,881.420553,184.430804,184.430804,224.317811,206.475356,224.317811,6.334072,8.201579,198.330811,198.330811,371.435066,371.435066


In [127]:
for j in np.arange(0,len(df_fft)):
    df_2d = df_fft[j].transpose()
    df_nested = from_2d_array_to_nested(df_fft)
    df1 = df_nested.transpose()
    if j==0:
        df_fft=df1
    else: 
        df_fft = pd.concat([df,df1], axis=0)

df_fft.reset_index(inplace=True, drop=True)

In [128]:
if os.path.exists("C:/PHM_2023_Datadump/mini_rocket_1024"):
    # Load the model from file
    mrm = joblib.load("C:/PHM_2023_Datadump/mini_rocket_1024")

    df_transformed = mrm.transform(df_fft)
else:
    mrm = MiniRocket(num_kernels=10, max_dilations_per_kernel=32, n_jobs=-1)
    df_transformed = mrm.fit_transform(df_fft)
    joblib.dump(mrm, "C:/PHM_2023_Datadump/mini_rocket_1024")

ValueError: Incompatible indexer with Series

In [123]:
df_transformed

Unnamed: 0,0__0,0__1,0__2,0__3,0__4,0__5,0__6,0__7,0__8,0__9,...,49335__914,49335__915,49335__916,49335__917,49335__918,49335__919,49335__920,49335__921,49335__922,49335__923
0,0.384,0.76,0.152,0.529915,0.91453,0.299145,0.672,0.056,0.44,0.811966,...,0.8,0.88,0.6,0.64,0.2,0.408,0.6,0.176,0.4,0.936


In [122]:
multivar_rocket = Pipeline([('scl', StandardScaler(with_mean=False)),
                        ('clf',  RidgeCV())])
cv = StratifiedKFold(n_splits=3)
y_pred = cross_val_predict(multivar_rocket, df_transformed, labels, cv=cv)
print(confusion_matrix(labels, np.round(labels)))
multivar_rocket.fit(df_transformed, labels)

ValueError: Found input variables with inconsistent numbers of samples: [1, 125]

In [187]:
labels = np.array(labels)
labels[np.where(labels == 6)] = 5

# Change labels from 8 to 6
labels[np.where(labels == 8)] = 6

In [188]:
from xgboost import XGBClassifier

labels_df = pd.DataFrame({'labels': labels})

# Concatenate the labels DataFrame with the original df
df_with_labels = pd.concat([df, labels_df], axis=1)

# Shuffle the DataFrame
shuffled_df = df_with_labels.sample(frac=1, random_state=42)

# Separate shuffled_df back into df and labels
df = shuffled_df.drop(columns=['labels'])
labels = np.array(shuffled_df['labels'],dtype=np.float32)
pipeline = Pipeline([("scaler", StandardScaler()), ("ridge", XGBClassifier(min_child_weight=0.1))])
y_pred = cross_val_predict(pipeline, df, labels)
print(y_pred)
print(confusion_matrix(np.round(y_pred), labels))
pipeline.fit(df, labels)

[0 4 5 1 2 1 4 0 0 0 4 0 3 1 5 6 2 0 4 1 5 2 4 2 6 5 5 0 0 0 3 4 6 6 4 2 2
 4 6 6 5 3 5 1 2 5 2 1 4 6 3 0 5 6 0 1 4 6 3 6 2 4 6 2 1 1 4 6 0 1 2 3 4 0
 4 3 5 6 1 1 2 2 0 6 4 5 1 2 6 0 5 3 4 1 3 6 2 1 1 6 1 5 1 2 1 2 0 0 3 0 0
 6 0 6 6 6 2 4 6 2 1 4 6 5 2 5 0 4 4 6]
[[20  0  0  0  0  0  1]
 [ 0 20  0  0  0  0  0]
 [ 0  0 20  0  0  0  0]
 [ 0  0  0 10  0  0  0]
 [ 0  0  0  0 20  0  0]
 [ 0  0  0  0  0 15  0]
 [ 0  0  0  0  0  0 24]]


In [178]:


dataframes=[]


path = f"D:/Projects/PHM_2023/Data_Challenge_PHM2023_test_data"
file_list = os.listdir(path)
filtered_list = [file_name for file_name in file_list if file_name.endswith("V100_500N.txt")]

for file_name in filtered_list:
    complete_path = os.path.join(path,file_name)
    df_test = pd.read_csv(complete_path, sep=' ', header=None)
    dataframes.append(df_test.iloc[:-1,0].transpose())


df_test = pd.concat(dataframes, axis=1).transpose()
fft=FourierTransform()
df_test = df_test.dropna(axis=1)
df_test.reset_index(inplace=True, drop=True)

plt.figure(figsize=(12, 10))
# Iterate through the DataFrame rows and labels
value_list=[]

print(df_test.shape)
df_test= dividedf(df_test.iloc[:,100:],splits=5)
df_test = pd.DataFrame(df_test)
df_fft_test = pd.DataFrame(df_test)
for i in range(len(df_test)):

    
    # Check if the label has changed
    transformed = np.array(fft.fit_transform(df_test.iloc[i])[100:])
    # plt.figure(figsize=(12, 3))
    # plt.plot(transformed)
    # plt.show()
        
            
    for idx in indices:
        value_list.append(np.max(transformed[max(0,idx-10):min(len(transformed),idx+10)]))
        #print(np.argmax(transformed[idx-10:idx+10]))
value_list = np.array(value_list).reshape(-1,len(indices))

df_test = pd.DataFrame(value_list)

(11, 246784)


<Figure size 1200x1000 with 0 Axes>

In [166]:
labels[:26]

[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 6, 6, 6, 8, 8, 8, 8, 8]

In [179]:
df[:26]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,172.769196,1421.799554,162.16687,162.16687,153.377077,172.769196,153.377077,8.504708,9.413069,161.374596,161.374596,231.357597,231.357597
1,185.830734,1397.5286,157.6215,157.6215,185.80277,176.630174,185.80277,9.684713,12.474007,151.330257,151.330257,222.168789,222.168789
2,194.804803,1388.338349,156.483428,156.483428,207.699433,194.804803,207.699433,10.768107,11.278132,148.925053,148.925053,276.461993,276.461993
3,244.124115,1350.651651,152.569639,152.569639,267.658748,208.499287,267.658748,10.057764,10.375375,171.877903,171.877903,266.056251,266.056251
4,243.42494,5638.612268,577.634746,577.634746,201.953396,279.017028,201.953396,599.912405,579.086956,544.987459,544.987459,274.703321,274.703321
5,325.751951,5755.436382,599.241124,599.241124,221.13717,198.238154,221.13717,628.259345,619.387935,457.031151,457.031151,312.901102,312.901102
6,168.580899,5815.667649,557.057338,557.057338,239.401839,196.033737,239.401839,636.187553,608.745981,434.421211,434.421211,245.457577,245.457577
7,307.092256,5548.787836,563.068907,563.068907,196.487172,249.735098,196.487172,616.746019,582.984315,426.282828,426.282828,267.297506,267.297506
8,218.805095,1347.648963,86.19171,86.19171,259.322792,218.805095,259.322792,92.70781,79.306909,123.586023,123.586023,281.837427,281.837427
9,237.967644,1347.335099,98.554933,98.554933,249.948576,237.967644,249.948576,93.178853,78.227645,80.228845,80.228845,429.735424,429.735424


In [180]:
df_test[:11]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,290.407521,1195.441043,178.494262,178.494262,216.913262,290.407521,216.913262,10.718152,12.062935,198.375567,198.375567,305.081433,305.081433
1,232.15014,1176.880083,163.508991,163.508991,290.584835,223.486293,290.584835,8.335252,10.39786,188.702833,188.702833,335.611322,335.611322
2,183.447762,5557.767992,469.26778,469.26778,269.88411,307.561237,269.88411,594.340621,537.62534,379.109177,379.109177,303.777404,303.777404
3,202.021206,1191.352115,166.720455,166.720455,162.206775,196.304137,162.206775,9.980143,11.63715,185.522163,185.522163,357.893874,357.893874
4,256.053807,1184.839196,167.111441,167.111441,263.195008,165.363234,263.195008,9.917977,11.029692,192.965171,192.965171,266.641359,266.641359
5,308.640009,5230.54674,1242.528383,1242.528383,363.400497,308.640009,363.400497,74.472214,84.045048,1356.929365,1356.929365,402.673242,402.673242
6,237.383925,1360.245436,103.354996,103.354996,233.404948,245.442311,233.404948,93.562127,79.217769,142.050622,142.050622,393.44656,393.44656
7,204.470106,1020.39184,159.438359,159.438359,181.352702,204.470106,181.352702,6.373357,8.085127,181.457883,181.457883,311.831577,311.831577
8,216.538161,1031.828713,146.777967,146.777967,256.985957,216.538161,256.985957,6.271934,7.58827,167.744913,167.744913,402.245884,402.245884
9,320.745871,1030.869715,149.357569,149.357569,327.145592,273.291811,327.145592,6.810328,9.002051,173.285446,173.285446,373.083266,373.083266


In [182]:
from pyod.models.lof import LOF
anomaly_model= LOF(contamination = 0.000001, n_neighbors=10, n_jobs=-1)
anomaly_model.fit(df)
anomaly_scores_train = anomaly_model.decision_function(df)
print(anomaly_scores_train)
anomaly_scores_test = anomaly_model.decision_function(df_test)
print(anomaly_scores_test)

[1.15104029 1.02932381 0.97296801 0.99866574 0.96195539 1.04886019
 1.09684184 1.08786435 1.00016125 1.13746376 1.04985861 0.94584426
 1.60965602 1.90944744 1.03674262 1.01956889 0.97354429 0.97354429
 1.03465044 1.15094714 0.96062938 0.97444606 1.02193945 0.96302915
 1.23350772 0.99734755 1.00741067 0.95167051 0.99237722 0.97981307
 0.98814469 1.04397607 1.13193372 0.98456058 0.96973455 0.9542155
 1.30723726 1.06436455 3.11579463 1.6279738  1.02786008 1.02115139
 0.99616382 0.97354429 1.04148187 0.96364312 0.99824949 1.03256857
 1.16376093 0.96369579 1.0964066  1.16623733 0.98349651 0.98594057
 0.98008292 1.05316007 0.9845427  0.96256745 1.08342357 0.9845427
 0.95031068 1.03943906 1.01760668 0.96679729 2.90788704 1.60012551
 1.0432618  1.01956889 0.99616382 0.99616382 0.95336354 0.94551349
 0.98149972 1.24059669 0.98910212 1.04289769 1.27208394 1.13167442
 0.98964611 0.9744358  0.96503126 1.20351259 0.99273521 0.95397915
 1.03997452 0.97713304 0.96979374 1.03170105 1.25969003 1.051228

In [96]:
df_fft_test.shape

(45, 49336)

In [None]:
if os.path.exists("C:/PHM_2023_Datadump/mini_rocket_1024"):
    # Load the model from file
    mrm = joblib.load("C:/PHM_2023_Datadump/mini_rocket_1024")

    df_transformed_test = mrm.transform(df_fft_test)
else:
    mrm = MiniRocketMultivariate(num_kernels=1000, max_dilations_per_kernel=1024, n_jobs=-1)
    df_transformed_test = mrm.fit_transform(df_fft_test)
    joblib.dump(mrm, "C:/PHM_2023_Datadump/mini_rocket_1024")

In [189]:
y_pred = pipeline.predict(df_test)

xgb classifier

In [190]:
np.round(y_pred)

array([0, 0, 1, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 4, 2, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 5, 2, 0, 0, 0, 0], dtype=int64)

In [None]:
y_pred = multivar_rocket.predict(df_fft_test)

fft_transformed

In [None]:
np.round(y_pred)

xgregressor

In [60]:
np.round(y_pred)

array([3., 0., 8., 3., 0., 0., 8., 6., 0., 3., 0., 8., 3., 0., 0., 8., 6.,
       0., 3., 0., 8., 3., 0., 0., 8., 6., 0., 3., 0., 8., 3., 0., 0., 8.,
       6., 0., 3., 0., 8., 3., 0., 0., 8., 6., 0.], dtype=float32)

fft

In [103]:
np.round(y_pred)

array([ 0.,  1.,  5.,  4.,  5.,  4.,  5.,  4.,  4., -3.,  4.,  6.,  6.,
        7.,  5.,  5.,  5.,  6., -0.,  6.,  5.,  7.,  7.,  6.,  5.,  5.,
        6.,  2.,  5.,  5.,  6.,  4.,  5.,  4.,  5.,  5.,  0.,  5.,  4.,
       -0.,  6.,  6.,  5.,  6.,  6.])