## **New Dynamic Code**:
15-April, 2024

In [1]:
# Install pyod
!pip install pyod -q

import os
from datetime import datetime
import numpy as np
import pandas as pd

from scipy.stats import ttest_ind
from sklearn.preprocessing import MinMaxScaler

# Import pyod modules one by one
from pyod.models.pca import PCA
from pyod.models.ecod import ECOD
from pyod.models.cblof import CBLOF
from pyod.models.hbos import HBOS
# from pyod.models.knn import KNN
from pyod.models.copod import COPOD
from pyod.models.gmm import GMM
from pyod.models.loda import LODA
from pyod.models.iforest import IForest

# Notebook-wide seed; seeds NumPy's global random number generator once at import time.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

#### Helper functions

In [2]:
def combine_dataframes(dataframes):
    """Join dataframes side by side, drop incomplete rows and add a 'time' column.

    Parameters
    ----------
    dataframes : list of pandas.DataFrame
        Frames to concatenate column-wise. Each output column is labelled with
        the FIRST column name of the corresponding frame, so every frame is
        expected to contribute exactly one column.

    Returns
    -------
    pandas.DataFrame
        The concatenated data without any row containing NaN, plus a 'time'
        column holding ``k * (1 / 2999)`` for the k-th remaining row
        (k = 0, 1, 2, ...).
    """
    # Concatenate the dataframes into a single dataframe
    df_combined = pd.concat(dataframes, axis=1)

    # Label the columns from the frames that were actually passed in. Reading
    # the argument (rather than a same-named global list) keeps the function
    # correct no matter what the caller names its list.
    df_combined.columns = [frame.columns[0] for frame in dataframes]

    # Remove rows with NaN values (e.g. where the inputs differ in length)
    df_combined = df_combined.dropna()

    # Synthetic time axis starting at 0 with a fixed step.
    # NOTE(review): 1 / 2999 is presumably the sampling period of the
    # recordings -- confirm against the acquisition settings.
    dt = 1 / 2999
    df_combined['time'] = np.arange(len(df_combined)) * dt

    return df_combined

def split_groups(df, first_end=300, second_start=1500, second_end=1800):
    """Extract two time windows from a dataframe that has a 'time' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing a numeric 'time' column.
    first_end : float, default 300
        Inclusive upper bound of the first window. The first window has no
        lower bound: every row with ``time <= first_end`` is selected.
    second_start, second_end : float, default 1500 and 1800
        Inclusive bounds of the second window.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(group1, group2)`` -- the rows of `df` in the first and second window,
        with their original index labels.
    """
    time = df['time']

    # First group: everything up to and including `first_end`
    group1 = df[time <= first_end]

    # Second group: the closed interval [second_start, second_end]
    group2 = df[(time >= second_start) & (time <= second_end)]

    return group1, group2

# List of auxiliary functions
def fit_and_get_scores(model, data):
    """Fit `model` on `data` and hand back the model with its scores.

    Returns a ``(model, scores)`` tuple where ``scores`` is whatever the fitted
    model exposes as its ``decision_scores_`` attribute.
    """
    model.fit(data)
    return model, model.decision_scores_

def scale_and_average_scores(scores_df):
    """Min-max scale the per-model score columns and recompute their mean.

    The columns 'PCA', 'ECOD', 'CBLOF', 'HBOS', 'COPOD', 'GMM', 'LODA' and
    'IForest' are overwritten IN PLACE with their MinMaxScaler-transformed
    values, and 'Average_Score' is set to the row-wise mean of those scaled
    columns. The same (mutated) dataframe is returned.
    """
    model_columns = ['PCA', 'ECOD', 'CBLOF', 'HBOS', 'COPOD', 'GMM', 'LODA', 'IForest']

    # Put NumPy's global RNG into a known state before scaling.
    np.random.seed(42)

    scores_df[model_columns] = MinMaxScaler().fit_transform(scores_df[model_columns])
    scores_df['Average_Score'] = scores_df[model_columns].mean(axis=1)
    return scores_df

def display_and_style_df(df, title):
    """Render `df` in the notebook with `title` as caption and 2-decimal floats.

    Note: the index of `df` is reset in place (old index dropped) before the
    frame is displayed, so the caller's dataframe is modified.
    """
    df.reset_index(drop=True, inplace=True)

    # Build the styled view first, then send it to the notebook front end
    styled = pd.DataFrame(df).style.set_caption(title).format("{:.2f}")
    display(styled)

def process_group_scores(group_df):
    """Score one group of values with several PyOD detectors.

    Every detector (PCA, ECOD, CBLOF, HBOS, COPOD, GMM, LODA, IForest) is
    fitted on the values of `group_df` reshaped to a single feature column, and
    its ``decision_scores_`` are collected under the detector's class name.

    Parameters
    ----------
    group_df : pandas.Series or pandas.DataFrame
        Values to score. They are flattened to one column via
        ``group_df.values.reshape(-1, 1)``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(before_scale, after_scale)``: the input values next to the raw
        scores (plus their row mean in 'Average_Score'), and the input values
        next to the scores after `scale_and_average_scores`.
    """
    RANDOM_SEED = 42
    np.random.seed(RANDOM_SEED)

    # Create instances of PyOD models
    models = [
        PCA(),
        ECOD(),
        CBLOF(n_jobs=-1),
        HBOS(),
        # KNN(n_jobs=-1),
        COPOD(),
        GMM(),
        LODA(),
        IForest(n_jobs=-1)
    ]

    # Set random state for models that support it
    for model in models:
        if hasattr(model, 'random_state'):
            model.random_state = RANDOM_SEED

    # The feature matrix is the same for every detector, so build it once.
    features = group_df.values.reshape(-1, 1)

    scores_by_model = {}
    for model in models:
        model_name = model.__class__.__name__
        print(model_name)  # progress marker: each fit can take a while
        _, scores = fit_and_get_scores(model, features)
        scores_by_model[model_name] = scores

    scores_before_scale = pd.DataFrame(scores_by_model)
    scores_before_scale['Average_Score'] = scores_before_scale.mean(axis=1)

    # pd.concat(axis=1) aligns on index labels. The scores carry a fresh
    # 0..n-1 index, so give the input the same positional index; otherwise an
    # input with any other index would be paired with the wrong rows (or NaN).
    values_positional = group_df.reset_index(drop=True)

    # Input values next to the raw scores
    display_df_before_scale = pd.concat([values_positional, scores_before_scale], axis=1)

    # Scale a copy so the raw scores above stay untouched
    scores_after_scale = scale_and_average_scores(scores_before_scale.copy())

    # Input values next to the scaled scores
    display_df_after_scale = pd.concat([values_positional, scores_after_scale], axis=1)

    return display_df_before_scale, display_df_after_scale


def stack_columns_except_time(df):
    """Stack every column of `df` except 'time' end to end into one Series.

    Columns are appended in their existing order and the result gets a fresh
    0..n-1 index.
    """
    value_columns = []
    for name in df.columns:
        if name == 'time':
            continue
        value_columns.append(df[name])
    return pd.concat(value_columns, ignore_index=True)

def before_after_split(dfs):
    """Combine the recordings, cut out the two time windows and stack each one.

    The frames in `dfs` are merged with `combine_dataframes`, the columns are
    sorted by name, the two windows are taken with `split_groups`, and every
    non-'time' column of each window is stacked into a single Series.
    Shapes and head/tail previews are printed/displayed along the way.

    Returns ``(stack_before_5_min, stack_after_30_min)``.
    """
    # Combined frame with alphabetically ordered columns
    combined = combine_dataframes(dfs).sort_index(axis=1)
    print('\n\n--- DF | Head & Tail---\n\n')
    print('Shapes: ', combined.shape)
    display(combined.head(2), combined.tail(2))

    # The two time windows
    first_window, second_window = split_groups(combined)
    print('\n\n--- Group 1 | Groupd 2 | Tail---\n\n')
    print('Shapes: ', first_window.shape, second_window.shape)
    display(first_window.tail(2), second_window.tail(2))

    # One long Series per window
    stack_before_5_min = stack_columns_except_time(first_window)
    stack_after_30_min = stack_columns_except_time(second_window)
    print('\n\n--- 5 Min | 30 Min | Tail---\n\n')
    print('Shapes: ', stack_before_5_min.shape, stack_after_30_min.shape)
    display(stack_before_5_min.tail(2), ' ', stack_after_30_min.tail(2))

    return stack_before_5_min, stack_after_30_min
    


In [3]:
# Root folder of the input recordings; the trailing slash matters because
# later cells build paths with plain string concatenation (main_path + folder).
main_path = '/kaggle/input/zara-analysis-wr/zara-analysis/'
# List the sub-folders available under the input root (IPython shell escape).
!ls /kaggle/input/zara-analysis-wr/zara-analysis

CH1-wr	CH2-wr	CH3-wr	CH4-wr


# Figure i)

In [4]:
import os

# Create one output folder per channel under /kaggle/working/fig1/.
# os.makedirs also creates the missing parent ('fig1'), and exist_ok=True makes
# the cell safe to re-run instead of raising FileExistsError.
for folder in ['CH1-wr','CH2-wr','CH3-wr','CH4-wr']:
    os.makedirs('/kaggle/working/fig1/'+folder, exist_ok=True)

In [5]:
# For every channel folder: load all recordings, split them into the two time
# windows, score each window with the PyOD models and save the results as CSV.
for folder in ['CH1-wr','CH2-wr','CH3-wr','CH4-wr']:
    folder_path = os.path.join(main_path, folder)

    # Make sure the destination exists so this cell can run (and re-run) on its own
    output_dir = os.path.join('/kaggle/working/fig1/', folder)
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for a reproducible run
    text_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))
    print(folder)
    print(text_files)

    # One single-column dataframe per recording, named after its file
    dfs = []
    for file in text_files:
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path, sep='\t', header=None, names=[str(file)])
        dfs.append(df)

    stack_before_5_min, stack_after_30_min = before_after_split(dfs)

    print('Before 5 min')
    print("Current time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    before_scale_5_min, after_scale_5_min = process_group_scores(stack_before_5_min)

    before_scale_5_min.to_csv(os.path.join(output_dir, 'before_scale_5_min.csv'))
    after_scale_5_min.to_csv(os.path.join(output_dir, 'after_scale_5_min.csv'))

    print('After 30 min')
    print("Current time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    before_scale_30_min, after_scale_30_min = process_group_scores(stack_after_30_min)

    before_scale_30_min.to_csv(os.path.join(output_dir, 'before_scale_30_min.csv'))
    after_scale_30_min.to_csv(os.path.join(output_dir, 'after_scale_30_min.csv'))
    

CH1-wr
['WR17Ch1.txt', 'WR20Ch1.txt', 'WR16Ch1.txt', 'WR18Ch1.txt', 'WR19Ch1.txt']


--- DF | Head & Tail---


Shapes:  (6327228, 6)


Unnamed: 0,WR16Ch1.txt,WR17Ch1.txt,WR18Ch1.txt,WR19Ch1.txt,WR20Ch1.txt,time
0,258.4224,280.8238,268.0998,267.5622,272.7593,0.0
1,252.6877,277.4188,265.4117,272.2217,271.5048,0.000333


Unnamed: 0,WR16Ch1.txt,WR17Ch1.txt,WR18Ch1.txt,WR19Ch1.txt,WR20Ch1.txt,time
6327226,270.6088,258.4224,270.4296,272.5801,273.1177,2109.778593
6327227,265.4117,259.8561,266.6661,270.2504,265.0532,2109.778926




--- Group 1 | Groupd 2 | Tail---


Shapes:  (899701, 6) (899701, 6)


Unnamed: 0,WR16Ch1.txt,WR17Ch1.txt,WR18Ch1.txt,WR19Ch1.txt,WR20Ch1.txt,time
899699,272.9385,271.5048,274.3722,270.788,267.383,299.999667
899700,265.2324,272.5801,268.9959,260.573,263.6195,300.0


Unnamed: 0,WR16Ch1.txt,WR17Ch1.txt,WR18Ch1.txt,WR19Ch1.txt,WR20Ch1.txt,time
5398199,273.1177,269.1751,269.3543,267.5622,269.7127,1799.999667
5398200,273.4762,266.1285,266.3077,273.1177,271.3256,1800.0




--- 5 Min | 30 Min | Tail---


Shapes:  (4498505,) (4498505,)


4498503    267.3830
4498504    263.6195
dtype: float64

' '

4498503    269.7127
4498504    271.3256
dtype: float64

Before 5 min
Current time: 2024-04-15 19:22:08
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest
After 30 min
Current time: 2024-04-15 19:31:45
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest
CH2-wr
['WR19Ch2.txt', 'WR18Ch2.txt', 'WR16Ch2.txt', 'WR17Ch2.txt', 'WR20Ch2.txt']


--- DF | Head & Tail---


Shapes:  (6327228, 6)


Unnamed: 0,WR16Ch2.txt,WR17Ch2.txt,WR18Ch2.txt,WR19Ch2.txt,WR20Ch2.txt,time
0,275.4475,268.0998,238.8884,274.3722,272.2217,0.0
1,275.2683,267.9206,241.0389,269.5335,271.684,0.000333


Unnamed: 0,WR16Ch2.txt,WR17Ch2.txt,WR18Ch2.txt,WR19Ch2.txt,WR20Ch2.txt,time
6327226,267.2038,269.3543,272.4009,265.7701,273.2969,2109.778593
6327227,268.6375,267.9206,266.4869,267.383,268.6375,2109.778926




--- Group 1 | Groupd 2 | Tail---


Shapes:  (899701, 6) (899701, 6)


Unnamed: 0,WR16Ch2.txt,WR17Ch2.txt,WR18Ch2.txt,WR19Ch2.txt,WR20Ch2.txt,time
899699,261.2898,272.4009,246.7737,269.3543,271.684,299.999667
899700,267.9206,272.9385,252.5085,269.1751,271.1464,300.0


Unnamed: 0,WR16Ch2.txt,WR17Ch2.txt,WR18Ch2.txt,WR19Ch2.txt,WR20Ch2.txt,time
5398199,266.4869,263.978,267.9206,267.2038,265.7701,1799.999667
5398200,268.9959,264.874,272.5801,271.5048,265.4117,1800.0




--- 5 Min | 30 Min | Tail---


Shapes:  (4498505,) (4498505,)


4498503    271.6840
4498504    271.1464
dtype: float64

' '

4498503    265.7701
4498504    265.4117
dtype: float64

Before 5 min
Current time: 2024-04-15 19:42:24
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest
After 30 min
Current time: 2024-04-15 19:52:33
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest
CH3-wr
['WR16Ch3.txt', 'WR18Ch3.txt', 'WR20Ch3.txt', 'WR19Ch3.txt', 'WR17Ch3.txt']


--- DF | Head & Tail---


Shapes:  (6327228, 6)


Unnamed: 0,WR16Ch3.txt,WR17Ch3.txt,WR18Ch3.txt,WR19Ch3.txt,WR20Ch3.txt,time
0,271.1464,268.9959,293.727,283.512,266.8453,0.0
1,270.2504,271.1464,301.2539,275.6267,265.2324,0.000333


Unnamed: 0,WR16Ch3.txt,WR17Ch3.txt,WR18Ch3.txt,WR19Ch3.txt,WR20Ch3.txt,time
6327226,267.0246,265.0532,268.6375,269.8919,267.0246,2109.778593
6327227,272.5801,270.2504,267.7414,271.5048,267.2038,2109.778926




--- Group 1 | Groupd 2 | Tail---


Shapes:  (899701, 6) (899701, 6)


Unnamed: 0,WR16Ch3.txt,WR17Ch3.txt,WR18Ch3.txt,WR19Ch3.txt,WR20Ch3.txt,time
899699,266.3077,240.3221,267.2038,286.917,270.6088,299.999667
899700,273.8346,241.0389,262.5443,289.4259,265.0532,300.0


Unnamed: 0,WR16Ch3.txt,WR17Ch3.txt,WR18Ch3.txt,WR19Ch3.txt,WR20Ch3.txt,time
5398199,273.8346,271.684,274.9098,272.2217,267.383,1799.999667
5398200,268.0998,271.684,272.5801,272.4009,263.4403,1800.0




--- 5 Min | 30 Min | Tail---


Shapes:  (4498505,) (4498505,)


4498503    270.6088
4498504    265.0532
dtype: float64

' '

4498503    267.3830
4498504    263.4403
dtype: float64

Before 5 min
Current time: 2024-04-15 20:03:10
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest
After 30 min
Current time: 2024-04-15 20:13:37
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest
CH4-wr
['WR18Ch4.txt', 'WR19Ch4.txt', 'WR16Ch4.txt', 'WR17Ch4.txt', 'WR20Ch4.txt']


--- DF | Head & Tail---


Shapes:  (6327228, 6)


Unnamed: 0,WR16Ch4.txt,WR17Ch4.txt,WR18Ch4.txt,WR19Ch4.txt,WR20Ch4.txt,time
0,279.3901,262.3651,285.6625,265.7701,277.598,0.0
1,279.2109,266.4869,284.408,265.0532,270.9672,0.000333


Unnamed: 0,WR16Ch4.txt,WR17Ch4.txt,WR18Ch4.txt,WR19Ch4.txt,WR20Ch4.txt,time
6327226,266.6661,265.4117,273.2969,266.1285,271.1464,2109.778593
6327227,268.8167,263.978,269.5335,270.2504,267.0246,2109.778926




--- Group 1 | Groupd 2 | Tail---


Shapes:  (899701, 6) (899701, 6)


Unnamed: 0,WR16Ch4.txt,WR17Ch4.txt,WR18Ch4.txt,WR19Ch4.txt,WR20Ch4.txt,time
899699,271.3256,266.3077,267.5622,264.6948,278.1356,299.999667
899700,269.3543,269.5335,266.8453,266.4869,273.8346,300.0


Unnamed: 0,WR16Ch4.txt,WR17Ch4.txt,WR18Ch4.txt,WR19Ch4.txt,WR20Ch4.txt,time
5398199,265.5909,267.2038,267.9206,264.3364,265.7701,1799.999667
5398200,261.8274,270.6088,266.8453,264.874,268.9959,1800.0




--- 5 Min | 30 Min | Tail---


Shapes:  (4498505,) (4498505,)


4498503    278.1356
4498504    273.8346
dtype: float64

' '

4498503    265.7701
4498504    268.9959
dtype: float64

Before 5 min
Current time: 2024-04-15 20:24:29
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest
After 30 min
Current time: 2024-04-15 20:35:10
PCA
ECOD
CBLOF




HBOS
COPOD
GMM
LODA
IForest


In [6]:
# Leftover scratch cell: evaluates the literal 1 and has no effect on the analysis.
1

1