In [None]:
'''
Iterate over the files in the Event Directory and the Non-event Directory one by one and write the File No, features and Label
to a csv file
'''

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import os
import shutil
from scipy.stats import skew, kurtosis
from scipy.signal import find_peaks

# Input folders, one per class; each holds the recordings as individual CSV files.
event_dir = r"C:\Users\Sounak Banerjee\Event Data"          # recordings labelled as events
non_event_dir = r"C:\Users\Sounak Banerjee\Non-event Data"  # recordings labelled as non-events


def extract_features(data_chunk):
    """Compute the 13 summary features of one block of samples.

    Parameters
    ----------
    data_chunk : array-like of numbers
        The samples to summarise. The input is converted to a flat float
        NumPy array, so plain lists are accepted as well as arrays.

    Returns
    -------
    list
        ``[std, var, max, min, peak-to-peak, skewness, kurtosis,
        first quartile, median, third quartile, rms, signal energy,
        entropy]`` in exactly this order.

    Raises
    ------
    ValueError
        If ``data_chunk`` is empty (``np.max`` cannot reduce an empty array).
    """
    # Flatten to float so every statistic sees the same 1-D view of the data
    # and squaring integer input cannot overflow.
    samples = np.asarray(data_chunk, dtype=float).ravel()

    # Statistical features
    std = np.std(samples)
    var = np.var(samples)
    mx = np.max(samples)
    mn = np.min(samples)
    ptp = np.ptp(samples)
    # Skewness and kurtosis were listed in the output but never computed,
    # which made every call fail with NameError.
    sk = skew(samples)
    ks = kurtosis(samples)  # scipy default: Fisher definition (normal -> 0)
    fq = np.percentile(samples, 25)
    sq = np.percentile(samples, 50)
    tq = np.percentile(samples, 75)

    # Derived features
    squared = samples ** 2
    rms = np.sqrt(np.mean(squared))
    se = np.sum(squared)  # signal energy
    # Entropy-style sum -sum(x * log2(x)) over the strictly positive samples
    # only (log2 is undefined for the rest). The samples are used as-is; they
    # are not normalised into a probability distribution.
    positive = samples[samples > 0]
    entr = -np.sum(np.log2(positive) * positive)

    features = [std, var, mx, mn, ptp, sk, ks, fq, sq, tq, rms, se, entr]
    return features

# Column layout of the output dataset: an identifier, the 13 values returned
# by extract_features (in the same order), and the class label.
columns = ['File No', 'Std deviation', 'Variance', 'Maximum', 'Minimum', 'Peak-to-Peak', 'Skewness', 'Kurtosis', 'First Quartile', 'Second Quartile', 'Third Quartile', 'RMS', 'Signal Energy', 'Entropy', 'Label']

output_csv_path = r"C:\Users\Sounak Banerjee\Dataset.csv"

# Event recordings are labelled 1 and non-event recordings 0. Previously both
# passes read the Non-event directory, so event files were never processed and
# every non-event file was written twice with contradictory labels.
labelled_dirs = ((event_dir, 1), (non_event_dir, 0))

rows = []
# Running identifier shared by both classes, so numbers stay unique and
# gap-free however many files each directory holds.
file_number = 1

for directory, label in labelled_dirs:
    # os.listdir returns names in arbitrary order; sorting keeps the
    # File No assignment reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.csv'):
            continue
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path)
        amplitude_values = df['AI0 (V)'].values
        features = extract_features(amplitude_values)
        rows.append([file_number] + features + [label])
        file_number += 1

# Write everything in one pass so the header is always present, even when the
# event directory contains no CSV files, and the file is opened only once.
dataset_df = pd.DataFrame(rows, columns=columns)
dataset_df.to_csv(output_csv_path, mode='w', header=True, index=False)