In [None]:
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Data Collection
cur_path = "archive"


def list_csv_files(directory):
    """Return the paths matching ``directory + '/*.csv'`` in sorted order.

    glob.glob yields matches in arbitrary, filesystem-dependent order; sorting
    them keeps the order in which files are later read (and therefore the row
    order of the loaded data) the same on every run and every machine.
    """
    return sorted(glob.glob(directory + '/*.csv'))


# One path list for the normal recordings and one per imbalance sub-directory.
normal_file_names = list_csv_files(cur_path + '/normal/normal')
imnormal_file_names_6g = list_csv_files(cur_path + '/imbalance/imbalance/6g')
imnormal_file_names_10g = list_csv_files(cur_path + '/imbalance/imbalance/10g')
imnormal_file_names_15g = list_csv_files(cur_path + '/imbalance/imbalance/15g')
imnormal_file_names_20g = list_csv_files(cur_path + '/imbalance/imbalance/20g')
imnormal_file_names_25g = list_csv_files(cur_path + '/imbalance/imbalance/25g')
imnormal_file_names_30g = list_csv_files(cur_path + '/imbalance/imbalance/30g')

def dataReader(path_names):
    """Read headerless CSV files and stack their rows into one DataFrame.

    Parameters
    ----------
    path_names : iterable of str
        Paths of the CSV files to read. Each file is parsed with
        ``header=None``, so its first line is treated as data.

    Returns
    -------
    pandas.DataFrame
        The rows of all files, in the order the paths were given, under a
        fresh 0..N-1 index. An empty DataFrame when `path_names` is empty.
    """
    # Read everything first and concatenate once: concatenating inside the
    # loop re-copies all previously loaded rows on every iteration.
    frames = [pd.read_csv(path, header=None) for path in path_names]
    if not frames:
        # pd.concat raises ValueError on an empty list; keep returning an
        # empty frame for an empty input instead.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Load and concatenate data
# Read each class's files into its own frame. The names on the left are bound
# in the same order as the path lists on the right.
data_n, data_6g, data_10g, data_15g, data_20g, data_25g, data_30g = (
    dataReader(paths)
    for paths in (
        normal_file_names,
        imnormal_file_names_6g,
        imnormal_file_names_10g,
        imnormal_file_names_15g,
        imnormal_file_names_20g,
        imnormal_file_names_25g,
        imnormal_file_names_30g,
    )
)

# Stack the per-class frames (normal first, then 6g through 30g) under a
# single fresh 0..N-1 index.
data = pd.concat(
    [data_n, data_6g, data_10g, data_15g, data_20g, data_25g, data_30g],
    ignore_index=True,
)

In [None]:
# Create labels
def _class_labels(frame, class_id):
    """Return a one-column DataFrame holding `class_id` once per row of `frame`."""
    return pd.DataFrame(np.full((len(frame), 1), class_id))


# Integer class ids: 0 for the normal frame, then 1-6 for the imbalance frames
# in the order 6g, 10g, 15g, 20g, 25g, 30g.
y_1 = _class_labels(data_n, 0)
y_2 = _class_labels(data_6g, 1)
y_3 = _class_labels(data_10g, 2)
y_4 = _class_labels(data_15g, 3)
y_5 = _class_labels(data_20g, 4)
y_6 = _class_labels(data_25g, 5)
y_7 = _class_labels(data_30g, 6)

# Same stacking order as the feature frames, so row i of `y` labels row i of `data`.
y = pd.concat([y_1, y_2, y_3, y_4, y_5, y_6, y_7], ignore_index=True)

# Train-test split
# Split row positions first so the scaler can be fitted on training rows only.
# Fitting it on every row would let the test set's mean and variance leak into
# the features the model is trained on and make the test metrics optimistic.
# A fixed random_state makes the split (and so the reported metrics) repeatable.
row_positions = np.arange(len(data))
train_rows, test_rows = train_test_split(
    row_positions, test_size=0.25, shuffle=True, random_state=42
)

# Data Preprocessing
scaler = StandardScaler()
scaler.fit(data.iloc[train_rows])      # statistics come from training rows only
data_scaled = scaler.transform(data)   # every row scaled with those statistics

X_train, X_test = data_scaled[train_rows], data_scaled[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# Reshape data for CNN
# Conv1D expects (samples, steps, channels); add a trailing channel axis of size 1.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [None]:
# Model Design
# One convolution over the column axis of each sample, flattened into two
# dropout-regularised dense layers and a 7-way softmax (one unit per class id 0-6).
n_columns = X_train.shape[1]
layer_stack = [
    Conv1D(filters=128, kernel_size=5, activation='relu', input_shape=(n_columns, 1)),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(rate=0.4),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.4),
    Dense(units=7, activation='softmax'),
]
model = Sequential(layer_stack)

# Compile the model
# The sparse loss takes the integer class ids directly; no one-hot encoding needed.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Model Training
# validation_split=0.2 holds back a fifth of the training data for per-epoch validation.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# Model Evaluation
# The network outputs one score per class for every test sample; the predicted
# class is the index of the largest score in each row.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Print classification report and confusion matrix
report_text = classification_report(y_test, y_pred_classes)
print(report_text)
confusion = confusion_matrix(y_test, y_pred_classes)
print(confusion)

# Visualize training history
# One panel per metric, each showing the training curve and the validation
# curve recorded by model.fit. Both panels share the same layout, so they are
# drawn by a single loop on explicit Axes objects instead of two copy-pasted
# pyplot blocks. matplotlib is imported in the notebook's import cell.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    # (history key, panel title, y-axis label)
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
)
for ax, (metric, title, ylabel) in zip(axes, panels):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

fig.tight_layout()
plt.show()