In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import dask.dataframe as dk
import numpy as np
from tensorflow.keras.utils import Sequence, to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import OneHotEncoder

# Path to the input CSV (here a Google Drive location); update it to point at your own copy.
# The loader below treats the last column as the class label and every other column as a feature.
csv_file = '/content/drive/MyDrive/Colab_Notebooks/Mapped_Dataset.csv'  # Update with your CSV file path

# Load data in batches using Dask
def load_data_in_batches(csv_file, batch_size=100000, categories=None):
    """Yield ``(features, one_hot_labels)`` NumPy chunks from a CSV read with Dask.

    The last CSV column is treated as the class label; every other column is
    cast to ``float32`` and used as a feature.

    The one-hot encoder is fitted exactly once, on the label set of the whole
    file. Re-fitting it on each partition would give a partition that lacks
    some class a narrower label matrix and could map the same class to
    different columns in different partitions.

    Args:
        csv_file: Path (or glob) passed to ``dask.dataframe.read_csv``.
        batch_size: Maximum number of rows per yielded chunk. Chunks never
            span two Dask partitions, so the last chunk of a partition may
            be smaller.
        categories: Optional iterable holding every distinct label value.
            When omitted, the label column is scanned once up front to
            collect them (one extra pass over the file per call).

    Yields:
        Tuple ``(features, labels)`` where ``features`` has shape
        ``(rows, n_feature_columns)`` and ``labels`` has shape
        ``(rows, n_classes)``.
    """
    ddf = dk.read_csv(csv_file)

    if categories is None:
        # Only the label column is reduced here; the result is one small array.
        label_column = ddf.columns[-1]
        categories = ddf[label_column].unique().compute().to_numpy()

    # Fit once so the class -> column mapping is identical for every partition.
    encoder = OneHotEncoder(sparse_output=False)
    encoder.fit(np.asarray(categories).reshape(-1, 1))

    for partition in ddf.to_delayed():
        df = partition.compute()  # Dask -> pandas, one partition at a time
        features = df.iloc[:, :-1].values.astype(np.float32)
        labels = encoder.transform(df.iloc[:, -1].values.reshape(-1, 1))
        for start in range(0, len(features), batch_size):
            stop = start + batch_size
            yield features[start:stop], labels[start:stop]



# Create TensorFlow Dataset from generator
def create_tf_dataset(csv_file, batch_size=512, num_features=46, num_classes=10):
    """Wrap ``load_data_in_batches`` in a prefetching ``tf.data.Dataset``.

    Args:
        csv_file: Path to the CSV file, forwarded to ``load_data_in_batches``.
        batch_size: Maximum number of rows per emitted batch.
        num_features: Number of feature columns per row; sets the second
            dimension of the feature tensor spec. Defaults to 46.
        num_classes: Width of the one-hot label vector; sets the second
            dimension of the label tensor spec. Defaults to 10.

    Returns:
        A ``tf.data.Dataset`` whose elements are ``(features, labels)`` pairs
        with shapes ``(batch, num_features, 1)`` and ``(batch, num_classes)``,
        both declared as ``float32``.
    """
    output_signature = (
        tf.TensorSpec(shape=(None, num_features, 1), dtype=tf.float32),  # (batch, features, 1)
        tf.TensorSpec(shape=(None, num_classes), dtype=tf.float32),      # (batch, classes)
    )

    def generator():
        for features, labels in load_data_in_batches(csv_file, batch_size):
            # Conv1D expects (batch, steps, channels): append a single channel axis.
            yield np.expand_dims(features, axis=-1), labels

    dataset = tf.data.Dataset.from_generator(generator, output_signature=output_signature)
    return dataset.prefetch(tf.data.AUTOTUNE)


# Streaming training dataset: batches of up to 512 rows read from csv_file.
dataset = create_tf_dataset(csv_file, batch_size=512)

In [None]:
# CNN model definition
def create_cnn_model(input_shape, output_shape):
    """Assemble and compile a small 1-D convolutional classifier.

    Args:
        input_shape: Shape of one sample (without the batch axis), passed to
            the ``Input`` layer.
        output_shape: Number of units in the final softmax layer.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        layers.Input(shape=input_shape),
        # Convolutional feature extractor
        layers.Conv1D(32, 3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Flatten(),
        # Dense classification head
        layers.Dense(units=128, activation='relu'),
        layers.Dropout(rate=0.5),
        layers.BatchNormalization(),
        layers.Dense(units=output_shape, activation='softmax'),
    ]
    cnn = keras.Sequential(layer_stack)
    cnn.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return cnn
# Peek at one batch to derive the model's input and output sizes.
# The features already carry the trailing channel axis that Conv1D needs
# (added inside create_tf_dataset), so no extra reshape is applied here.
sample_features, sample_labels = next(iter(dataset))
input_shape = (sample_features.shape[1], 1)
output_shape = sample_labels.shape[1]
model = create_cnn_model(input_shape, output_shape)
# No steps_per_epoch: the generator-backed dataset is finite and not repeated,
# so each epoch should be exactly one full pass. With a fixed step count Keras
# does not restart the dataset between epochs and runs out of data after the first pass.
model.fit(dataset, epochs=10)
model.save('trained_cnn_model.keras')
print("Model training complete. Model saved as 'trained_cnn_model.keras'.")
