1. Import Dependencies

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd





2. Define Functions

In [2]:
def read_csv_files(folder_paths):
    """Load every CSV file found directly inside the given folders.

    Folders are visited in the order given. Within each folder the files are
    read in sorted name order, so the result does not depend on the arbitrary
    order in which ``os.listdir`` reports directory entries.

    Args:
        folder_paths: Iterable of directory paths to scan (sub-directories
            are not searched).

    Returns:
        list: One NumPy array per CSV file, taken from ``DataFrame.values``
        of the frame parsed by ``pd.read_csv`` with default options.
    """
    data = []
    for folder_path in folder_paths:
        # Sorting keeps the row order, and therefore any seeded split made
        # downstream, reproducible across machines and filesystems.
        for file_name in sorted(os.listdir(folder_path)):
            if file_name.endswith('.csv'):
                file_path = os.path.join(folder_path, file_name)
                df = pd.read_csv(file_path)
                data.append(df.values)
    return data

def preprocess_data(folder_paths):
    """Build train/test arrays from the CSV files in the given folders.

    Every row of every CSV is one sample: the last column is taken as the
    label and all preceding columns as features. Labels are integer-encoded
    with ``LabelEncoder`` and the samples are split 80/20 with a fixed seed.

    Args:
        folder_paths: Iterable of directories passed to ``read_csv_files``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` as NumPy arrays.

    Raises:
        ValueError: If no CSV data is found, if the files disagree on the
            number of columns, or if a feature column cannot be converted
            to a number.
    """
    data = read_csv_files(folder_paths)
    if not data:
        raise ValueError(f"No CSV files found in folders: {folder_paths!r}")

    # Stack all files in one call instead of appending row by row in Python.
    stacked = np.concatenate(data, axis=0)

    # Assuming the last column contains labels
    X = stacked[:, :-1]  # Features
    y = stacked[:, -1]   # Label

    # A non-numeric column (e.g. text labels) makes pandas hand back an
    # object array. Convert the features to real floats so the model can
    # consume them, and let NumPy re-infer a concrete dtype for the labels.
    if X.dtype == object:
        X = X.astype(np.float64)
    if y.dtype == object:
        y = np.array(y.tolist())

    # Label encoding for binary classification
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    return X_train, X_test, y_train, y_test



def create_mil_model(input_shape):
    """Assemble and compile the pooled binary classifier.

    The input is reshaped to ``(input_shape[0], 1)``, passed through a
    64-unit ReLU dense layer, averaged with ``GlobalAveragePooling1D`` and
    mapped to a single sigmoid output.

    Args:
        input_shape: Shape of one sample (batch dimension excluded); its
            first entry is used as the sequence length in the reshape.

    Returns:
        A ``tf.keras.Model`` compiled with the Adam optimizer, binary
        cross-entropy loss and an accuracy metric.
    """
    layers = tf.keras.layers

    feature_input = layers.Input(shape=input_shape)

    # Add a trailing axis of size 1 so the pooling layer has a sequence
    # dimension to average over.
    as_sequence = layers.Reshape((input_shape[0], 1))(feature_input)

    # Shared dense layer, then pooling, then the classification head.
    embedded = layers.Dense(64, activation='relu')(as_sequence)
    pooled = layers.GlobalAveragePooling1D()(embedded)
    probability = layers.Dense(1, activation='sigmoid')(pooled)

    mil_model = tf.keras.Model(inputs=feature_input, outputs=probability)
    mil_model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return mil_model


def train_mil_model(X_train, y_train, epochs=10, batch_size=1024):
    """Create a model sized for ``X_train`` and fit it on the training data.

    Args:
        X_train: Feature array; ``X_train.shape[1:]`` is used as the model's
            input shape.
        y_train: Labels aligned with the rows of ``X_train``.
        epochs: Number of training epochs (defaults to 10).
        batch_size: Samples per gradient update (defaults to 1024).

    Returns:
        The fitted model returned by ``create_mil_model``.
    """
    model = create_mil_model(input_shape=X_train.shape[1:])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
    return model

def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on the given data and print its accuracy.

    Args:
        model: Object whose ``evaluate(X, y)`` returns a ``(loss, accuracy)``
            pair.
        X_test: Features to evaluate on.
        y_test: Labels aligned with ``X_test``.
    """
    # evaluate() yields exactly two values here; only the accuracy is reported.
    _test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_accuracy)

3. Define Data Paths

In [3]:
# Raw strings keep the Windows backslashes literal; in a normal string,
# sequences such as "\C" and "\T" are invalid escapes that Python warns about.
DATA_DIR = r'Z:\Capstone Data\CSV Files'
train_folder_paths = [rf'{DATA_DIR}\Training_Good', rf'{DATA_DIR}\Training_Bad']
test_folder_paths = [rf'{DATA_DIR}\Testing_Good', rf'{DATA_DIR}\Testing_Bad']

4. Train and Evaluate Model

In [4]:
# Load the CSVs from the training folders and split their rows 80/20 into
# train and test sets.
# NOTE(review): test_folder_paths is defined above but is not used in the
# visible cells; the held-out set here is carved out of the training folders.
# Confirm this is intended.
X_train, X_test, y_train, y_test = preprocess_data(train_folder_paths)

In [5]:
# Build the model for the training features' shape and fit it on the training split.
model = train_mil_model(X_train, y_train)



Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# Print the accuracy on the held-out split returned by preprocess_data.
evaluate_model(model, X_test, y_test)



5. Save Model

In [None]:
# Persist the trained model; the name appears to encode the training settings
# (10 epochs, batch size 1024).
# NOTE(review): the path has no file extension. Newer Keras releases expect a
# '.keras' or '.h5' suffix in Model.save — confirm against the installed version.
model.save('Model-E10-BS1024')