In [None]:
import os

import numpy as np
import optuna
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [None]:
# Define the function to read OMG data from a CSV file
def read_omg_csv(path_palm_data: str, 
                 n_omg_channels: int, 
                 n_acc_channels: int = 0, 
                 n_gyr_channels: int = 0, 
                 n_mag_channels: int = 0, 
                 n_enc_channels: int = 0,
                 button_ch: bool = True, 
                 sync_ch: bool = True, 
                 timestamp_ch: bool = True) -> pd.DataFrame:
    """Load a space-separated palm recording and label its columns.

    The first and the last line of the file are discarded and the remaining
    rows are read without a header. Column names are then assigned in this
    fixed order:

    1. ``'0' .. str(n_omg_channels - 1)`` for the OMG channels,
    2. ``ACC<i>``, ``GYR<i>``, ``MAG<i>``, ``ENC<i>`` for each sensor group,
       with as many entries as the matching ``n_*_channels`` argument,
    3. ``'BUTTON'``, ``'SYNC'`` and ``'ts'``, each only when its flag is set.

    Args:
        path_palm_data: Path of the recording to read.
        n_omg_channels: Number of OMG channel columns.
        n_acc_channels: Number of ``ACC`` columns.
        n_gyr_channels: Number of ``GYR`` columns.
        n_mag_channels: Number of ``MAG`` columns.
        n_enc_channels: Number of ``ENC`` columns.
        button_ch: Whether the file has a ``BUTTON`` column.
        sync_ch: Whether the file has a ``SYNC`` column.
        timestamp_ch: Whether the file has a trailing ``ts`` column.

    Returns:
        The parsed recording with the column names described above.

    Note:
        The number of generated names has to equal the number of columns in
        the file; otherwise the column assignment is rejected by pandas.
    """
    frame = pd.read_csv(
        path_palm_data,
        sep=' ',
        header=None,
        skiprows=1,
        skipfooter=1,
        engine='python',  # skipfooter is only supported by the python engine
    )

    # OMG channels are named by their zero-based position, as strings.
    names = [str(idx) for idx in range(n_omg_channels)]

    sensor_groups = (
        ('ACC', n_acc_channels),
        ('GYR', n_gyr_channels),
        ('MAG', n_mag_channels),
        ('ENC', n_enc_channels),
    )
    for prefix, count in sensor_groups:
        names.extend('{}{}'.format(prefix, idx) for idx in range(count))

    # Service columns keep this relative order and are appended only if enabled.
    service_columns = (
        ('BUTTON', button_ch),
        ('SYNC', sync_ch),
        ('ts', timestamp_ch),
    )
    names.extend(name for name, enabled in service_columns if enabled)

    frame.columns = names
    return frame

In [None]:
def prepare_training_data(path_palm_data, path_protocol_data, path_meta_data, 
                          standardize=False, normalize=False, n_omg_channels=50):
    """Load one recording and split it into train/test features and labels.

    Steps:

    1. Read the recording with ``read_omg_csv`` (``n_omg_channels`` OMG
       channels plus 3 ACC, 3 GYR, 0 MAG and 6 ENC channels).
    2. Read the protocol table and label-encode every row's combination of
       the ten finger columns into one integer ``gesture`` id.
    3. Give each sample the gesture of the protocol row whose index label
       equals the sample's ``SYNC`` value.
    4. Split by position at ``last_train_idx``, taken from the metadata row
       whose ``montage`` equals the recording's file name.
    5. Optionally scale the features; scalers are fitted on the training part
       only and then applied to the test part.

    Args:
        path_palm_data: Path of the palm recording.
        path_protocol_data: Path of the protocol CSV for that recording.
        path_meta_data: Path of the metadata CSV (``montage``,
            ``last_train_idx`` columns are read).
        standardize: Apply ``StandardScaler`` to the features.
        normalize: Apply ``MinMaxScaler`` to the features. When both flags
            are set, min-max scaling runs on the already standardized data.
        n_omg_channels: Number of OMG channels in the recording; only these
            columns are used as features.

    Returns:
        ``((X_train, y_train), (X_test, y_test))`` as NumPy arrays.

    Raises:
        IndexError: If the metadata has no row for this recording.
        KeyError: If a ``SYNC`` value is not an index label of the protocol.
    """
    # Read the OMG data
    omg_data = read_omg_csv(path_palm_data, n_omg_channels, 3, 3, 0, 6)
    
    # Read the protocol data and encode gestures
    gestures_protocol = pd.read_csv(path_protocol_data)
    le = LabelEncoder()
    gestures_protocol['gesture'] = le.fit_transform(
        gestures_protocol[[
            "Thumb", "Index", "Middle", "Ring", "Pinky",
            'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
        ]].apply(lambda row: str(tuple(row)), axis=1)
    )
    
    # Read meta information; os.path.basename follows the platform's separator
    # rules instead of assuming '/'.
    df_meta = pd.read_csv(path_meta_data)
    palm_file = os.path.basename(path_palm_data)
    split_candidates = df_meta.loc[df_meta['montage'] == palm_file, 'last_train_idx']
    if split_candidates.empty:
        raise IndexError(
            "no row with montage == {!r} in {!r}".format(palm_file, path_meta_data)
        )
    # Cast so that a float-typed metadata column still yields a valid slice bound.
    last_train_idx = int(split_candidates.iloc[0])
    
    # Sync gesture labels with OMG data using SYNC channel (one vectorized
    # label lookup instead of a Python-level loop over every sample).
    y_cmd = gestures_protocol['gesture'].loc[omg_data['SYNC'].values].values
    
    # Feature columns: the OMG channels, named '0' .. str(n_omg_channels - 1)
    OMG_CH = [str(i) for i in range(n_omg_channels)]
    
    # Split data into training and testing sets
    features = omg_data[OMG_CH].values
    X_train = features[:last_train_idx]
    y_train = y_cmd[:last_train_idx]
    X_test = features[last_train_idx:]
    y_test = y_cmd[last_train_idx:]
    
    # Standardization and Normalization (fit on train only to avoid leaking
    # test statistics into the features)
    if standardize:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
    if normalize:
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    
    return (X_train, y_train), (X_test, y_test)

# Input files: the palm recording, its gesture protocol and the shared metadata table
path_palm_data = 'data/2023-05-31_17-14-41.palm'
path_protocol_data = 'data/2023-05-31_17-14-41.palm.protocol.csv'
path_meta_data = 'data/meta_information.csv'

# Build the train/test split with standardization only (min-max scaling is switched off)
(X_train, y_train), (X_test, y_test) = prepare_training_data(
    path_palm_data,
    path_protocol_data,
    path_meta_data,
    standardize=True,
    normalize=False,
)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
def build_and_train_model(X_train, y_train, X_test, y_test, epochs=100, batch_size=50):
    """Train a two-hidden-layer MLP classifier and score it on the test split.

    The network is Dense(128, relu) -> BatchNormalization -> Dropout(0.5),
    repeated twice, followed by a softmax output. It is compiled with Adam
    (default settings) and sparse categorical cross-entropy, and fitted with
    20% of the training data held out for validation.

    Args:
        X_train: Training features, 2-D (the second dimension sets the input width).
        y_train: Integer class ids for ``X_train``.
        X_test: Test features.
        y_test: Integer class ids for ``X_test``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        ``(model, history, test_loss, test_accuracy)`` where ``history`` is
        the object returned by ``model.fit``.
    """
    # Size the softmax from the largest class id seen in either split rather
    # than from the number of distinct training labels: the sparse loss needs
    # every label to be smaller than the output width, which the distinct
    # count violates as soon as some class id is missing from the training
    # slice or only appears in the test slice.
    all_labels = np.concatenate([np.ravel(y_train), np.ravel(y_test)])
    num_classes = int(np.max(all_labels)) + 1

    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=1)
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)

    return model, history, test_loss, test_accuracy

# Train the baseline network on the prepared split and report its test-set score
model, history, test_loss, test_accuracy = build_and_train_model(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

In [None]:
def objective(trial):
    """Optuna objective: train one MLP with sampled hyperparameters.

    Samples the learning rate (log scale, 1e-5..1e-1), the dropout rate
    (0.0..0.7) and the batch size (32/64/128/256), trains the network on the
    notebook-level ``X_train`` / ``y_train`` and returns the accuracy on the
    validation hold-out after the last epoch.

    The score deliberately comes from the validation hold-out and not from
    the test split: choosing hyperparameters by test accuracy would leak the
    test data into model selection and make the final test score optimistic.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Validation accuracy of the trained model (to be maximized).
    """
    # Drop Keras state left over from earlier trials so that memory does not
    # keep growing while the study builds one model per trial.
    clear_session()

    # Suggest hyperparameters (suggest_float supersedes the deprecated
    # suggest_loguniform / suggest_uniform; parameter names are unchanged)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.7)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
    epochs = 100  # The number of epochs could be tuned as well

    # Output width = largest class id + 1, so that every label is a valid
    # index even when some class id is absent from the training labels.
    num_classes = int(np.max(y_train)) + 1

    # Build the model
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax')
    ])
    
    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    # Train the model; validation_split holds out 20% of the training data
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_split=0.2, verbose=0)
    
    # Score the trial on the hold-out, after the final epoch
    return float(history.history['val_accuracy'][-1])  # Maximize accuracy

# Create the study and run the search; Optuna keeps the trial with the highest objective value
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # adjust the number of trials to the available time/resources

# Report the winning trial and the hyperparameters it used
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for param_name, param_value in trial.params.items():
    print("    {}: {}".format(param_name, param_value))