# Classification of Mock vs Real Care

In [None]:
import numpy as np
import tensorflow
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

def preprocess_data(data, sequence_length):
    """Turn surface-contact strings into next-surface training examples.

    The strings in ``data`` are concatenated into one stream, every distinct
    character ("surface") is assigned an integer code, and a window of
    ``sequence_length`` codes is slid over the stream one step at a time.
    Each window is an input; the code that immediately follows it is the
    label.

    Args:
        data: Iterable of strings; each character is one surface.
        sequence_length: Number of surfaces in each input window (>= 1).

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test, surface_to_int)``. The
        ``x_*`` arrays hold integer-coded windows, the ``y_*`` arrays hold
        one-hot labels with one column per known surface, and
        ``surface_to_int`` maps each surface to its integer code. 20% of
        the windows are held out for testing (``random_state=42``).

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1, or if the
            concatenated data is too short to yield a single window plus
            its label.
    """
    if sequence_length < 1:
        raise ValueError(
            f'sequence_length must be at least 1, got {sequence_length}'
        )

    # Flatten the list of sequences into a single string.
    # Windows may therefore span the boundary between two input strings.
    stream = ''.join(data)

    # Create a mapping from surfaces to integers (sorted for determinism)
    surfaces = sorted(set(stream))
    surface_to_int = {surface: code for code, surface in enumerate(surfaces)}

    # Convert the data to integers using the surface-to-integer mapping
    encoded = [surface_to_int[surface] for surface in stream]

    n_windows = len(encoded) - sequence_length
    if n_windows <= 0:
        raise ValueError(
            f'Need more than {sequence_length} surfaces to build a training '
            f'window, got {len(encoded)}'
        )

    # Split the data into input windows and the surface following each one
    inputs = [encoded[start:start + sequence_length] for start in range(n_windows)]
    labels = encoded[sequence_length:]

    # One-hot encode the labels. num_classes is given explicitly because
    # to_categorical otherwise sizes the output from the largest label that
    # happens to occur, which can be narrower than the vocabulary.
    labels = tensorflow.keras.utils.to_categorical(labels, num_classes=len(surfaces))

    # Every window already has sequence_length items, so this adds no padding
    # in practice; it converts the nested list into a 2-D integer array.
    inputs = tensorflow.keras.preprocessing.sequence.pad_sequences(
        inputs, maxlen=sequence_length, padding='pre', value=0
    )

    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(
        inputs, labels, test_size=0.2, random_state=42
    )

    return x_train, x_test, y_train, y_test, surface_to_int


In [None]:


import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
# Only train_test_split lives in sklearn.model_selection. The names
# encode_input_sequence and decode_prediction are not part of scikit-learn,
# so importing them from there raises ImportError; they have to be defined
# in this notebook instead.
from sklearn.model_selection import train_test_split

# Preprocess the data
# NOTE(review): `data` is first assigned in the "Import the Sequence Data"
# cell further down, so on a fresh kernel this line raises NameError unless
# that cell has been run first. The split is also recomputed there with
# sequence_length=11 before the model is trained.
x_train, x_test, y_train, y_test, surface_to_int = preprocess_data(data, sequence_length=3)




In [None]:
import tensorflow as tensorflow
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential

def build_and_train_model(x_train, y_train, x_test, y_test, surface_to_int, sequence_length, epochs=100):
    """Build an Embedding -> LSTM -> softmax classifier and fit it.

    Args:
        x_train: Integer-coded input windows used for fitting.
        y_train: One-hot labels for ``x_train``; its second dimension sets
            the number of output units.
        x_test: Input windows passed to ``fit`` as validation data.
        y_test: One-hot labels for ``x_test``.
        surface_to_int: Surface-to-code mapping; its size is the embedding
            vocabulary size. Returned unchanged.
        sequence_length: Length of each input window, given to the
            embedding layer as ``input_length``.
        epochs: Number of training epochs.

    Returns:
        Tuple ``(model, surface_to_int)`` with the fitted Keras model.
    """
    vocabulary_size = len(surface_to_int)
    n_classes = y_train.shape[1]

    # Same three layers, declared up front instead of added one by one
    model = Sequential([
        Embedding(input_dim=vocabulary_size, output_dim=10, input_length=sequence_length),
        LSTM(units=50),
        Dense(units=n_classes, activation='softmax'),
    ])

    # Run tf.functions eagerly; note this is a TensorFlow-wide setting,
    # not something scoped to this model.
    tensorflow.config.run_functions_eagerly(True)

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=True,
    )

    # The test split doubles as the per-epoch validation set
    held_out = (x_test, y_test)
    model.fit(x_train, y_train, epochs=epochs, validation_data=held_out)

    return model, surface_to_int


## Import the Sequence Data

In [None]:
# Toy dataset of surface contacts (25 surfaces in total once concatenated)
data = ['ABGFGE', 'GBESGSGS', 'EEGEGEGEBAE']

# Window length used both to cut the training windows and to size the
# model's input layer. Keeping it in one place stops the two from drifting
# apart. With 25 surfaces this yields 14 (window, next-surface) examples.
SEQUENCE_LENGTH = 11

# Preprocess the data
x_train, x_test, y_train, y_test, surface_to_int = preprocess_data(data, sequence_length=SEQUENCE_LENGTH)


# Build and train the model
model, surface_to_int = build_and_train_model(x_train, y_train, x_test, y_test, surface_to_int, sequence_length=SEQUENCE_LENGTH)

## Classification test

In [None]:
import numpy as np


def encode_input_sequence(input_sequence, surface_to_int, sequence_length=None):
    """Convert a sequence of surfaces into a one-row batch of integer codes.

    Args:
        input_sequence: Iterable of surface symbols, e.g. ``['A', 'B']``.
        surface_to_int: Mapping from surface symbol to integer code.
        sequence_length: If given, the row is brought to exactly this length:
            longer inputs keep their last ``sequence_length`` surfaces and
            shorter ones are left-padded with 0.

    Returns:
        Integer array of shape ``(1, len)`` suitable for ``model.predict``.

    Raises:
        ValueError: If a surface is missing from ``surface_to_int``.
    """
    unknown = [surface for surface in input_sequence if surface not in surface_to_int]
    if unknown:
        raise ValueError(f'Unknown surfaces: {unknown}')

    codes = [surface_to_int[surface] for surface in input_sequence]
    if sequence_length is not None:
        # Keep the most recent surfaces, then pad on the left with 0.
        # NOTE(review): 0 is also the code of the first surface in
        # surface_to_int, so padding is indistinguishable from that surface.
        codes = codes[max(len(codes) - sequence_length, 0):]
        codes = [0] * (sequence_length - len(codes)) + codes
    return np.array([codes])


def decode_prediction(prediction, surface_to_int):
    """Return the surface with the highest score in the first prediction row.

    Args:
        prediction: Array of per-surface scores with one row per sample.
        surface_to_int: Mapping from surface symbol to integer code.

    Returns:
        The surface symbol whose code has the largest score in row 0.
    """
    int_to_surface = {code: surface for surface, code in surface_to_int.items()}
    best_code = int(np.argmax(np.asarray(prediction)[0]))
    return int_to_surface[best_code]


# Encode the input sequence, padded to the window length of the training data
input_sequence = ['A', 'B', 'F']
encoded_sequence = encode_input_sequence(
    input_sequence, surface_to_int, sequence_length=x_train.shape[1]
)

# Make a prediction
prediction = model.predict(encoded_sequence)

# Decode the prediction
predicted_label = decode_prediction(prediction, surface_to_int)
print(f'Predicted label: {predicted_label}')


In [None]:
# Score the trained model on the held-out test split. The model is compiled
# with metrics=['accuracy'], so evaluate() yields the loss followed by the
# accuracy, which are unpacked here.
loss, accuracy = model.evaluate(x_test, y_test)
# Report both values to four decimal places
print(f'Test loss: {loss:.4f}, Test accuracy: {accuracy:.4f}')