In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import src.model as modelfunc
import src.sequencing as sequencing
from config import config
import pickle
import src.labeler as labeler
import src.imu_extraction as imu_extraction


In [3]:
# Number of training epochs; passed as `epochs` to modelfunc.train_model in the training cell.
EPOCHS = 10

In [None]:
# Report the TensorFlow build and the GPUs it can see. The device list is
# queried once and reused so every line below describes the same snapshot.
gpu_devices = tf.config.list_physical_devices('GPU')

print("TensorFlow Version:", tf.__version__)
print("Is TensorFlow built with CUDA?", tf.test.is_built_with_cuda())
print("Num GPUs Available:", len(gpu_devices))
print("GPU Details:", gpu_devices)

if gpu_devices:
    print("Running on GPU")
else:
    print("GPU not available. Running on CPU")

## parse GPMF

In [None]:
import telemetry_parser
import pandas as pd

def parse_telemetry(file_path):
    """Read the IMU stream of a video file into a flat DataFrame.

    Args:
        file_path: Path of the recording; converted with ``str`` before it is
            handed to ``telemetry_parser.Parser`` (so ``pathlib`` paths work).

    Returns:
        DataFrame with the columns ``TIMESTAMP`` (``timestamp_ms`` divided by
        1000), ``ACCL_x/y/z`` and ``GYRO_x/y/z``, in that order.

    Side effects:
        Prints the camera and model reported by the parser.
    """
    parser = telemetry_parser.Parser(str(file_path))
    print(f"Camera: {parser.camera}, Model: {parser.model}")

    # One row per IMU sample; 'gyro' and 'accl' hold per-axis sequences.
    raw = pd.DataFrame(parser.normalized_imu())

    timestamp = pd.DataFrame(
        (raw['timestamp_ms'] / 1000).tolist(), index=raw.index, columns=['TIMESTAMP']
    )
    gyro = pd.DataFrame(
        raw['gyro'].tolist(), index=raw.index, columns=['GYRO_x', 'GYRO_y', 'GYRO_z']
    )
    accl = pd.DataFrame(
        raw['accl'].tolist(), index=raw.index, columns=['ACCL_x', 'ACCL_y', 'ACCL_z']
    )

    # Accelerometer columns come before gyroscope columns in the output.
    return pd.concat([timestamp, accl, gyro], axis=1)

# Example usage: parse the IMU telemetry embedded in one recording and show the
# resulting frame (TIMESTAMP plus the ACCL_* and GYRO_* columns).
df = parse_telemetry(config.DATA_DIR / "GH010041.MP4")
display(df)


## Add index and label = None

In [None]:
# Resolve the video through config.DATA_DIR, like the telemetry parse of the
# same recording, so the cell does not depend on the current working directory.
video_path = str(config.DATA_DIR / "GH010041.MP4")
df = labeler.frame_index(video_path, df)

display(df)

In [None]:
# Load the labelled samples and discard rows containing missing values.
df = pd.read_csv(config.DATA_DIR / "labeled_GH010041.csv")  # Replace with your data file
df.dropna(inplace=True)

# Windowing settings handed to sequencing.create_sequence further down.
overlap = 0.0
length = 10
num_actions = 5  # Example number of actions

display(df)

# Encode LABEL as integer category codes and keep the code -> name mapping so
# predictions can be translated back to label names later.
label_categories = df["LABEL"].astype("category")
label_mapping = {code: name for code, name in enumerate(label_categories.cat.categories)}
df["LABEL"] = label_categories.cat.codes

display(df)
print(label_mapping)

## Load data

In [24]:
# settings = {
#     "video_path": 0,
#     "imu_path": 0,
#     "overlap": 0.0,
#     "length": 10,
#     "epochs": 5
# }

# label_mapping = {
#     "links": 0,
#     "opstappen": 1,
#     "rechtdoor": 2,
#     "rechts": 3,
#     "remmen": 4
# }
# unique_labels = df["LABEL"].unique()

# df["LABEL"] = df["LABEL"].map(label_mapping)
# n_labels = len(label_mapping)
# label_df = tf.one_hot(df["LABEL"].values, depth=n_labels)

# label_columns = pd.DataFrame(
#     label_df.numpy(), 
#     columns=[f"LABEL_{label}" for label in unique_labels]
# )

# df = df.drop(columns=["LABEL"]).reset_index(drop=True)
# df = pd.concat([df, label_columns], axis=1)

# display(df)
# df.to_csv("test.csv")
# sequences = sequencing.create_sequence(df, settings["overlap"], settings["length"]) # large length can result in memory issues.
# padded_sequences, padded_labels = sequencing.get_filtered_sequences_and_labels(sequences)
# padded_labels = tf.one_hot(df["LABEL"].values, depth=n_labels)
# # print(padded_sequences)
# # print(padded_labels)
# print(padded_sequences.shape, padded_labels.shape) # test

In [None]:
# Preprocess data: cut the labelled frame into windows, then split the windows
# into model inputs and integer labels.
# padded_sequences, padded_labels = sequencing.get_sequences_and_labels(df, overlap, length, num_actions)
sequences = sequencing.create_sequence(df, overlap, length)
padded_sequences = sequencing.get_sequences_pure_data(sequences)
padded_labels = sequencing.get_pure_labels(sequences)
print(padded_labels.shape)
# One-hot depth follows the notebook-level `num_actions` constant instead of a
# second hard-coded copy of the class count.
padded_labels = tf.one_hot(padded_labels, depth=num_actions)

In [None]:
# Sanity-check the sizes of the training inputs and one-hot labels.
print(len(padded_sequences)*len(padded_sequences[0]))  # number of sequences x entries in the first sequence
print(padded_sequences.shape)
print(padded_labels.shape)

In [27]:
validating = False

# Without validation there is no hold-out set.
X_val = None
y_val = None

if validating:
    # Keep the first half for training; the remainder becomes the hold-out set.
    train_size = int(len(padded_sequences) * 0.5)
    X_val, y_val = padded_sequences[train_size:], padded_labels[train_size:]
    padded_sequences, padded_labels = padded_sequences[:train_size], padded_labels[:train_size]

    print("Train shape:", padded_sequences.shape, padded_labels.shape)
    print("Validation shape:", X_val.shape, y_val.shape)

In [28]:
# Generate sample weights to ignore padded timesteps: a timestep gets weight 1
# when any of its values is non-zero and 0 otherwise. Computed in one
# vectorised pass instead of nested Python loops.
sample_weights = np.any(np.asarray(padded_sequences) != 0, axis=-1).astype(int)

## Build Model

In [None]:
# Model parameters
print(padded_sequences.shape)
timesteps = padded_sequences.shape[1] # Variable-length sequences
features = 6  # IMU features (e.g., ax, ay, az, gx, gy, gz)
# Same class count as the one-hot label depth, taken from the shared constant
# rather than a second literal.
num_classes = num_actions  # Actions (e.g., left turn, right turn, stopping)
print(timesteps)
# Build the model
model = modelfunc.build_seq2seq_lstm((timesteps, features), num_classes, dropout=0.4)
model.summary()

## Train Model

In [21]:
# _ = model.predict(X_train[:1])  # Perform a forward pass to initialize the model

# # Build the model explicitly
# model.build(input_shape=(None, timesteps, 6))  # Example: batch size = None, 50 timesteps, 6 features
# model.make_train_function()
# model.make_predict_function()

In [None]:
# Train without a validation set. The per-timestep sample weights make the
# loss ignore padded timesteps.
history = modelfunc.train_model(
    model=model,  # Your pre-defined Keras model
    X_train=padded_sequences,
    y_train=padded_labels,
    sample_weight=sample_weights,  # Optional; pass None if not using sample weights
    batch_size=8,  # Optional; defaults to 16
    epochs=EPOCHS,  # Optional; defaults to 10
)

# Variant with a validation set (needs X_val / y_val from the split cell):
# history = modelfunc.train_model(
#     model=model,  # Your pre-defined Keras model
#     X_train=padded_sequences,
#     y_train=padded_labels,
#     X_val=X_val,
#     y_val=y_val,
#     sample_weight=sample_weights,  # Optional; pass None if not using sample weights
#     batch_size=16,  # Optional; defaults to 16
#     epochs=EPOCHS       # Optional; defaults to 10
# )

## Model prediction

In [None]:
# Build prediction inputs from a second recording, windowed with the same
# `overlap` and `length` settings used for the training data.
df_test = parse_telemetry(config.DATA_DIR / "GH010043.MP4")
sequences = sequencing.create_sequence(df_test, overlap, length)
test_sequences = sequencing.get_sequences_pure_data(sequences)

In [None]:
# Make predictions
predictions = model.predict(test_sequences)
print(predictions.shape)
# NOTE(review): `anomaly` is not imported in any visible cell of this
# notebook; confirm it is brought into scope before this cell runs.
confidence_scores = anomaly.calculate_confidence(predictions)  # computed once (duplicate call removed)
predicted_classes = np.argmax(predictions, axis=-1)  # Class with the highest probability for each timestep

# Print results
print("Confidence scores shape:", confidence_scores.shape)  # Same shape as the input sequence
print("Predicted classes shape:", predicted_classes.shape)

In [None]:
# Entropy of the predicted class distributions, as computed by the project helper.
# NOTE(review): `anomaly` is not imported in any visible cell of this
# notebook; confirm it is brought into scope before this cell runs.
entropy_score = anomaly.calculate_entropy(predictions)
print(entropy_score)

In [26]:
# Flatten the 3D array to 2D (e.g., concatenate along the first axis); the
# last axis is kept, so each output row has predictions.shape[-1] values.
flattened_data = predictions.reshape(-1, predictions.shape[-1])

# Save to a CSV file in the current working directory, five decimals per value.
np.savetxt("output.csv", flattened_data, delimiter=",", fmt="%.5f")

# predictions_file = config.DATA_DIR / 'model_predictions.pkl'

# with predictions_file.open('wb') as file:
#     pickle.dump(predictions, file)

In [27]:
# should_load_predictions = False

# if should_load_predictions:
#     with predictions_file.open('rb') as file:
#         predictions = pickle.load(file)

In [None]:
# Show the argmax class index chosen for every timestep of every sequence.
print(predicted_classes)

In [None]:
# Combine the per-timestep predictions with timestamps and frame indices.
n_predictions = len(predicted_classes.flatten())
bound2 = len(df["TIMESTAMP"])  # row count of the frame (was the None returned by print())
print(n_predictions)
print(bound2)

# Truncate every column to the shorter of the two lengths so the DataFrame
# constructor never receives arrays of different sizes.
bound = min(n_predictions, bound2)

# NOTE(review): `df` is the labelled frame loaded earlier, while the
# predictions in this notebook are made on sequences built from `df_test`;
# confirm the TIMESTAMP / FRAME_INDEX values belong to the predicted recording.
df_results = pd.DataFrame({
    "TIMESTAMP": df["TIMESTAMP"].values[:bound],  # adjust the length if needed
    "FRAME_INDEX": df["FRAME_INDEX"].values[:bound],
    "predicted_class": predicted_classes.flatten()[:bound],
    "confidence": confidence_scores.flatten()[:bound]
})

# Translate class indices back to label names.
df_results["predicted_class"] = df_results["predicted_class"].map(lambda x: label_mapping[x])

print(df_results.head(20))

In [None]:
# Convert DataFrame to list of dictionaries based on changes in predicted_class

def df_to_dict(df):
    """Collapse runs of identical predictions into labelled frame ranges.

    Args:
        df: DataFrame with a ``predicted_class`` and a ``FRAME_INDEX`` column.
            Rows are processed in their stored order.

    Returns:
        A list of dicts, one per run of consecutive rows that share the same
        ``predicted_class``. Each dict has the keys ``label``,
        ``frame_start`` and ``frame_end`` (both frame bounds inclusive).
        An empty frame yields an empty list.
    """
    if len(df) == 0:
        return []

    # Positional arrays, so the result does not depend on the frame's index labels.
    labels = df['predicted_class'].values
    frames = df['FRAME_INDEX'].values

    result = []
    start_idx = 0  # Position where the current run began

    for i in range(1, len(df)):
        if labels[i] != labels[i - 1]:
            # The run ended on the previous row; record it and start a new one.
            result.append({
                "label": labels[start_idx],
                "frame_start": frames[start_idx],
                "frame_end": frames[i - 1]
            })
            start_idx = i

    # The final run is never closed inside the loop.
    result.append({
        "label": labels[start_idx],
        "frame_start": frames[start_idx],
        "frame_end": frames[len(df) - 1]
    })

    return result


# Run the conversion on the prediction table and print what it returns.
result = df_to_dict(df_results)
print(result)

In [None]:
def dict_to_labeledframes(dict_list):
    """Return a DataFrame built directly from ``dict_list``.

    Args:
        dict_list: Data passed unchanged to the ``pd.DataFrame`` constructor,
            e.g. a list of segment dicts.

    Returns:
        The resulting DataFrame.
    """
    return pd.DataFrame(dict_list)

# Bare last expression: the notebook displays the DataFrame built from `result`.
dict_to_labeledframes(result)

In [None]:
def model_scratch(labeled_frames, video_path, imu_path=None):
    """Load IMU data for a recording and attach frame indices.

    Args:
        labeled_frames: Currently unused; kept so existing calls stay valid.
        video_path: Video handed to ``imu_extraction.extract_imu_data`` when
            no ``imu_path`` is given.
        imu_path: Optional CSV file with already extracted IMU data. When it
            is provided it is read with ``pd.read_csv`` instead of extracting
            from the video.

    Returns:
        Whatever ``labeler.add_frame_index`` returns for the loaded frame.
    """
    # `is None` replaces the original `imu_path = None`, which is an
    # assignment and therefore a syntax error inside an `if`.
    if imu_path is None:
        df = imu_extraction.extract_imu_data(video_path)
    else:
        df = pd.read_csv(imu_path)

    unlabeled_df = labeler.add_frame_index(df)

    return unlabeled_df

    


In [46]:
def label_vectorize(df, label_mapping, unique_labels):
    """Replace the ``LABEL`` column with one-hot ``LABEL_<name>`` columns.

    Args:
        df: DataFrame containing a ``LABEL`` column. It is not modified.
        label_mapping: Dict translating each label value to its integer
            index; its size sets the one-hot depth.
        unique_labels: Label names used to name the one-hot columns, in
            column order. Must have as many entries as ``label_mapping``.
            NOTE(review): the i-th name is assumed to be the label mapped to
            index i; confirm against callers.

    Returns:
        A new DataFrame with a fresh 0..n-1 index, all original columns
        except ``LABEL``, followed by the one-hot columns.
    """
    # Map on a separate Series rather than assigning back into `df`, so the
    # caller's frame keeps its original LABEL values.
    label_ids = df["LABEL"].map(label_mapping)
    n_labels = len(label_mapping)
    one_hot = tf.one_hot(label_ids.values, depth=n_labels)

    label_columns = pd.DataFrame(
        one_hot.numpy(),
        columns=[f"LABEL_{label}" for label in unique_labels]
    )

    # Reset the index so the remaining columns line up row by row with the
    # freshly built one-hot block.
    features = df.drop(columns=["LABEL"]).reset_index(drop=True)

    return pd.concat([features, label_columns], axis=1)

In [None]:
# Initial state for run_model.
# No labels are known yet; a dict is used because run_model reads its keys.
label_mapping = {}
settings = {
    # NOTE(review): the original referenced an undefined name `path` for both
    # entries. The recording used earlier in this notebook is substituted
    # here; confirm the intended files.
    "video_path": str(config.DATA_DIR / "GH010041.MP4"),
    "imu_path": None,  # None -> IMU data is extracted from the video
    "overlap": 0.0,
    "length": 10,
    "epochs": 5
}
model = None  # built by run_model on its first (from-scratch) run

def run_model(labeled_frames, settings, from_scratch=True, model=None, unlabeled_df=None, label_mapping=None):
    """Label the IMU frame, train the model and predict a label per timestep.

    Args:
        labeled_frames: List of dicts describing labelled frame ranges. Each
            needs a ``label`` plus inclusive bounds, given either as
            ``beginning_frame`` / ``end_frame`` or as ``frame_start`` /
            ``frame_end`` (the keys emitted by ``df_to_dict``).
        settings: Dict with ``video_path``, ``overlap``, ``length``,
            ``epochs`` and optionally ``imu_path`` (CSV with extracted IMU
            data; when missing or None the data is extracted from the video).
        from_scratch: When true, reload the IMU data and build a new model.
            Forced to true when the labels in ``labeled_frames`` differ from
            the keys of ``label_mapping``.
        model: Existing model to keep training. A new one is built when this
            is None.
        unlabeled_df: Frame with a ``FRAME_INDEX`` column from a previous
            run. Reloaded when this is None.
        label_mapping: Dict label -> class index from a previous run. None or
            an empty container means no labels are known yet.

    Returns:
        ``[sequences, predicted_labels, confidence_scores]``:
        ``sequences`` is the output of ``sequencing.create_sequence``;
        ``predicted_labels`` is an object array of label names with the shape
        of the argmax over the last prediction axis; ``confidence_scores`` is
        the maximum over that same axis.
    """
    # Accept None or the empty-list placeholder used at notebook level.
    label_mapping = dict(label_mapping) if label_mapping else {}

    unique_labels = sorted({item["label"] for item in labeled_frames})
    if unique_labels != sorted(label_mapping):
        # The label set changed, so old class indices (and any model trained
        # on them) no longer apply.
        from_scratch = True
        label_mapping = {label: idx for idx, label in enumerate(unique_labels)}
    n_labels = len(label_mapping)

    if from_scratch or unlabeled_df is None:
        imu_path = settings.get("imu_path")
        if imu_path is None:
            unlabeled_df = imu_extraction.extract_imu_data(settings["video_path"])
        else:
            unlabeled_df = pd.read_csv(imu_path)

        unlabeled_df = labeler.add_frame_index(unlabeled_df)

    df = unlabeled_df.copy()

    for item in labeled_frames:
        start_frame = item["beginning_frame"] if "beginning_frame" in item else item["frame_start"]
        end_frame = item["end_frame"] if "end_frame" in item else item["frame_end"]

        in_range = (df["FRAME_INDEX"] >= start_frame) & (df["FRAME_INDEX"] <= end_frame)
        df.loc[in_range, "LABEL"] = item["label"]

    # NOTE(review): rows outside every labelled range keep a missing LABEL;
    # confirm label_vectorize and the sequencing helpers handle that.
    # convert df using tf.one_hot
    df = label_vectorize(df, label_mapping, unique_labels)

    sequences = sequencing.create_sequence(df, settings["overlap"], settings["length"])  # large length can result in memory issues.
    padded_sequences, padded_labels = sequencing.get_filtered_sequences_and_labels(sequences)
    # TODO: add old sequences as well

    print(padded_sequences.shape, padded_labels.shape)  # test

    # Weight 1 for timesteps with any non-zero value, 0 for all-zero (padded) ones.
    sample_weights = np.any(np.asarray(padded_sequences) != 0, axis=-1).astype(int)

    if from_scratch or model is None:
        timesteps = padded_sequences.shape[1]
        features = 6
        model = modelfunc.build_seq2seq_lstm((timesteps, features), n_labels, dropout=0.4)

    modelfunc.train_model(
        model=model,
        X_train=padded_sequences,
        y_train=padded_labels,
        sample_weight=sample_weights,
        batch_size=16,
        epochs=settings["epochs"]
    )

    # Predict on the sequences built in this call, not on a notebook global.
    predict_sequences = sequencing.get_sequences_pure_data(sequences)
    predictions = model.predict(predict_sequences)  # shape: batches, n_datapoints, n_labels

    # Create reverse mapping from index to label
    reverse_label_mapping = {idx: label for label, idx in label_mapping.items()}

    predicted_classes = np.argmax(predictions, axis=-1)
    confidence_scores = np.max(predictions, axis=-1)

    # Map every predicted class index to its label name, element by element,
    # keeping the array shape.
    to_label = np.vectorize(reverse_label_mapping.__getitem__, otypes=[object])
    predicted_labels = to_label(predicted_classes)
    print(predicted_labels.shape)

    sequences_list = [sequences, predicted_labels, confidence_scores]
    # TODO: restitch predictions into a df with columns:
    #   [timestamp, frame index, prediction, confidence score]
    #   e.g. one row per frame index with the average prediction and confidence.

    return sequences_list
    

    # inverse_label_mapping = {v: k for k, v in label_mapping.items()}