In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the raw sensor recordings; the CSV is looked up relative to the working directory.
df = pd.read_csv("mhealth_raw_data.csv")
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,alx,aly,alz,glx,gly,glz,arx,ary,arz,grx,gry,grz,Activity,subject
0,2.1849,-9.6967,0.63077,0.1039,-0.84053,-0.68762,-8.6499,-4.5781,0.18776,-0.44902,-1.0103,0.034483,0,subject1
1,2.3876,-9.508,0.68389,0.085343,-0.83865,-0.68369,-8.6275,-4.3198,0.023595,-0.44902,-1.0103,0.034483,0,subject1
2,2.4086,-9.5674,0.68113,0.085343,-0.83865,-0.68369,-8.5055,-4.2772,0.27572,-0.44902,-1.0103,0.034483,0,subject1
3,2.1814,-9.4301,0.55031,0.085343,-0.83865,-0.68369,-8.6279,-4.3163,0.36752,-0.45686,-1.0082,0.025862,0,subject1
4,2.4173,-9.3889,0.71098,0.085343,-0.83865,-0.68369,-8.7008,-4.1459,0.40729,-0.45686,-1.0082,0.025862,0,subject1


In [None]:
!pip install pyts

from pyts.image import GramianAngularField





[notice] A new release of pip is available: 24.1.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


# DATASET PREPROCESSING START

In [None]:
# Features: every column except the target label and the subject identifier.
X = df.drop(columns=['Activity', 'subject'])

# Target: the activity label of each row.
y = df['Activity']


In [None]:
# Count rows per activity label to inspect how (un)balanced the classes are.
unique_counts = y.value_counts()
unique_counts

Activity
0     872550
1      30720
2      30720
3      30720
4      30720
9      30720
10     30720
11     30720
5      30720
7      29441
8      29337
6      28315
12     10342
Name: count, dtype: int64

In [None]:
from sklearn.utils import resample

In [None]:
# Recombine features and target so each class can be down-sampled row-wise.
data = pd.concat([X, y], axis=1)

# Size of the rarest class; every class is down-sampled to this many rows.
min_count = data['Activity'].value_counts().min()

# Down-sample each activity independently and collect the pieces in a list.
# Concatenating once at the end avoids re-copying a growing DataFrame on every
# iteration and avoids concatenating onto an empty DataFrame.
balanced_parts = []
for activity in data['Activity'].unique():
    activity_data = data[data['Activity'] == activity]
    activity_data_balanced = resample(activity_data,
                                      replace=False,        # sample without replacement
                                      n_samples=min_count,  # match the minority count
                                      random_state=42)      # reproducible results
    balanced_parts.append(activity_data_balanced)

balanced_data = pd.concat(balanced_parts)

# Separate the features and target again
X_balanced = balanced_data.drop('Activity', axis=1)
y_balanced = balanced_data['Activity']

# Verify the balancing: every class should now have `min_count` rows
print(y_balanced.value_counts())

Activity
0     10342
1     10342
2     10342
3     10342
4     10342
6     10342
7     10342
8     10342
9     10342
10    10342
11    10342
12    10342
5     10342
Name: count, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the balanced rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_balanced,
    y_balanced,
    test_size=0.30,
    random_state=42,
)

In [None]:
# Not part of dataset preprocessing: model imports and custom metric definitions
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, MaxPooling2D, Dropout, GlobalMaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K
from sklearn.metrics import f1_score

# Define precision metric
def precision_m(y_true, y_pred):
    """Precision computed with Keras backend ops.

    True positives are counted by clipping the element-wise product
    ``y_true * y_pred`` to [0, 1], rounding and summing. Predicted positives
    are counted the same way from ``y_pred`` alone. ``K.epsilon()`` is added
    to the denominator so an all-negative prediction does not divide by zero.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_count = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hit_count / (flagged_count + K.epsilon())

# Define recall metric
def recall_m(y_true, y_pred):
    """Recall computed with Keras backend ops.

    True positives are counted by clipping the element-wise product
    ``y_true * y_pred`` to [0, 1], rounding and summing. Actual positives are
    counted the same way from ``y_true`` alone. ``K.epsilon()`` is added to
    the denominator to avoid division by zero.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hit_count / (actual_count + K.epsilon())

# Define f1 score metric
def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

# Load and preprocess your data
# Assuming x_train, y_train, x_test, y_test are your data and labels


In [None]:
# Gramian Angular Field encoder, constructed with the library's default settings.
# It is used below to turn each row of sensor readings into a square image.
gaf = GramianAngularField()

In [None]:
# Encode every row of sensor readings as a Gramian Angular Field image.
# Fit only on the training split and reuse the fitted transformer for the test
# split, so nothing is ever (re)fitted on held-out data.
x_train = gaf.fit_transform(X_train)
x_test = gaf.transform(X_test)

In [None]:
# Sanity check: expect (n_samples, 12, 12), i.e. one 12x12 image per training row.
x_train.shape

(94112, 12, 12)

In [None]:
# One label per training image; the length should equal x_train.shape[0].
y_train.shape

(94112,)

In [None]:
# Peek at the training labels (integer activity ids).
y_train

832586     10
1020810     0
100969     11
873615      3
249946      0
           ..
525017     11
1080424    12
1201115    11
740001      5
377937     12
Name: Activity, Length: 94112, dtype: int64

In [None]:
# Collect the distinct class ids present in each split
unique_labels_train = np.unique(y_train)
unique_labels_test = np.unique(y_test)
print("Unique labels in y_train:", unique_labels_train)
print("Unique labels in y_test:", unique_labels_test)

# Largest class id seen across both splits
max_label_value = np.concatenate((unique_labels_train, unique_labels_test)).max()
print("Max label value:", max_label_value)

# One-hot encoding needs a slot for every id from 0 up to the maximum,
# hence max + 1 classes
num_classes = int(max_label_value) + 1
print("Number of classes:", num_classes)


Unique labels in y_train: [ 0  1  2  3  4  5  6  7  8  9 10 11 12]
Unique labels in y_test: [ 0  1  2  3  4  5  6  7  8  9 10 11 12]
Max label value: 12
Number of classes: 13


# DATASET PREPROCESSING END

# MODEL ARCHITECTURE

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, GlobalMaxPooling2D, Dense

# Define the input shape of the data: 12x12 images with a single channel
input_shape = (12, 12, 1)

# `num_classes` was derived from the labels in the preprocessing section. It is
# deliberately not re-hard-coded here, so the output layer always matches the data.

# Build the model
model = Sequential([
    # Declare the input explicitly. Passing `input_shape=` to the first layer of
    # a Sequential model makes Keras emit a UserWarning.
    tf.keras.Input(shape=input_shape),

    # Block 1: two 64-filter 3x3 convolutions, 2x2 max-pooling, then dropout
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Block 2: two 128-filter 3x3 convolutions, then 2x2 max-pooling
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Collapse the spatial dimensions to one value per feature map
    GlobalMaxPooling2D(),

    # Classifier head
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax')  # One probability per class
])

# Print the model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Adam optimizer with a learning rate of 0.001
optimizer = Adam(learning_rate=0.001)

# Categorical cross-entropy matches the one-hot targets; track accuracy plus
# the custom precision / recall / F1 metric functions.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy', precision_m, recall_m, f1_m],
)

In [None]:
# The model's input is (12, 12, 1), so add a trailing channel axis:
# (n_samples, 12, 12) -> (n_samples, 12, 12, 1), and cast to float32.
# Apply the same reshape and cast to BOTH splits so the validation and
# prediction inputs have exactly the layout and dtype of the training input.
# Reshaping with -1 makes this cell safe to re-run.
x_train = x_train.reshape(-1, 12, 12, 1).astype('float32')
x_test = x_test.reshape(-1, 12, 12, 1).astype('float32')


In [None]:
from keras.utils import to_categorical

# One-hot encode the integer labels for categorical cross-entropy. Reuse
# `num_classes` (the same value that sizes the model's output layer) instead
# of repeating the literal 13, so the two can never drift apart.
y_train_one_hot = to_categorical(y_train, num_classes=num_classes)
y_test_one_hot = to_categorical(y_test, num_classes=num_classes)


In [None]:
# Train for 30 epochs in mini-batches of 64, evaluating on the held-out split
# after every epoch. The returned object records the per-epoch metrics.
history = model.fit(
    x=x_train,
    y=y_train_one_hot,
    batch_size=64,
    epochs=30,
    validation_data=(x_test, y_test_one_hot),
)



Epoch 1/30
[1m1471/1471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 91ms/step - accuracy: 0.5979 - f1_m: 0.5603 - loss: 1.1755 - precision_m: 0.7291 - recall_m: 0.4681 - val_accuracy: 0.7387 - val_f1_m: 0.7365 - val_loss: 0.7589 - val_precision_m: 0.7999 - val_recall_m: 0.6832
Epoch 2/30
[1m1471/1471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 83ms/step - accuracy: 0.7506 - f1_m: 0.7443 - loss: 0.7081 - precision_m: 0.8257 - recall_m: 0.6788 - val_accuracy: 0.7989 - val_f1_m: 0.7980 - val_loss: 0.5798 - val_precision_m: 0.8482 - val_recall_m: 0.7541
Epoch 3/30
[1m1471/1471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 82ms/step - accuracy: 0.7925 - f1_m: 0.7886 - loss: 0.5937 - precision_m: 0.8473 - recall_m: 0.7385 - val_accuracy: 0.8103 - val_f1_m: 0.8088 - val_loss: 0.5490 - val_precision_m: 0.8475 - val_recall_m: 0.7740
Epoch 4/30
[1m1471/1471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 98ms/step - accuracy: 0.8311 - f1_m: 0.8294 - 

In [None]:
# Class probabilities for every held-out sample
y_pred_prob = model.predict(x_test)

# Collapse probability rows / one-hot rows to integer class ids
y_pred = y_pred_prob.argmax(axis=1)
y_true = y_test_one_hot.argmax(axis=1)

# F1 averaged over classes, weighted by each class's support
weighted_f1 = f1_score(y_true, y_pred, average='weighted')
print("Weighted F1 Score:", weighted_f1)

In [None]:
# Persist the trained model to disk as a .keras file in the working directory.
model.save('my_model.keras')

In [None]:
from tensorflow.keras.models import load_model

# Load the model from the .keras file written above. The custom metric
# functions are passed via custom_objects so they can be resolved by name.
model_HAR_loaded = load_model('my_model.keras', custom_objects={'f1_m': f1_m, 'precision_m': precision_m, 'recall_m': recall_m})

# Print the summary of the loaded model to verify
model_HAR_loaded.summary()

In [None]:
import json

# `history` is the Keras History object right after training, but a later cell
# rebinds the same name to the plain dict reloaded from JSON. Accept both so
# this cell still works when it is re-run after that reload.
history_values = getattr(history, 'history', history)

# Convert every recorded value to a plain Python float so it is JSON-serialisable
history_dict = {key: [float(val) for val in values] for key, values in history_values.items()}

# Save the history as JSON
with open('model_history.json', 'w') as f:
    json.dump(history_dict, f)

In [None]:
import json

import numpy as np

# Read the saved history back and turn each metric's list into a NumPy array.
# Note: this rebinds `history` to a plain dict (metric name -> array).
with open('model_history.json', 'r') as f:
    history = {key: np.array(values) for key, values in json.load(f).items()}

# Now you can use the history dictionary
print(history)

In [None]:

import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Assuming `model` is already trained and x_test, y_test_one_hot are available

# Predict class probabilities for the test set
y_pred = model.predict(x_test)

# Convert one-hot / probability rows to integer class labels
y_test_labels = y_test_one_hot.argmax(axis=1)
y_pred_labels = y_pred.argmax(axis=1)

# Use one explicit, sorted label set for both the matrix and its tick labels,
# so they cannot get out of sync when a class appears only among the predictions.
cm_labels = np.unique(np.concatenate((y_test_labels, y_pred_labels)))

# Compute the confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test_labels, y_pred_labels, labels=cm_labels)

# Calculate overall accuracy: correctly classified samples (diagonal) over all samples
accuracy = np.trace(cm) / np.sum(cm)

# Normalize the confusion matrix by row (true classes). Rows with no true
# samples are left at 0 instead of producing NaN from a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals > 0,
)

# Convert to percentages
cm_percent = cm_normalized * 100

# Print the confusion matrix in percentages
print("Confusion Matrix (in percentages):")
print(np.around(cm_percent, decimals=2))

# Print overall accuracy
print(f"Overall Accuracy: {accuracy * 100:.2f}%")

# Plot the confusion matrix. Titles and labels are set on `ax` directly rather
# than through pyplot's "current axes", which is ambiguous once a colorbar exists.
fig, ax = plt.subplots(figsize=(10, 10))
disp = ConfusionMatrixDisplay(confusion_matrix=cm_percent, display_labels=cm_labels)
disp.plot(cmap=plt.cm.Blues, ax=ax, values_format=".2f")
ax.set_title('Confusion Matrix in Percentage')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.grid(False)
plt.show()


In [None]:
# `history` is normally the dict reloaded from JSON. Also accept the Keras
# History object returned by model.fit, so this cell works even if the reload
# cell was skipped.
history_values = getattr(history, 'history', history)

# Extract training and validation loss from history
train_losses = history_values['loss']
val_losses = history_values['val_loss']
epochs = range(1, len(train_losses) + 1)  # 1-based epoch numbers for the x axis

# Plotting train and validation loss curves
plt.figure(figsize=(10, 5))
plt.plot(epochs, train_losses, label='Train Loss')
plt.plot(epochs, val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Curves')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report



# Ensure class_names contains every class present in the labels or the predictions
class_names = np.unique(np.concatenate((y_test_labels, y_pred_labels)))

# classification_report keys its output dict by the target names; use explicit
# strings and pass `labels` so names and classes are guaranteed to line up.
class_keys = [str(class_name) for class_name in class_names]

# Compute classification report
report = classification_report(
    y_test_labels,
    y_pred_labels,
    labels=class_names,
    target_names=class_keys,
    output_dict=True,
)

# Extract metrics for each class
per_class_precision = [report[key]['precision'] for key in class_keys]
per_class_recall = [report[key]['recall'] for key in class_keys]
per_class_f1 = [report[key]['f1-score'] for key in class_keys]

# Per-class (one-vs-rest) accuracy: the fraction of samples for which the
# question "is this sample class c?" gets the same answer from the true label
# and from the prediction, i.e. (TP + TN) / total. This replaces the previous
# random placeholder values, which made the plotted "Accuracy" bars meaningless.
per_class_accuracy = [
    np.mean((y_test_labels == class_name) == (y_pred_labels == class_name))
    for class_name in class_names
]

# Plotting per-class metrics
plt.figure(figsize=(12, 6))

bar_width = 0.2
index = np.arange(len(class_names))

# Four bars per class, centred on the class's tick position
plt.bar(index - 1.5 * bar_width, per_class_precision, width=bar_width, label='Precision')
plt.bar(index - 0.5 * bar_width, per_class_recall, width=bar_width, label='Recall')
plt.bar(index + 0.5 * bar_width, per_class_f1, width=bar_width, label='F1 Score')
plt.bar(index + 1.5 * bar_width, per_class_accuracy, width=bar_width, label='Accuracy')

plt.xlabel('Class')
plt.ylabel('Scores')
plt.title('Per-Class Metrics')
plt.xticks(index, class_names, rotation=45)
plt.ylim(0, 1)  # All plotted metrics lie in [0, 1]
plt.legend()
plt.tight_layout()
plt.show()
