In [1]:
from google.colab import files
uploaded = files.upload()

Saving raw.txt to raw.txt


In [13]:
import pandas as pd
import re

# Load the entire file as one big string
with open("raw.txt", "r") as file:
    content = file.read()

# Use regex to extract valid records
pattern = r"(\d+),(\w+),(\d+),([-.\d]+),([-.\d]+),([-.\d]+);"
matches = re.findall(pattern, content)

# Convert to DataFrame
df = pd.DataFrame(matches, columns=['user', 'activity', 'timestamp', 'x', 'y', 'z'])

# Convert numeric columns
df['user'] = df['user'].astype(int)
df['timestamp'] = df['timestamp'].astype(int)
df['x'] = df['x'].astype(float)
df['y'] = df['y'].astype(float)
df['z'] = df['z'].astype(float)

df.head()


Unnamed: 0,user,activity,timestamp,x,y,z
0,33,Jogging,49105962326000,-0.694638,12.680544,0.503953
1,33,Jogging,49106062271000,5.012288,11.264028,0.953424
2,33,Jogging,49106112167000,4.903325,10.882658,-0.081722
3,33,Jogging,49106222305000,-0.612916,18.496431,3.023717
4,33,Jogging,49106332290000,-1.18497,12.108489,7.205164


In [26]:
import numpy as np

# Parameters
window_size = 80  # around 10 seconds of data assuming 20Hz
step_size = 40

segments = []
labels = []

for i in range(0, len(df) - window_size, step_size):
    x = df['x'].values[i: i + window_size]
    y = df['y'].values[i: i + window_size]
    z = df['z'].values[i: i + window_size]
    label = df['activity'][i + window_size // 2]
    segments.append([x, y, z])
    labels.append(label)

X = np.asarray(segments, dtype=np.float32)
X = np.transpose(X, (0, 2, 1))  # reshape to (samples, timesteps, channels)

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(labels)


In [27]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)


In [28]:
import tensorflow as tf
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Conv1D(64, 5, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    layers.Conv1D(64, 5, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Dropout(0.5),
    layers.Flatten(),
    layers.Dense(100, activation='relu'),
    layers.Dense(len(le.classes_), activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [30]:
history = model.fit(X_train, y_train, epochs=10, batch_size=64,
                    validation_data=(X_test, y_test))


Epoch 1/10
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 33ms/step - accuracy: 0.7127 - loss: 0.8851 - val_accuracy: 0.8960 - val_loss: 0.2861
Epoch 2/10
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 33ms/step - accuracy: 0.8982 - loss: 0.2852 - val_accuracy: 0.9372 - val_loss: 0.1942
Epoch 3/10
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 36ms/step - accuracy: 0.9342 - loss: 0.1882 - val_accuracy: 0.9521 - val_loss: 0.1464
Epoch 4/10
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 34ms/step - accuracy: 0.9520 - loss: 0.1434 - val_accuracy: 0.9554 - val_loss: 0.1368
Epoch 5/10
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 33ms/step - accuracy: 0.9584 - loss: 0.1217 - val_accuracy: 0.9558 - val_loss: 0.1257
Epoch 6/10
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 30ms/step - accuracy: 0.9648 - loss: 0.1060 - val_accuracy: 0.9693 - val_loss: 0.0984
Epoch 7/10
[1m3

In [31]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

# Accuracy
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2f}")

# Classification report
y_pred = model.predict(X_test).argmax(axis=1)
print(classification_report(y_test, y_pred, target_names=le.classes_))


[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.9628 - loss: 0.1417
Test Accuracy: 0.96
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
              precision    recall  f1-score   support

  Downstairs       0.87      0.92      0.90       502
     Jogging       0.96      0.99      0.98      1682
     Sitting       0.99      0.98      0.98       299
    Standing       0.97      0.99      0.98       243
    Upstairs       0.95      0.81      0.87       614
     Walking       0.98      0.98      0.98      2092

    accuracy                           0.96      5432
   macro avg       0.95      0.95      0.95      5432
weighted avg       0.96      0.96      0.96      5432



In [29]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

# Flatten the data (samples, time_steps * features)
X_flattened = X.reshape(X.shape[0], -1)  # Flatten the 3D data into 2D (samples, time_steps * features)

# Split the data into training and testing sets (use the same split for comparison)
X_train_rf, X_test_rf, y_train_rf, y_test_rf = train_test_split(
    X_flattened, y, test_size=0.2, random_state=42, stratify=y
)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_rf, y_train_rf)

# Predict on the test set
y_pred_rf = rf_model.predict(X_test_rf)

# Evaluate the Random Forest model
print("Random Forest Classification Report:")
print(classification_report(y_test_rf, y_pred_rf))

# Calculate accuracy
rf_accuracy = accuracy_score(y_test_rf, y_pred_rf)
print(f"Random Forest Test Accuracy: {rf_accuracy:.4f}")


Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.21      0.34       502
           1       0.89      0.99      0.94      1682
           2       1.00      0.96      0.98       299
           3       0.99      0.98      0.99       243
           4       0.84      0.26      0.40       614
           5       0.76      0.98      0.86      2092

    accuracy                           0.83      5432
   macro avg       0.89      0.73      0.75      5432
weighted avg       0.84      0.83      0.79      5432

Random Forest Test Accuracy: 0.8301
