In [189]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

**Data Cleaning**

In [190]:
df = pd.read_csv('synthetic_data.csv')
# Check for missing values
df.isnull().sum()


x         0
y         0
z         0
status    0
dtype: int64

In [191]:
# Remove duplicates
df.drop_duplicates()


Unnamed: 0,x,y,z,status
0,-0.123084,-0.909835,-0.399468,idle
1,-0.120303,-0.903106,-0.381034,idle
2,-0.123200,-0.893145,-0.454663,idle
3,-0.136790,-0.916880,-0.454484,idle
4,-0.129392,-0.911907,-0.371385,idle
...,...,...,...,...
299995,0.829013,0.188025,0.411385,used
299996,0.836796,0.229350,0.286411,used
299997,0.761621,0.206068,0.390765,used
299998,0.846855,0.359300,0.419307,used


In [192]:
idle_data = df[df['status'] == 'idle']
false_motion_data = df[df['status'] == 'false_motion']
used_data = df[df['status'] == 'used']


**Remove Outliers Using Z-Score**

In [193]:
from scipy.stats import zscore

def remove_outliers_by_zscore(data, columns, threshold=3):
    z_scores = zscore(data[columns])
    return data[(z_scores < threshold).all(axis=1)]

idle_data_cleaned = remove_outliers_by_zscore(idle_data, ['x', 'y', 'z'])
false_motion_data_cleaned = remove_outliers_by_zscore(false_motion_data, ['x', 'y', 'z'])
used_data_cleaned = remove_outliers_by_zscore(used_data, ['x', 'y', 'z'])


In [194]:
df = pd.concat([
    idle_data_cleaned,
    false_motion_data_cleaned,
    used_data_cleaned
])

df.reset_index(drop=True, inplace=True)


In [195]:
df['magnitude'] = np.sqrt(df['x']**2 + df['y']**2 + df['z']**2)


In [196]:
window_size = 10

# Calculate rolling mean, standard deviation, max, and min
df['rolling_mean'] = df['magnitude'].rolling(window=window_size).mean()
df['rolling_std'] = df['magnitude'].rolling(window=window_size).std()
df['rolling_max'] = df['magnitude'].rolling(window=window_size).max()
df['rolling_min'] = df['magnitude'].rolling(window=window_size).min()

In [197]:
df.dropna(subset=['rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min'], inplace=True)
df.reset_index(drop=True, inplace=True)

In [198]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
df['status_encoded'] = encoder.fit_transform(df['status'])


In [199]:
from sklearn.model_selection import train_test_split

X = df[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']]
y = df['status_encoded']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [200]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']] = scaler.fit_transform(df[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']])

In [201]:
df.head()

Unnamed: 0,x,y,z,status,magnitude,rolling_mean,rolling_std,rolling_max,rolling_min,status_encoded
0,-0.722631,-0.684882,-0.572784,idle,-0.319049,-0.176679,-0.974991,-0.820566,0.890017,1
1,-0.705323,-0.707061,-0.667939,idle,0.01083,-0.177337,-0.975095,-0.820566,0.890017,1
2,-0.701161,-0.690752,-0.720074,idle,0.015046,-0.118208,-0.979485,-0.820566,0.890017,1
3,-0.685538,-0.71408,-0.682856,idle,0.07338,-0.135369,-0.985175,-0.820566,0.890017,1
4,-0.724755,-0.653489,-0.617036,idle,-0.435118,-0.409551,-1.045116,-0.998741,0.890017,1


**Train Model**

In [202]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


In [203]:
from sklearn.metrics import classification_report, accuracy_score

print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")


              precision    recall  f1-score   support

           0       0.97      0.96      0.97     19879
           1       0.96      0.97      0.97     19864
           2       1.00      1.00      1.00     20015

    accuracy                           0.98     59758
   macro avg       0.98      0.98      0.98     59758
weighted avg       0.98      0.98      0.98     59758

Accuracy: 0.98


**Save Model**

In [204]:
import joblib

joblib.dump(clf, "classify_status_model.pkl")

['classify_status_model.pkl']

**Test Data**

In [180]:
df_test = pd.read_csv('1_highly_used_data.csv')
# Check for missing values
df_test.isnull().sum()

x         0
y         0
z         0
status    0
dtype: int64

In [181]:
df_test.drop_duplicates()

Unnamed: 0,x,y,z,status
0,0.885636,0.167930,0.144440,used
1,0.811211,0.272013,0.255646,used
2,0.894434,0.228979,0.411866,used
3,0.739594,0.322290,0.427841,used
4,0.874145,0.189126,0.820142,used
...,...,...,...,...
99995,-0.136474,-0.882179,-0.439679,idle
99996,-0.129160,-0.871507,-0.451367,idle
99997,-0.123045,-0.858669,-0.413231,idle
99998,-0.133078,-0.875517,-0.431696,idle


In [182]:
idle_data_test = df_test[df_test['status'] == 'idle']
false_motion_data_test = df_test[df_test['status'] == 'false_motion']
used_data_test = df_test[df_test['status'] == 'used']

def remove_outliers_by_zscore(data, columns, threshold=3):
    z_scores = zscore(data[columns])
    return data[(z_scores < threshold).all(axis=1)]

idle_data_cleaned_test = remove_outliers_by_zscore(idle_data_test, ['x', 'y', 'z'])
false_motion_data_cleaned_test = remove_outliers_by_zscore(false_motion_data_test, ['x', 'y', 'z'])
used_data_cleaned_test = remove_outliers_by_zscore(used_data_test, ['x', 'y', 'z'])

df_test = pd.concat([
    idle_data_cleaned_test,
    false_motion_data_cleaned_test,
    used_data_cleaned_test
])

df_test.reset_index(drop=True, inplace=True)

In [183]:
df_test['magnitude'] = np.sqrt(df_test['x']**2 + df_test['y']**2 + df_test['z']**2)


In [184]:
window_size = 10

# Calculate rolling mean, standard deviation, max, and min
df_test['rolling_mean'] = df_test['magnitude'].rolling(window=window_size).mean()
df_test['rolling_std'] = df_test['magnitude'].rolling(window=window_size).std()
df_test['rolling_max'] = df_test['magnitude'].rolling(window=window_size).max()
df_test['rolling_min'] = df_test['magnitude'].rolling(window=window_size).min()


In [185]:
df_test.dropna(subset=['rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min'], inplace=True)
df_test.reset_index(drop=True, inplace=True)

In [186]:
df_test[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']] = scaler.transform(df_test[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']])

In [187]:
df_test['encoded_status'] = encoder.transform(df_test['status'])


In [188]:
import joblib
from sklearn.metrics import classification_report

model = joblib.load('classify_status_model_final1.pkl')

X = df_test[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']]

df_test['estimated_status'] = model.predict(X)
if 'status' in df_test.columns:
    print("Classification Report:\n")
    print(classification_report(df_test['encoded_status'], df_test['estimated_status']))

df_test.to_csv('classified_data.csv', index=False)


Classification Report:

              precision    recall  f1-score   support

           0       0.60      1.00      0.75     14952
           1       0.00      0.00      0.00      9944
           2       1.00      1.00      1.00     74718

    accuracy                           0.90     99614
   macro avg       0.53      0.67      0.58     99614
weighted avg       0.84      0.90      0.86     99614



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Separate**

In [None]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, X, y, cv=5)
print(f"Cross-validation accuracy: {scores.mean():.2f}")


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(cm, display_labels=encoder.classes_).plot()


In [None]:
from sklearn.dummy import DummyClassifier

baseline = DummyClassifier(strategy="most_frequent")
baseline.fit(X_train, y_train)
baseline_acc = baseline.score(X_test, y_test)
print(f"Baseline Accuracy: {baseline_acc:.2f}")


Baseline Accuracy: 0.24


In [None]:
df['time'] = range(len(df))
statuses = df['status'].unique()

for status in statuses:
    subset = df[df['status'] == status]
    plt.figure(figsize=(10, 5))
    plt.plot(subset['time'], subset['magnitude'], label='x', alpha=0.7)
    plt.title(f'Accelerometer Data for Status: {status}')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.legend()
    plt.show()