In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

**Process Data**

In [40]:
df = pd.read_csv('synthetic_data.csv')
df.isnull().sum()
df.drop_duplicates()


Unnamed: 0,x,y,z,status
0,-0.123084,-0.909835,-0.399468,idle
1,-0.120303,-0.903106,-0.381034,idle
2,-0.123200,-0.893145,-0.454663,idle
3,-0.136790,-0.916880,-0.454484,idle
4,-0.129392,-0.911907,-0.371385,idle
...,...,...,...,...
299995,0.829013,0.188025,0.411385,used
299996,0.836796,0.229350,0.286411,used
299997,0.761621,0.206068,0.390765,used
299998,0.846855,0.359300,0.419307,used


In [41]:
idle_data = df[df['status'] == 'idle']
false_motion_data = df[df['status'] == 'false_motion']
used_data = df[df['status'] == 'used']

In [42]:
from scipy.stats import zscore

def remove_outliers_by_zscore(data, columns, threshold=3):
    z_scores = zscore(data[columns])
    return data[(z_scores < threshold).all(axis=1)]

idle_data_cleaned = remove_outliers_by_zscore(idle_data, ['x', 'y', 'z'])
false_motion_data_cleaned = remove_outliers_by_zscore(false_motion_data, ['x', 'y', 'z'])
used_data_cleaned = remove_outliers_by_zscore(used_data, ['x', 'y', 'z'])

In [43]:
df = pd.concat([
    idle_data_cleaned,
    false_motion_data_cleaned,
    used_data_cleaned
])

df.reset_index(drop=True, inplace=True)

In [44]:
df['magnitude'] = np.sqrt(df['x']**2 + df['y']**2 + df['z']**2)


In [45]:
window_size = 10

# Calculate rolling mean, standard deviation, max, and min
df['rolling_mean'] = df['magnitude'].rolling(window=window_size).mean()
df['rolling_std'] = df['magnitude'].rolling(window=window_size).std()
df['rolling_max'] = df['magnitude'].rolling(window=window_size).max()
df['rolling_min'] = df['magnitude'].rolling(window=window_size).min()

In [46]:
df.dropna(subset=['rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min'], inplace=True)
df.reset_index(drop=True, inplace=True)

In [47]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
df['status_encoded'] = encoder.fit_transform(df['status'])

In [48]:
from sklearn.model_selection import train_test_split

X = df[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']]
y = df['status_encoded']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [49]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']] = scaler.fit_transform(df[['x', 'y', 'z', 'magnitude', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min']])

**Train Random Forest Algorithm and Evaluation**

In [12]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [13]:
from sklearn.metrics import classification_report, accuracy_score

print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

              precision    recall  f1-score   support

           0       0.97      0.96      0.97     19879
           1       0.96      0.97      0.97     19864
           2       1.00      1.00      1.00     20015

    accuracy                           0.98     59758
   macro avg       0.98      0.98      0.98     59758
weighted avg       0.98      0.98      0.98     59758

Accuracy: 0.98


**Train Logistic Regression Algorithm and Evaluation**

In [38]:
from sklearn.linear_model import LogisticRegression

log_reg_model = LogisticRegression(random_state=42, max_iter=1000)
log_reg_model.fit(X_train, y_train)
y_pred_lr = log_reg_model.predict(X_test)

In [39]:
print("\nLogistic Regression Classification Report:")
print(classification_report(y_test, y_pred_lr, target_names=encoder.classes_))
print(f"Accuracy: {accuracy_score(y_test, y_pred_lr):.2f}")


Logistic Regression Classification Report:
              precision    recall  f1-score   support

false_motion       0.95      0.92      0.93     19879
        idle       0.92      0.96      0.94     19864
        used       1.00      1.00      1.00     20015

    accuracy                           0.96     59758
   macro avg       0.96      0.96      0.96     59758
weighted avg       0.96      0.96      0.96     59758

Accuracy: 0.96


**Train Decision Tree Algorithm and Evaluation**

In [50]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

In [51]:
print("\nDecision Tree Classification Report:")
print(classification_report(y_test, y_pred_dt, target_names=encoder.classes_))
print(f"Accuracy: {accuracy_score(y_test, y_pred_dt):.2f}")


Decision Tree Classification Report:
              precision    recall  f1-score   support

false_motion       0.95      0.95      0.95     19879
        idle       0.95      0.95      0.95     19864
        used       1.00      1.00      1.00     20015

    accuracy                           0.97     59758
   macro avg       0.97      0.97      0.97     59758
weighted avg       0.97      0.97      0.97     59758

Accuracy: 0.97
