In [2]:
import pandas as pd

# Load data
file_paths = [
    ("imu_running_left_upto6.xlsx", "Running"),
    ("imu_still_left_upto6.xlsx", "Still"),
    ("imu_walking_left_upto6.xlsx", "Walking")
]

data_frames = []
for file_path, label in file_paths:
    df = pd.read_excel(file_path)
    df['label'] = label
    data_frames.append(df)

# Combine all data into a single DataFrame
data = pd.concat(data_frames, ignore_index=True)

# Display basic information about the dataset
print(data.info())
print(data.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 263528 entries, 0 to 263527
Data columns (total 8 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   timestamp  263528 non-null  int64 
 1   ax         263528 non-null  int64 
 2   ay         263528 non-null  int64 
 3   az         263528 non-null  int64 
 4   gx         263528 non-null  int64 
 5   gy         263528 non-null  int64 
 6   gz         263528 non-null  int64 
 7   label      263528 non-null  object
dtypes: int64(7), object(1)
memory usage: 16.1+ MB
None
       timestamp    ax     ay     az   gx    gy   gz    label
0  1621358377297  6297 -10524  11565  260 -1675  485  Running
1  1621358377307  6700 -10259  11712  320 -1650  471  Running
2  1621358377316  7021 -10117  11837  366 -1681  480  Running
3  1621358377326  7182 -10094  11793  379 -1751  580  Running
4  1621358377336  7252 -10225  11628  345 -1772  679  Running


In [3]:
# Check for missing values
print(data.isnull().sum())

# If there are missing values, we need to handle them (e.g., by filling or dropping)
data.dropna(inplace=True)

# Convert the timestamp to a datetime object (if not already in that format)
data['timestamp'] = pd.to_datetime(data['timestamp'])

# Ensure that label is a categorical type
data['label'] = data['label'].astype('category')

# Display the cleaned data
print(data.info())
print(data.head())

timestamp    0
ax           0
ay           0
az           0
gx           0
gy           0
gz           0
label        0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 263528 entries, 0 to 263527
Data columns (total 8 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   timestamp  263528 non-null  datetime64[ns]
 1   ax         263528 non-null  int64         
 2   ay         263528 non-null  int64         
 3   az         263528 non-null  int64         
 4   gx         263528 non-null  int64         
 5   gy         263528 non-null  int64         
 6   gz         263528 non-null  int64         
 7   label      263528 non-null  category      
dtypes: category(1), datetime64[ns](1), int64(6)
memory usage: 14.3 MB
None
                      timestamp    ax     ay     az   gx    gy   gz    label
0 1970-01-01 00:27:01.358377297  6297 -10524  11565  260 -1675  485  Running
1 1970-01-01 00:27:01.358377307  6700 -10259  1

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Define features and labels
X = data[['ax', 'ay', 'az', 'gx', 'gy', 'gz']]
y = data['label']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train and evaluate different models

# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)
rf_pred = rf.predict(X_test_scaled)
print("Random Forest Classification Report:")
print(classification_report(y_test, rf_pred))

# Support Vector Machine
svm = SVC(random_state=42)
svm.fit(X_train_scaled, y_train)
svm_pred = svm.predict(X_test_scaled)
print("SVM Classification Report:")
print(classification_report(y_test, svm_pred))

# K-Nearest Neighbors
knn = KNeighborsClassifier()
knn.fit(X_train_scaled, y_train)
knn_pred = knn.predict(X_test_scaled)
print("KNN Classification Report:")
print(classification_report(y_test, knn_pred))


Random Forest Classification Report:
              precision    recall  f1-score   support

     Running       0.98      0.95      0.97     26344
       Still       1.00      1.00      1.00     26364
     Walking       0.95      0.98      0.97     26351

    accuracy                           0.98     79059
   macro avg       0.98      0.98      0.98     79059
weighted avg       0.98      0.98      0.98     79059

