In [2]:
import pandas as pd
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

data_folder = 'homework_5/data'

data = pd.DataFrame()

for activity_folder in os.listdir(data_folder):
    activity_path = os.path.join(data_folder, activity_folder)
    if not os.path.isdir(activity_path):
        continue
    
    for file_name in os.listdir(activity_path):
        file_path = os.path.join(activity_path, file_name)
        if not file_name.endswith('.csv'):
            continue
        
        df = pd.read_csv(file_path)
        
        df['activity'] = activity_folder
        
        data = data.append(df, ignore_index=True)

data = data.sample(frac=1).reset_index(drop=True)

X = data.drop('activity', axis=1)
y = data['activity']

lab = preprocessing.LabelEncoder()
y_transformed = lab.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm_classifier = SVC(kernel="linear")
svm_classifier.fit(X_train, y_train)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

svm_predictions = svm_classifier.predict(X_test)
rf_predictions = rf_classifier.predict(X_test)

print("SVM Accuracy:", accuracy_score(y_test, svm_predictions))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_predictions))

print("\nSVM Confusion Matrix:")
print(confusion_matrix(y_test, svm_predictions))

print("\nRandom Forest Confusion Matrix:")
print(confusion_matrix(y_test, rf_predictions))

print("\nSVM Classification Report:")
print(classification_report(y_test, svm_predictions))

print("\nRandom Forest Classification Report:")
print(classification_report(y_test, rf_predictions))

SVM Accuracy: 0.8204632208810482
Random Forest Accuracy: 0.999871040957392

SVM Confusion Matrix:
[[ 4987  1332     0    20]
 [  896 17403     0  2186]
 [    6   286     0   685]
 [  244  1306     0  9421]]

Random Forest Confusion Matrix:
[[ 6339     0     0     0]
 [    0 20484     0     1]
 [    2     0   975     0]
 [    1     1     0 10969]]

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.79      0.80      6339
           1       0.86      0.85      0.85     20485
           2       0.00      0.00      0.00       977
           3       0.77      0.86      0.81     10971

    accuracy                           0.82     38772
   macro avg       0.61      0.62      0.62     38772
weighted avg       0.80      0.82      0.81     38772


Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6339
           1       1.00      1.00      1.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
