In [114]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from typing import Dict, Any
from sklearn.metrics import accuracy_score

In [115]:
cname = ['wrist x value', 'wrist y value', 'wrist z value', 'thigh x value', 'thigh y value', 'thigh z value', 'class']
cname2 = ['wrist x value', 'wrist y value', 'wrist z value', 'thigh x value', 'thigh y value', 'thigh z value']
train = pd.read_csv('training.csv', names=cname, header=None)
df_train = pd.DataFrame(train)
df_train

Unnamed: 0,wrist x value,wrist y value,wrist z value,thigh x value,thigh y value,thigh z value,class
0,-0.453125,-1.234375,0.250000,-1.218750,-0.343750,0.078125,walkfast
1,-0.453125,-1.218750,0.265625,-1.250000,-0.281250,-0.078125,walkfast
2,-0.468750,-1.187500,0.281250,-1.296875,-0.218750,-0.125000,walkfast
3,-0.468750,-1.109375,0.296875,-1.375000,-0.218750,-0.125000,walkfast
4,-0.468750,-1.031250,0.296875,-1.437500,-0.281250,-0.125000,walkfast
...,...,...,...,...,...,...,...
29995,-0.468750,-0.859375,0.296875,-0.937500,-0.171875,0.218750,walkslow
29996,-0.468750,-0.875000,0.296875,-0.953125,-0.187500,0.218750,walkslow
29997,-0.453125,-0.875000,0.296875,-0.953125,-0.203125,0.187500,walkslow
29998,-0.437500,-0.875000,0.296875,-0.953125,-0.203125,0.171875,walkslow


In [116]:
test = pd.read_csv('testing.csv', names=cname2, header=None)
test

Unnamed: 0,wrist x value,wrist y value,wrist z value,thigh x value,thigh y value,thigh z value
0,-0.390625,-1.109.375,0.218750,-0.828125,-0.109375,0.15625
1,-0.390625,-109.375,0.203125,-0.8125,-0.1875,0.28125
2,-0.375,-10.625,0.187500,-0.8125,-0.265625,0.28125
3,-0.375,-1.046.875,0.171875,-0.8125,-0.265625,0.21875
4,-0.34375,-1.015.625,0.171875,-0.8125,-0.203125,0.1875
...,...,...,...,...,...,...
295,-0.484375,-0.859375,0.218750,-0.96875,-0.28125,0.3125
296,-0.5,-0.875,0.218750,-0.953125,-0.3125,0.328125
297,-0.5,-0.890625,0.218750,-0.96875,-0.3125,0.34375
298,-0.5,-0.90625,0.218750,-1.0,-0.28125,0.328125


In [117]:
# preprocessing test data for number with double dots
test = test.astype(str).apply(lambda x: x.str.replace(',', '.'))
test
# take the first 4 number of each number to convert it to float
test = test.astype(str).apply(lambda x: x.str[:4])
test = test.astype(float)
test

Unnamed: 0,wrist x value,wrist y value,wrist z value,thigh x value,thigh y value,thigh z value
0,-0.3,-1.1,0.21,-0.8,-0.1,0.15
1,-0.3,-109.0,0.20,-0.8,-0.1,0.28
2,-0.3,-10.0,0.18,-0.8,-0.2,0.28
3,-0.3,-1.0,0.17,-0.8,-0.2,0.21
4,-0.3,-1.0,0.17,-0.8,-0.2,0.18
...,...,...,...,...,...,...
295,-0.4,-0.8,0.21,-0.9,-0.2,0.31
296,-0.5,-0.8,0.21,-0.9,-0.3,0.32
297,-0.5,-0.8,0.21,-0.9,-0.3,0.34
298,-0.5,-0.9,0.21,-1.0,-0.2,0.32


In [118]:
# change the summary into numerical value
df_train['class'] = df_train['class'].map({'walkfast': -1, 'walkmod': 0, 'walkslow': 1})
df_train

Unnamed: 0,wrist x value,wrist y value,wrist z value,thigh x value,thigh y value,thigh z value,class
0,-0.453125,-1.234375,0.250000,-1.218750,-0.343750,0.078125,-1
1,-0.453125,-1.218750,0.265625,-1.250000,-0.281250,-0.078125,-1
2,-0.468750,-1.187500,0.281250,-1.296875,-0.218750,-0.125000,-1
3,-0.468750,-1.109375,0.296875,-1.375000,-0.218750,-0.125000,-1
4,-0.468750,-1.031250,0.296875,-1.437500,-0.281250,-0.125000,-1
...,...,...,...,...,...,...,...
29995,-0.468750,-0.859375,0.296875,-0.937500,-0.171875,0.218750,1
29996,-0.468750,-0.875000,0.296875,-0.953125,-0.187500,0.218750,1
29997,-0.453125,-0.875000,0.296875,-0.953125,-0.203125,0.187500,1
29998,-0.437500,-0.875000,0.296875,-0.953125,-0.203125,0.171875,1


In [119]:
X_train = df_train.iloc[:, 0:6].values
y_train = df_train.iloc[:, 6].values
X_test = test.iloc[:, 0:6]
X_test = X_test.apply(pd.to_numeric, errors = 'coerce')
data = X_train

In [120]:
class SVM:
    def __init__(self, learning_rate=0.01, lambda_param=0.01, num_iterations=500):
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None
    
    def fit(self, X, y):
        num_samples, num_features = X.shape
        
        # Initialize weights and bias
        self.weights = np.zeros(num_features)
        self.bias = 0
        
        # Gradient descent
        for _ in range(self.num_iterations):
            # Calculate the hinge loss and gradients
            loss = 0
            dw = np.zeros(num_features)
            db = 0
            
            for i in range(num_samples):
                if y[i] * (np.dot(X[i], self.weights) - self.bias) >= 1:
                    dw += 0
                    db += 0
                else:
                    dw += self.lambda_param * y[i] * X[i]
                    db += self.lambda_param * y[i]
                    loss += 1 - y[i] * (np.dot(X[i], self.weights) - self.bias)
            
            # Update weights and bias
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
        
    def predict(self, X):
        y_pred = np.sign(np.dot(X, self.weights) - self.bias)
        return y_pred


In [121]:
# SVM
clf = SVM()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print(predictions)

[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1. -1. -1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1

In [125]:
# create a new dataframe to store the predictions and ID number of each row
df = pd.DataFrame(predictions, columns=['label'])
df['ID'] = df.index + 1
df
# swap the columns
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df[cols]
df
# change label back to walkslow, walkmod, walkfast
df['label'] = df['label'].map({-1: 'walkfast', 0: 'walkmod', 1: 'walkslow'})
df

Unnamed: 0,ID,label
0,1,walkslow
1,2,walkslow
2,3,walkslow
3,4,walkslow
4,5,walkslow
...,...,...
295,296,walkslow
296,297,walkslow
297,298,walkslow
298,299,walkslow


In [126]:
# export the dataframe to csv file seperated by comma
df.to_csv('result.csv', index=False, sep=',')