## Imports

In [1]:
import glob
import json
import os
from collections import OrderedDict
from pprint import pprint

import numpy as np
import pandas as pd

## Constants

In [2]:
# Folder that is scanned for the raw per-recording CSV files.
data_path = "data/"
# Folder that receives the generated artifacts (combined/train CSVs, label map, pickled model).
models_path = "models/"

## List Files in Data Folder

In [3]:
# glob returns matches in arbitrary, OS-dependent order. Sort them so that the
# integer label given to each asana (assigned by order of first appearance in
# the combined data) is the same on every run and every machine.
file_list = sorted(glob.glob(data_path+"*.csv"))
file_list

['data\\W_YFA0002_UID_198D94B1-7CD8-4D3C-BCD3-14C9374A1F3D_TS_12-07-2022-GMT%200530-04-41-14-PM.csv',
 'data\\W_YFA0003_UID_198D94B1-7CD8-4D3C-BCD3-14C9374A1F3D_TS_12-07-2022-GMT%200530-04-52-03-PM.csv',
 'data\\W_YFA0004_UID_198D94B1-7CD8-4D3C-BCD3-14C9374A1F3D_TS_12-07-2022-GMT%200530-04-52-49-PM.csv']

## Combine the Data into a Single CSV

In [4]:
# Merge every per-recording CSV into one frame with a "Label" column.
# File names follow W_<asanaId>_UID_<userId>_TS_<timestamp>.csv, so the
# underscore-separated fields of the base name carry the metadata.
frames = []
for csv_path in file_list:
    # basename honours the running platform's separator, so this works whether
    # glob produced "data\\..." (Windows) or "data/..." (POSIX).
    fname = os.path.basename(csv_path)
    name_parts = fname.split("_")
    asanaId = name_parts[1]
    userId = name_parts[3]
    # NOTE(review): this keeps the ".csv" suffix and, after the loop, holds the
    # value from the last file only; it is read later when the label map is
    # written — confirm that is the intended model identifier.
    timestamp = name_parts[5]

    df = pd.read_csv(csv_path)
    df["Label"] = asanaId
    frames.append(df)

# Concatenate once instead of growing the frame inside the loop (which re-copies
# all accumulated rows on every iteration). An empty file list still yields an
# empty frame, as before.
df_total = pd.concat(frames) if frames else pd.DataFrame()

# Make sure the output folder exists before writing into it.
os.makedirs(models_path, exist_ok=True)
df_total.to_csv(models_path+"combined.csv", index=False)
df_total

Unnamed: 0,Pitch,Yaw,Roll,Label
0,-0.176710,-0.015289,-0.172170,YFA0002
1,-0.184059,-0.006228,-0.179977,YFA0002
2,-0.177588,-0.006947,-0.174225,YFA0002
3,-0.163639,-0.013477,-0.171067,YFA0002
4,-0.161429,-0.016017,-0.167205,YFA0002
...,...,...,...,...
87,-0.189604,0.146176,1.268635,YFA0004
88,-0.190246,0.148044,1.269392,YFA0004
89,-0.190909,0.146365,1.269509,YFA0004
90,-0.191233,0.142468,1.269482,YFA0004


## Read Combined CSV

In [5]:
# Reload the merged data set from the file written above.
combined_csv = models_path + "combined.csv"
df = pd.read_csv(combined_csv)
df

Unnamed: 0,Pitch,Yaw,Roll,Label
0,-0.176710,-0.015289,-0.172170,YFA0002
1,-0.184059,-0.006228,-0.179977,YFA0002
2,-0.177588,-0.006947,-0.174225,YFA0002
3,-0.163639,-0.013477,-0.171067,YFA0002
4,-0.161429,-0.016017,-0.167205,YFA0002
...,...,...,...,...
283,-0.189604,0.146176,1.268635,YFA0004
284,-0.190246,0.148044,1.269392,YFA0004
285,-0.190909,0.146365,1.269509,YFA0004
286,-0.191233,0.142468,1.269482,YFA0004


In [6]:
#
# Build the asana-id -> integer-label map and save it locally as JSON
#
# Asana ids in order of first appearance, paired with 0, 1, 2, ...
org_label = list(df_total["Label"].unique())
pred_label = list(range(len(org_label)))
dict_file = OrderedDict(zip(org_label, pred_label))

label_entries = [
    {'asana_id': asana_id, 'label': label}
    for asana_id, label in dict_file.items()
]
d = {"name": "Yogifi", "model": str(timestamp), "labelmap": label_entries}

# Serialise once; parse the text back so the preview shows exactly what is stored.
r = json.dumps(d)
loaded_r = json.loads(r)
pprint(loaded_r)

file_name = models_path+"labels"
with open(file_name+'.json', 'w') as outfile:
    # One line of JSON followed by a newline.
    outfile.write(r + "\n")

{'labelmap': [{'asana_id': 'YFA0002', 'label': 0},
              {'asana_id': 'YFA0003', 'label': 1},
              {'asana_id': 'YFA0004', 'label': 2}],
 'model': '12-07-2022-GMT%200530-04-52-49-PM.csv',
 'name': 'Yogifi'}


## Factorize the Data

In [7]:
# Replace each asana id with an integer code, numbered by first appearance.
label_codes, _ = pd.factorize(df["Label"])
df["Label"] = label_codes
df["Label"] = df["Label"].astype("category")

# Rename the columns to 0..n-1; the training file is saved without a header row.
df.columns = range(len(df.columns))
train_csv = models_path + "train.csv"
df.to_csv(train_csv, header=None, index=False)
df

Unnamed: 0,0,1,2,3
0,-0.176710,-0.015289,-0.172170,0
1,-0.184059,-0.006228,-0.179977,0
2,-0.177588,-0.006947,-0.174225,0
3,-0.163639,-0.013477,-0.171067,0
4,-0.161429,-0.016017,-0.167205,0
...,...,...,...,...
283,-0.189604,0.146176,1.268635,2
284,-0.190246,0.148044,1.269392,2
285,-0.190909,0.146365,1.269509,2
286,-0.191233,0.142468,1.269482,2


## Read Train Data

In [8]:
# Load the header-less training file; columns come back numbered 0..n-1.
train_csv = models_path + "train.csv"
df = pd.read_csv(train_csv, header=None)
df

Unnamed: 0,0,1,2,3
0,-0.176710,-0.015289,-0.172170,0
1,-0.184059,-0.006228,-0.179977,0
2,-0.177588,-0.006947,-0.174225,0
3,-0.163639,-0.013477,-0.171067,0
4,-0.161429,-0.016017,-0.167205,0
...,...,...,...,...
283,-0.189604,0.146176,1.268635,2
284,-0.190246,0.148044,1.269392,2
285,-0.190909,0.146365,1.269509,2
286,-0.191233,0.142468,1.269482,2


## Train a Decision Tree Classifier

In [9]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

In [10]:
# Column 3 is the integer label; every other column is a feature.
# `columns=` replaces the positional axis argument of df.drop(3, 1), which is
# deprecated and no longer accepted by pandas 2.x.
X = df.drop(columns=3)
y = df[3]

  """Entry point for launching an IPython kernel.


In [11]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.3, random_state=1)
X_train, X_test, y_train, y_test = split_parts

In [12]:
# Seed the estimator so the fitted tree (and the exported model) is reproducible
# from run to run; without a seed, ties between equally good splits may be
# broken differently on each fit.
# For a shallower, entropy-based tree, pass criterion="entropy", max_depth=3.
clf = DecisionTreeClassifier(random_state=1)

In [13]:
# Fit the classifier on the training split.
clf = clf.fit(X_train,y_train)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)


In [14]:
# Predict labels for the held-out rows.
y_pred = clf.predict(X_test)

In [15]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


## Saving the Model

In [16]:
import pickle

In [17]:
# Persist the fitted classifier next to the other generated artifacts.
model_file = models_path+'model.pkl'
with open(model_file, 'wb') as out:
    pickle.dump(clf, out)

In [18]:
# Read the classifier back to verify the saved file.
# NOTE: unpickling runs code embedded in the file; only load model files you produced yourself.
model_file = models_path+'model.pkl'
with open(model_file, 'rb') as saved:
    clf2 = pickle.load(saved)

In [19]:
# Predict with the reloaded model (overwrites the earlier y_pred) to check the pickle round trip.
y_pred = clf2.predict(X_test)

In [20]:
# Accuracy of the reloaded model on the same held-out rows.
reloaded_accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", reloaded_accuracy)

Accuracy: 1.0


## Converting the Model to a Core ML Model

In [21]:
import coremltools



In [22]:
# Convert the fitted scikit-learn classifier with coremltools' sklearn converter.
# NOTE(review): the recorded output of this notebook ends with
# "Unable to load libmodelpackage. Cannot make save spec." — presumably a
# coremltools installation/platform problem; confirm in the target environment.
coreml_model = coremltools.converters.sklearn.convert(clf)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype=np.int):
  from collections import Mapping, defaultdict
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, positive=False):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  max_n_alp

Exception: Unable to load libmodelpackage. Cannot make save spec.

In [None]:
# Write the converted model using a path relative to the working directory.
# NOTE(review): every other artifact in this notebook is written under models_path —
# confirm that saving this file outside that folder is intended.
coreml_model.save('wearable.mlmodel')