Commit

trained base model
aungkonazim committed Jan 15, 2023
1 parent 4e9fe8b commit 9216522
Showing 2 changed files with 284 additions and 16 deletions.
65 changes: 49 additions & 16 deletions divide_data_by_labels.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -11,12 +11,13 @@
"from scipy.interpolate import interp1d\n",
"import numpy as np\n",
"import pandas as pd\n",
"import os"
"import os\n",
"from joblib import Parallel, delayed"
]
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -26,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -35,7 +36,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -62,25 +63,24 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"Exercise (244, 8) (71, 8)\n",
"Sports (40782, 8) (10204, 8)\n",
"Stairs (4669, 8) (1178, 8)\n",
"Stationery (286619, 8) (71662, 8)\n",
"Walking (11191, 8) (2809, 8)\n"
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.\n",
"[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 17.1s remaining: 11.4s\n",
"[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 1.6min finished\n"
]
}
],
"source": [
"def interpolate_all_data(a):\n",
" a = a[a[:,0].argsort()]\n",
" a = a[:,1:].reshape(-1,3)\n",
" a = interpolate_acl(a)\n",
" return a.reshape(1,-1,3)\n",
"\n",
"\n",
@@ -92,9 +92,7 @@
" df = df.iloc[int(train_split*df.shape[0]):]\n",
" return df\n",
"\n",
"\n",
"data_directory = './data/mORAL_dataset_for_python_upload_09072020/processed_data/'\n",
"for i,df in all_data.groupby('prediction',as_index=False):\n",
"def save_data_by_labels(df):\n",
" activity = df['prediction'].values[0]\n",
" if not os.path.isdir(os.path.join(data_directory,activity)):\n",
" os.makedirs(os.path.join(data_directory,activity))\n",
@@ -105,9 +103,44 @@
" test_data = pd.concat([get_data(df_user,split_type='test') for j,df_user in df.groupby('user',as_index=False)]).sort_values('timestamp').reset_index(drop=True)\n",
" test_path = os.path.join(data_directory,activity,'test.p')\n",
" pickle.dump(test_data,open(test_path,'wb'))\n",
" print(activity,train_data.shape,test_data.shape) \n",
" print(activity,train_data.shape,test_data.shape)\n",
" return activity,train_data.shape,test_data.shape\n",
"\n",
"data_directory = './data/mORAL_dataset_for_python_upload_09072020/processed_data/'\n",
"done = Parallel(n_jobs=-1,verbose=2)(delayed(save_data_by_labels)(df) for i,df in all_data.groupby('prediction',as_index=False)) \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('Exercise', (244, 8), (71, 8)),\n",
" ('Sports', (40782, 8), (10204, 8)),\n",
" ('Stairs', (4669, 8), (1178, 8)),\n",
" ('Stationery', (286619, 8), (71662, 8)),\n",
" ('Walking', (11191, 8), (2809, 8))]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"done"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
235 changes: 235 additions & 0 deletions train_model.ipynb
@@ -0,0 +1,235 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"from tensorflow_addons.losses import metric_learning\n",
"import tensorflow_probability as tfp\n",
"from tensorflow.keras import layers, models, callbacks\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score\n",
"from tensorflow.keras import backend as K\n",
"import pickle\n",
"import os\n",
"\n",
"def _pairwise_distances(feature_A, feature_B=None, squared=False):\n",
" \"\"\"\n",
" Directly from https://www.tensorflow.org/api_docs/python/tf/contrib/losses/metric_learning/triplet_semihard_loss\n",
" Computes the pairwise distance matrix with numerical stability.\n",
" output[i, j] = || feature[i, :] - feature[j, :] ||_2\n",
" Args:\n",
" feature_A: 2-D Tensor of size [number of data A, feature dimension].\n",
" feature_B: 2-D Tensor of size [number of data B, feature dimension].\n",
" squared: Boolean, whether or not to square the pairwise distances.\n",
" Returns:\n",
" pairwise_distances: 2-D Tensor of size [number of data A, number of data B].\n",
" \"\"\"\n",
" if feature_B is None:\n",
" feature_B = feature_A\n",
"\n",
" pairwise_distances_squared = tf.add(\n",
" tf.reduce_sum(tf.square(feature_A), axis=[1], keepdims=True),\n",
" tf.reduce_sum(tf.square(tf.transpose(feature_B)), axis=[0], keepdims=True),\n",
" ) - 2.0 * tf.linalg.matmul(feature_A, tf.transpose(feature_B))\n",
"\n",
" # Deal with numerical inaccuracies. Set small negatives to zero.\n",
" pairwise_distances_squared = tf.maximum(pairwise_distances_squared, 0.0)\n",
" # Get the mask where the zero distances are at.\n",
" error_mask = tf.less_equal(pairwise_distances_squared, 0.0)\n",
"\n",
" # Optionally take the sqrt.\n",
" if squared:\n",
" pairwise_distances = pairwise_distances_squared\n",
" else:\n",
" pairwise_distances = tf.sqrt(\n",
" pairwise_distances_squared + tf.cast(error_mask, tf.float32) * 1e-16\n",
" )\n",
"\n",
" # Undo conditionally adding 1e-16.\n",
" pairwise_distances = tf.multiply(\n",
" pairwise_distances, tf.cast(tf.logical_not(error_mask), tf.float32)\n",
" )\n",
"\n",
" if feature_B is None:\n",
" num_data = tf.shape(feature_A)[0]\n",
" # Explicitly set diagonals to zero.\n",
" mask_offdiagonals = tf.ones_like(pairwise_distances) - tf.linalg.diag(\n",
" tf.ones([num_data])\n",
" )\n",
" pairwise_distances = tf.multiply(pairwise_distances, mask_offdiagonals)\n",
"\n",
" return pairwise_distances\n",
"def get_consistency_distinction_loss(labels,embeddings):\n",
" epsilon = 1e-7\n",
" lshape = tf.shape(labels)\n",
" labels = tf.reshape(labels, [lshape[0], 1])\n",
" clusters_labels, _, num_embeddings_per_cluster = tf.unique_with_counts(\n",
" tf.reshape(labels, [lshape[0]])\n",
" ) \n",
" num_clusters = tf.size(clusters_labels)\n",
" adjacency = tf.equal(\n",
" labels, tf.transpose(clusters_labels)\n",
" ) \n",
" centroids = tf.linalg.matmul(\n",
" tf.cast(adjacency, dtype=tf.float32), embeddings, transpose_a=True\n",
" )\n",
" centroids = tf.divide(\n",
" centroids,\n",
" tf.expand_dims(tf.cast(num_embeddings_per_cluster, dtype=tf.float32), axis=1),\n",
" )\n",
" pairwise_distances_distinction_first = _pairwise_distances(\n",
" feature_A=embeddings, feature_B=centroids, squared=True\n",
" )\n",
" pairwise_distances_distinction_first = pairwise_distances_distinction_first/tf.reshape(tf.reduce_max(pairwise_distances_distinction_first,axis=1),[lshape[0],1])\n",
" adjacency_not = tf.logical_not(adjacency)\n",
" pairwise_distances_distinction = tf.where(tf.cast(adjacency,tf.float32)==1.0,tf.reduce_max(pairwise_distances_distinction_first),pairwise_distances_distinction_first)\n",
" minimum_distance_to_other_cluster = tf.reduce_min(pairwise_distances_distinction,axis=1)\n",
" distinction_loss = tf.reduce_mean(minimum_distance_to_other_cluster)\n",
" mean_intra_class_distance = tf.reduce_mean(tf.boolean_mask(pairwise_distances_distinction_first,adjacency))\n",
" mean_inter_class_distance = tf.reduce_mean(tf.boolean_mask(pairwise_distances_distinction_first,tf.logical_not(adjacency)))\n",
" alpha = mean_intra_class_distance/ (mean_inter_class_distance+epsilon)\n",
" mask_for_equal = tf.math.equal(labels,tf.transpose(labels))\n",
" pairwise_distances = _pairwise_distances(embeddings,squared=True)\n",
" pairwise_distances = pairwise_distances/tf.reshape(tf.reduce_max(pairwise_distances,axis=1),[lshape[0],1]) \n",
" pairwise_distance_for_consistency = tf.multiply(pairwise_distances, tf.cast(mask_for_equal,tf.float32))\n",
" counts_same_class = tf.reduce_sum(tf.cast(mask_for_equal,tf.float32),axis=1)\n",
" total_distance_same_class = tf.reduce_sum(pairwise_distance_for_consistency,axis=1)\n",
" mean_distance_same_class = total_distance_same_class/(counts_same_class-1+epsilon)\n",
" loss = tf.constant(0,dtype=tf.float32)\n",
" for label in clusters_labels:\n",
" percentile_95 = tfp.stats.percentile(tf.where(labels==label,mean_distance_same_class,tf.reduce_max(mean_distance_same_class)),95)\n",
" loss+=percentile_95\n",
" consistency_loss = loss/tf.cast(tf.size(clusters_labels),tf.float32)\n",
" # consistency_loss = tf.reduce_mean(mean_distance_same_class)\n",
" return (1+alpha)*consistency_loss - distinction_loss\n",
"\n",
"\n",
"\n",
"def get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size,filepath):\n",
" n_classes = len(np.unique(y_train))\n",
" model = get_model(input_shape=(n_timesteps,n_channels),n_classes=n_classes)\n",
" checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min',save_weights_only=False)\n",
" es = callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=0,patience=40)\n",
" callbacks_list = [es,checkpoint]\n",
" train_x,val_x,train_y,val_y = train_test_split(X_train,y_train,test_size=.2,stratify=y_train)\n",
" history = model.fit(train_x,[train_y,train_y],validation_data=(val_x,[val_y,val_y]), epochs=200, batch_size=500,verbose=1,callbacks=callbacks_list,shuffle=True)\n",
" model.load_weights(filepath)\n",
" val_y_pred = model.predict(val_x)\n",
" if len(val_y_pred)<val_x.shape[0]:\n",
" val_y_pred = val_y_pred[0]\n",
" print('validation accuracy',accuracy_score(val_y,val_y_pred.argmax(axis=1)),end=',')\n",
" return model\n",
"import tensorflow_addons as tfa\n",
"def get_model(input_shape=(400,3),n_classes=1):\n",
" input_ = layers.Input(shape=input_shape)\n",
" x = layers.Conv1D(128,2,activation='relu',kernel_initializer='normal',padding='same')(input_)\n",
" x = layers.MaxPooling1D(2)(x)\n",
" x = layers.BatchNormalization()(x)\n",
" x = layers.Conv1D(128,2,activation='relu',kernel_initializer='normal',padding='same')(x)\n",
" x = layers.MaxPooling1D(2)(x)\n",
" x = layers.BatchNormalization()(x)\n",
" # x = layers.Activation('tanh')(x)\n",
" x = layers.Dropout(.4)(x)\n",
" x = layers.GRU(128,return_sequences=False,activation='tanh')(x)\n",
" x = layers.Flatten()(x)\n",
" x = layers.Dense(350,activation='relu')(x)\n",
" x = layers.Dense(n_classes,activation='relu')(x)\n",
" y2 = layers.Lambda(lambda a:K.l2_normalize(a,axis=1),name='feature')(x)\n",
" y1 = layers.Dense(n_classes,activation='softmax',name='final')(y2)\n",
" model = models.Model(input_,[y1,y2])\n",
" model.compile(loss={'final':tf.keras.losses.SparseCategoricalCrossentropy(),\n",
" 'feature':get_consistency_distinction_loss},\n",
" loss_weights = {'final':5,'feature':1},\n",
" optimizer='adam',\n",
" metrics={'final':['acc']})\n",
" return model\n",
"def get_X_y_dict(training_data,user_dict = None):\n",
" if user_dict is None:\n",
" user_dict = {a:i for i,a in enumerate(training_data['user'].unique())}\n",
" training_data['label'] = training_data['user'].apply(lambda a:user_dict[a])\n",
" X = np.concatenate(list(training_data['final_data']))\n",
" y = np.array(training_data['label'].values)\n",
" return X,y,user_dict\n",
"activity_label = 'Walking'\n",
"window_size = 20\n",
"base_directory = './data/mORAL_dataset_for_python_upload_09072020/'\n",
"training_data = pickle.load(open(os.path.join(base_directory,'processed_data',activity_label,'train.p'),'rb')).sort_values('timestamp').reset_index(drop=True)\n",
"testing_data = pickle.load(open(os.path.join(base_directory,'processed_data',activity_label,'test.p'),'rb')).sort_values('timestamp').reset_index(drop=True)\n",
"if not os.path.isdir(os.path.join(base_directory,'results',activity_label)):\n",
" os.makedirs(os.path.join(base_directory,'results',activity_label))\n",
"result_directory = os.path.join(base_directory,'results',activity_label)\n",
"model_directory = os.path.join(result_directory,'activity_{}_window_size_{}.h5'.format(activity_label,window_size))\n",
"X_train,y_train,user_dict = get_X_y_dict(training_data)\n",
"X_test,y_test,user_dict = get_X_y_dict(testing_data,user_dict)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trained_model = get_trained_model(X_train,y_train,n_timesteps=X_train.shape[1],n_channels=X_train.shape[-1],window_size=window_size,filepath=model_directory)"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [],
"source": [
"y_pred_test = trained_model.predict(X_test)\n",
"testing_data['embedding'] = list(y_pred_test[0])\n",
"testing_data['prediction'] = list(y_pred_test[0].argmax(axis=1))"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [],
"source": [
"pickle.dump(testing_data,open(os.path.join(result_directory,'activity_{}_window_size_{}.p'.format(activity_label,window_size)),'wb'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "test1",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "51b244ab9aca612e739a0539ae1af887c58db9e180d786deb0ab1761def69c1f"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
