In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import os
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, plot_confusion_matrix
import matplotlib.pyplot as plt
import pickle

# Download data to runtime environment
if not os.path.exists("activity_clusters_df/"):
    ! gdown --folder https://drive.google.com/drive/folders/1CqsUGqC6-8EIY7wOEL58LgpRYxRcnR_4?usp=sharing

# Load files
# One CSV per activity; the dict keys are the lower-case activity names used
# to address the frames in the rest of the cell.
activity_files = {
    "downstairs": "activity_clusters_df/activity_Downstairs_df",
    "jogging": "activity_clusters_df/activity_Jogging_df",
    "sitting": "activity_clusters_df/activity_Sitting_df",
    "standing": "activity_clusters_df/activity_Standing_df",
    "upstairs": "activity_clusters_df/activity_Upstairs_df",
    "walking": "activity_clusters_df/activity_Walking_df",
}
a = {activity: pd.read_csv(path) for activity, path in activity_files.items()}

# Rank activities by DataFrame shape, largest first.
# Each entry is [activity name, (rows, columns)]; the shape tuples are compared
# element-wise, so the row count decides the order before the column count.
sorter = sorted(
    ([activity, a[activity].shape] for activity in a),
    key=lambda entry: entry[1],
    reverse=True,
)

# Numeric encode tasks
# Codes follow the order of `sorter`, so code 0 goes to its first entry.
# Keys are the capitalised activity names (e.g. "Downstairs").
label_map = {entry[0].capitalize(): code for code, entry in enumerate(sorter)}
label_values = list(label_map)

# Swap the values of the "label" column for their integer codes.
# NOTE(review): assumes "label" holds the capitalised activity names; values
# that are not keys of label_map are left unchanged by replace -- confirm.
for activity in a:
    a[activity] = a[activity].replace({"label": label_map})

# Drop column "Unnamed: 0", was previous index
# A KeyError is raised if a frame does not contain that column.
for activity in a:
    a[activity] = a[activity].drop(columns="Unnamed: 0")

# Shuffle and split datasets
# Per activity: shuffle, take 80% of the rows for training, then divide the
# remaining rows in half between validation and test (roughly 80/10/10).
# Every draw uses random_state=42, so the split is reproducible.
for k in a:
    shuffled = a[k].sample(frac=1, random_state=42)

    train = shuffled.sample(frac=0.8, random_state=42)

    # Rows are removed by index label, which relies on the labels being unique.
    holdout = shuffled.drop(train.index)
    val = holdout.sample(frac=0.5, random_state=42)
    test = holdout.drop(val.index)

    # From here on a[k] is a dict of splits rather than a single DataFrame.
    a[k] = dict(train=train, val=val, test=test)

# Combine frequent and infrequent activities
# Each group lists the activities whose splits are concatenated, in this order.
# The order is significant: the frames are re-indexed after concatenation and
# later shuffled with a fixed seed, so a different order changes the result.
freq_activities = ["downstairs", "jogging", "upstairs", "walking"]
infreq_activities = ["sitting", "standing"]
activity_groups = {
    "freq": freq_activities,
    "infreq": infreq_activities,
    "comb": freq_activities + infreq_activities,
}

# data[group][split] is one DataFrame stacking that split of every activity in
# the group, with a fresh 0..n-1 index.
data = {
    group: {
        split: pd.concat(
            [a[activity][split] for activity in activities],
            axis=0,
            ignore_index=True,
        )
        for split in ("train", "val", "test")
    }
    for group, activities in activity_groups.items()
}

# Shuffle
# Every combined frame is permuted with the same fixed seed so that rows from
# different activities are interleaved reproducibly.
data = {
    group: {
        split: frame.sample(frac=1, random_state=42)
        for split, frame in splits.items()
    }
    for group, splits in data.items()
}

# Split
# Target image shape: each row must hold img_rows * img_cols (= 405) feature
# values, otherwise the reshape below raises a ValueError.
img_rows = 27
img_cols = 15
for group, splits in data.items():
    for split, frame in splits.items():

        # Flat data: every column except the last is a feature.
        # NOTE(review): assumes "label" is the last column -- confirm.
        x = frame[frame.columns[:-1]]
        y = frame[["label"]]

        # Image data: a copy of the features with shape (rows, img_rows, img_cols)
        shaped = x.to_numpy(copy=True).reshape(len(x), img_rows, img_cols)

        # Replace the frame with a bundle of all of its views.
        splits[split] = {"df": frame, "x": x, "y": y, "x_shaped": shaped}

# Write every artefact to disk: DataFrames as .csv (header, no index column),
# the reshaped NumPy array as .pickle. Files are named <group>_<split>_<kind>.
# NOTE(review): `dir` shadows the builtin; the name is kept in case later cells
# refer to it. The target directory must already exist.
dir = "/content/drive/MyDrive/Classes/CSCE 5280 AI for Wearables/"
for k1 in data:
    for k2 in data[k1]:
        for k3, payload in data[k1][k2].items():
            print(k1, k2, k3)
            name_parts = (dir, k1, k2, k3)
            if k3 != "x_shaped":
                payload.to_csv("%s%s_%s_%s.csv" % name_parts, header=True, index=False)
                continue
            # The 3-D array does not fit a CSV table, so it is pickled instead.
            with open("%s%s_%s_%s.pickle" % name_parts, "wb") as handle:
                pickle.dump(payload, handle)

Retrieving folder list
Processing file 138srRsR00ALdY5C1C09tLFO_bWGMOkoZ activity_Downstairs_df
Processing file 1QJTINMLihyyjljNEk7DBRQyMjGXVjPcf activity_Jogging_df
Processing file 1U_lzf-yOBC96IupG-9wS324Lyu78u916 activity_Sitting_df
Processing file 1wZitN2BWyzh_dW2OGSIy03YxVdKI3P07 activity_Standing_df
Processing file 1C5GpQ7RhX_t_h-9XGV9x7I3Leufs5_Sl activity_Upstairs_df
Processing file 1_hPbQ1MtEqqEI3KQ7ttIYGIdYH7yi36d activity_Walking_df
Retrieving folder list completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=138srRsR00ALdY5C1C09tLFO_bWGMOkoZ
To: /content/activity_clusters_df/activity_Downstairs_df
100% 6.45M/6.45M [00:00<00:00, 87.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=1QJTINMLihyyjljNEk7DBRQyMjGXVjPcf
To: /content/activity_clusters_df/activity_Jogging_df
100% 5.84M/5.84M [00:00<00:00, 156MB/s]
Downloading...
From: https://drive.google.com/uc?id=1U_lzf-yOBC96IupG-9wS324Lyu78u916
T