# Create DataFrames from Data Collected with the Android Application

In [1]:
import sys, os, logging
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [2]:
# Recording session to process; also the name of its sub-folder.
DATE = "2023_04_14"
DATA_FOLDER = os.path.join("../data/android_app/", DATE)

# Regular ".txt" files found directly inside the session folder.
DATA_FILES = [
    name
    for name in os.listdir(DATA_FOLDER)
    if name.endswith(".txt") and os.path.isfile(os.path.join(DATA_FOLDER, name))
]

# Field delimiter used when parsing the recording files.
SEPARATOR = ";"

## Create Acceleration Dataframe

In [3]:
def dfFromFile(file):
    """
    Load one recording file into a dataframe.

    The file is parsed with the module-level SEPARATOR. Its first line is
    consumed as a header row and the three columns are renamed, in file
    order, to "ay", "ax" and "az".

    Args:
        file (string): Path of the file to read.

    Returns:
        pd.DataFrame: Columns "file" (base name of the path), "index"
            (0-based row counter within the file), "ax", "ay" and "az".
    """
    column_order = ["file", "index", "ax", "ay", "az"]

    samples = pd.read_csv(file, sep=SEPARATOR, header=0, names=["ay", "ax", "az"])
    # Tag every row with its source file and its position inside that file.
    samples = samples.assign(
        file=os.path.basename(file),
        index=range(len(samples)),
    )
    return samples[column_order]

In [4]:
def dfFromFiles(folder, files=None):
    """
    Create dataframe from multiple files.

    Args:
        folder (string): Folder where the files are located.
        files (list, optional): Names of files inside the folder. Defaults to
            None, in which case every regular ".txt" file directly inside the
            folder is used.

    Returns:
        pd.DataFrame: The per-file dataframes built by dfFromFile, concatenated
            in order (each file keeps its own row labels). None if there are
            no files or if a file could not be read; an error is logged in
            both cases.
    """
    if files is None:
        files = [file for file in os.listdir(folder) if os.path.isfile(os.path.join(folder, file)) and file.endswith(".txt")]
    else:
        # Accept any iterable of names, not only lists.
        files = list(files)

    if not files:
        logging.error("Empty folder...")
        return None

    try:
        frames = [dfFromFile(os.path.join(folder, file)) for file in files]
    except (OSError, ValueError) as error:
        # OSError covers missing/unreadable files; pandas parsing errors derive
        # from ValueError. Anything else is a programming error and propagates.
        logging.error("Could not read files from %s: %s", folder, error)
        return None

    # Concatenate once instead of re-copying the growing frame for every file.
    return pd.concat(frames)

In [5]:
# Combine every ".txt" recording in DATA_FOLDER into one dataframe
# (dfFromFiles returns None instead if loading fails).
df = dfFromFiles(DATA_FOLDER)

In [6]:
# Summary statistics of the numeric columns (index, ax, ay, az).
df.describe()

Unnamed: 0,index,ax,ay,az
count,229570.0,229570.0,229570.0,229570.0
mean,2109.043982,0.10245,0.294837,9.921677
std,1999.835536,1.302086,2.230282,2.601988
min,0.0,-32.101456,-70.39911,-49.387592
25%,699.0,-0.363919,-0.42138,9.174581
50%,1402.0,0.124498,0.287304,9.883265
75%,2798.0,0.612916,0.995988,10.716447
max,9487.0,62.83343,56.541466,72.98485


In [7]:
# Preview the first rows of the combined dataframe.
df.head()

Unnamed: 0,file,index,ax,ay,az
0,DATA43.txt,0,0.220267,0.172383,9.959879
1,DATA43.txt,1,0.181959,0.105345,10.04607
2,DATA43.txt,2,0.325611,0.220267,9.768343
3,DATA43.txt,3,0.277727,0.105345,9.787497
4,DATA43.txt,4,0.287304,0.220267,9.950302


In [8]:
# Save the combined dataframe inside DATA_FOLDER as "<DATE>.csv", without the row labels.
df.to_csv(
    os.path.join(DATA_FOLDER, f"{DATE}.csv"),
    index=False,
)

## Create Label Dataframe

In [9]:
def labelsFromFiles(folder):
    """
    Write a label template file for the recordings in a folder.

    The file is created (or overwritten) inside the folder as
    "<folder name>.labels". It holds the header "file, label" followed by one
    row "DATA<i>.txt," for i = 1..N, where N is the number of regular ".txt"
    files directly inside the folder. The label field of each row is left
    empty.

    Args:
        folder (string): Folder where the recording files are located.
    """
    # normpath drops a trailing separator, which would otherwise make basename
    # return "" and produce a nameless ".labels" file.
    folder_name = os.path.basename(os.path.normpath(folder))

    n_files = sum(
        1
        for file in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, file)) and file.endswith(".txt")
    )

    # Rows follow the DATA<i>.txt numbering rather than the names actually
    # found in the folder; only the number of ".txt" files is used.
    rows = ["file, label\n"]
    rows.extend(f"DATA{i}.txt,\n" for i in range(1, n_files + 1))

    with open(os.path.join(folder, folder_name + ".labels"), "w") as labels:
        labels.writelines(rows)

In [10]:
# labelsFromFiles(DATA_FOLDER)