In [16]:
from fastai.tabular.all import *
import glob
import os
import ast

In [17]:


def get_csv_files(folder_path : Path) -> list:
    """
    Recursively collect every file with the .csv extension under a folder.

    Args:
        folder_path (str | Path): The folder to search. Characters that are
            special to glob (``[``, ``]``, ``*``, ``?``) in this path are
            treated literally.

    Returns:
        list: ``Path`` objects, one per CSV file found in the folder or any of
              its subfolders, sorted so the order is the same on every
              platform. Returns an empty list if no CSV files are found or if
              the folder path is not a directory (a message is printed in the
              latter case).
    """
    if not os.path.isdir(folder_path):
        print(f"Error: '{folder_path}' is not a valid directory.")
        return []  # Return empty list for invalid path

    # Escape the folder part so that only "**" (any depth) and "*.csv" act as
    # wildcards; otherwise a directory such as "run[1]" would match nothing.
    pattern = os.path.join(glob.escape(str(folder_path)), "**", "*.csv")

    # glob returns matches in arbitrary (filesystem) order; sort them so that
    # anything derived from the position in the list is reproducible.
    return sorted(Path(match) for match in glob.glob(pattern, recursive=True))


def get_loc(filepath : Path):
    """
    Extract the location token from a CSI file name.

    The file name is matched against ``<label>_<location>_csidata``; the
    location is the underscore-delimited token immediately before
    ``_csidata``.

    Args:
        filepath (str | Path): Path of the CSI file; only its final component
            (the file name) is inspected.

    Returns:
        str: The location part of the file name.

    Raises:
        ValueError: If the file name does not follow the expected pattern.
    """
    # Use the argument, not a loop variable that happens to exist globally.
    file_name = Path(filepath).name
    match = re.search(r"(.*)_(.*)_csidata", file_name)
    if match is None:
        raise ValueError(
            f"Cannot parse location from '{file_name}': "
            "expected '<label>_<location>_csidata' in the file name."
        )
    return match.group(2)

def get_label(filepath : Path):
    """
    Extract the activity label from a CSI file name.

    The file name is matched against ``<label>_<location>_csidata``; the
    label is everything before the underscore that precedes the location
    token.

    Args:
        filepath (str | Path): Path of the CSI file; only its final component
            (the file name) is inspected.

    Returns:
        str: The label part of the file name.

    Raises:
        ValueError: If the file name does not follow the expected pattern.
    """
    # Use the argument, not a loop variable that happens to exist globally.
    file_name = Path(filepath).name
    match = re.search(r"(.*)_(.*)_csidata", file_name)
    if match is None:
        raise ValueError(
            f"Cannot parse label from '{file_name}': "
            "expected '<label>_<location>_csidata' in the file name."
        )
    return match.group(1)

def get_csi_data(filepath : Path):
    """
    Load the ``data`` column of a CSV file as one stacked NumPy array.

    Every cell of the ``data`` column is parsed as a Python literal (e.g. the
    text of a list of numbers) and converted to an array; the per-row arrays
    are then stacked along axis 0.

    Args:
        filepath (Path): CSV file to read; it must contain a ``data`` column.

    Returns:
        np.ndarray: One entry along axis 0 per CSV row.
    """
    raw_cells = pd.read_csv(filepath).data
    # literal_eval only accepts literals, so the cell text is never executed.
    parsed_rows = [np.array(ast.literal_eval(cell)) for cell in raw_cells]
    return np.stack(parsed_rows, axis = 0)
    

In [7]:
# Build one table holding the CSI samples of every recording under CSI_raw,
# tagged with the timestamp, location and activity of each sample.
filepath_csi_raw = Path("CSI_raw")
csv_files = get_csv_files(filepath_csi_raw)

# Metadata columns first, then the CSI value columns 0..127.
csi_columns = ["timestamp", "location", "activity"] + list(range(128))

# Collect one frame per file and concatenate once at the end; concatenating
# inside the loop re-copies the whole growing table on every iteration.
file_frames = []
for file in csv_files:
    file_df = pd.DataFrame(get_csi_data(file))
    # Only the datetime column is needed here, so skip parsing the rest.
    file_df["timestamp"] = pd.read_csv(file, usecols=["datetime"])["datetime"]
    file_df["location"] = get_loc(file)
    file_df["activity"] = get_label(file)
    file_frames.append(file_df)

if file_frames:
    csi_df = pd.concat(file_frames, ignore_index=True)
    # Put the declared columns first and keep any unexpected extra columns
    # after them, matching the layout of the declared table.
    extra_columns = [col for col in csi_df.columns if col not in csi_columns]
    csi_df = csi_df.reindex(columns=csi_columns + extra_columns)
else:
    # No files found: still expose an (empty) table with the expected columns.
    csi_df = pd.DataFrame(columns=csi_columns)

csi_df



Unnamed: 0,timestamp,location,activity,0,1,2,3,4,5,6,...,118,119,120,121,122,123,124,125,126,127
0,2025-03-01 12:15:41.120686,ly8room,standing,82,-96,4,0,18,-13,13,...,34,-22,35,-21,36,-18,35,-17,34,-15
1,2025-03-01 12:15:41.136687,ly8room,standing,83,-80,4,0,20,-1,17,...,36,-4,37,-3,36,-1,35,0,33,1
2,2025-03-01 12:15:41.153694,ly8room,standing,83,-80,4,0,17,13,16,...,30,21,30,23,29,24,27,24,25,24
3,2025-03-01 12:15:41.177624,ly8room,standing,83,-80,4,0,-13,-21,-14,...,-24,-31,-23,-33,-22,-33,-20,-33,-18,-33
4,2025-03-01 12:15:41.206498,ly8room,standing,83,-80,4,0,11,-4,8,...,18,-8,19,-7,19,-6,18,-5,18,-5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10549,2025-03-03 17:53:46.575742,MAKERSPACE,walking,83,-80,4,0,21,22,18,...,34,21,37,24,39,26,38,29,38,30
10550,2025-03-03 17:53:46.592644,MAKERSPACE,walking,83,-80,4,0,-6,34,-3,...,2,41,2,46,0,49,-1,51,-3,53
10551,2025-03-03 17:53:46.628752,MAKERSPACE,walking,83,-80,4,0,-4,-25,-4,...,-7,-27,-8,-31,-8,-33,-7,-35,-7,-36
10552,2025-03-03 17:53:46.655198,MAKERSPACE,walking,83,-80,4,0,16,-31,11,...,14,-33,16,-38,18,-41,19,-43,20,-45


In [24]:

# Parsing HAR Data
# Build one table of CSI samples from the validation recordings, giving each
# successfully parsed file its own session id.

filepath_csi_raw = Path("data_collection/makerspace_csi_har_dataset/valid")
csv_files = get_csv_files(filepath_csi_raw)

# Metadata columns first, then the CSI value columns 0..127.
csi_columns = ["timestamp", "session_id"] + list(range(128))

# NOTE(review): ids start at 4 — presumably continuing after the training
# sessions; confirm against how the train split was numbered.
session_id = 4

# Collect one frame per file and concatenate once at the end; concatenating
# inside the loop re-copies the whole growing table on every iteration.
session_frames = []
for file in csv_files:
    try:
        file_df = pd.DataFrame(get_csi_data(file))
        # Only the datetime column is needed here, so skip parsing the rest.
        file_df["timestamp"] = pd.read_csv(file, usecols=["datetime"])["datetime"]
        file_df["session_id"] = session_id
    except Exception as err:
        # Files in the folder that are not CSI recordings are skipped, but the
        # reason is reported; Ctrl-C / SystemExit are no longer swallowed.
        print(f"Failed parsing {file}: {err!r}")
        continue

    session_frames.append(file_df)
    # Only files that parsed successfully consume a session id.
    session_id += 1

if session_frames:
    csi_df = pd.concat(session_frames, ignore_index=True)
    # Put the declared columns first and keep any unexpected extra columns
    # after them, matching the layout of the declared table.
    extra_columns = [col for col in csi_df.columns if col not in csi_columns]
    csi_df = csi_df.reindex(columns=csi_columns + extra_columns)
else:
    # Nothing parsed: still expose an (empty) table with the expected columns.
    csi_df = pd.DataFrame(columns=csi_columns)

csi_df



Failed parsing data_collection\makerspace_csi_har_dataset\valid\Abel-Validation-motion_times.csv
Failed parsing data_collection\makerspace_csi_har_dataset\valid\Ivan-Validation-motion_times.csv
Failed parsing data_collection\makerspace_csi_har_dataset\valid\Matt-Validation-motion_times.csv


Unnamed: 0,timestamp,session_id,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,2025-03-20 12:13:53.035945,4,83,-80,4,0,5,-28,5,-26,...,5,-18,7,-21,8,-24,9,-27,8,-29
1,2025-03-20 12:13:53.063988,4,83,-80,4,0,26,0,24,0,...,17,2,21,3,24,3,26,3,28,2
2,2025-03-20 12:13:53.097526,4,83,-80,4,0,-21,-5,-19,-5,...,-14,-6,-16,-7,-19,-8,-21,-8,-22,-8
3,2025-03-20 12:13:53.131522,4,83,-80,4,0,30,6,27,6,...,21,6,24,9,28,10,31,9,33,9
4,2025-03-20 12:13:53.163518,4,83,-80,4,0,-14,-25,-12,-23,...,-6,-19,-8,-22,-9,-25,-11,-27,-13,-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27572,2025-03-20 12:28:22.089415,7,83,-80,4,0,-32,-2,-27,-3,...,-23,-4,-28,-3,-32,-2,-35,-1,-38,0
27573,2025-03-20 12:28:22.119408,7,83,-80,4,0,19,23,16,21,...,10,17,14,21,17,23,20,25,22,26
27574,2025-03-20 12:28:22.159415,7,83,-80,4,0,14,-5,13,-4,...,8,-1,11,-2,12,-3,14,-4,15,-5
27575,2025-03-20 12:28:22.182408,7,83,-80,4,0,30,0,27,1,...,17,2,22,2,26,1,30,0,33,-1


In [19]:
# Show which CSV files the recursive search finds under the training folder.
get_csv_files(Path("data_collection/makerspace_csi_har_dataset/train"))

[Path('data_collection/makerspace_csi_har_dataset/train/0.csv'),
 Path('data_collection/makerspace_csi_har_dataset/train/1.csv'),
 Path('data_collection/makerspace_csi_har_dataset/train/2.csv'),
 Path('data_collection/makerspace_csi_har_dataset/train/3.csv'),
 Path('data_collection/makerspace_csi_har_dataset/train/Abel-Training-motion_times.csv'),
 Path('data_collection/makerspace_csi_har_dataset/train/Ivan-Training-motion_times.csv'),
 Path('data_collection/makerspace_csi_har_dataset/train/Matt-Training-motion_times.csv')]

In [23]:
# Persist the combined CSI table to disk.
# NOTE(review): this writes whatever `csi_df` currently holds to train.csv,
# while the parsing cell earlier in the file reads the "valid" folder —
# confirm the intended split before running top to bottom.
output_path = Path("makerspace_data/train.csv")
# to_csv does not create missing folders, so make sure the target exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
csi_df.to_csv(output_path)