In [None]:
import pandas as pd
import os

# Root directory containing one sub-directory per user
data_root = os.path.abspath('dataset/Data')

# The first HEADER_LINES lines of every .plt file are skipped before parsing.
# Files longer than MAX_FILE_LINES lines (header included) are ignored, which
# leaves at most MAX_FILE_LINES - HEADER_LINES trackpoints per file.
HEADER_LINES = 6
MAX_FILE_LINES = 2506

# Flat list of trackpoint records (one dict per row) gathered from all files
data_frames = []

# Sort every directory listing: os.listdir() returns entries in arbitrary
# order, and a stable order keeps the resulting DataFrame reproducible.
lst = sorted(os.listdir(data_root))
for numeric_dir in lst:
    numeric_dir_path = os.path.join(data_root, numeric_dir)
    if not os.path.isdir(numeric_dir_path):
        continue

    # Iterate through the "trajectory" directories of this user
    for trajectory_dir in sorted(os.listdir(numeric_dir_path)):
        trajectory_dir_path = os.path.join(numeric_dir_path, trajectory_dir)
        if not os.path.isdir(trajectory_dir_path):
            continue

        # Iterate through the .plt files in the "trajectory" directory
        for file in sorted(os.listdir(trajectory_dir_path)):
            if not file.endswith('.plt'):
                continue

            file_path = os.path.join(trajectory_dir_path, file)

            # Read the file once; the same lines serve both the length check
            # and the parsing below.
            with open(file_path) as f:
                lines = f.readlines()

            if len(lines) > MAX_FILE_LINES:
                print(f"Skipping {file_path} - File is too long")
                continue

            # Activity name is the file name without the '.plt' extension;
            # the user id is the name of the numeric directory.
            activity = file[:-4]
            user = numeric_dir

            data = []
            for line in lines[HEADER_LINES:]:
                # Strip the trailing newline so it does not end up inside
                # 'time_string'; skip blank lines instead of failing on them.
                line = line.strip()
                if not line:
                    continue
                # Seven comma-separated fields are expected; the third one is unused.
                lat, lon, _, altitude, date_days, date_string, time_string = line.split(',')
                data.append({'user': user, 'activity': activity, 'lat': lat, 'lon': lon, 'altitude': altitude, 'date_days': date_days, 'date_string': date_string, 'time_string': time_string})

            # Add this file's records to the overall list
            data_frames.extend(data)

# Build a single DataFrame from all collected records.
# All values are kept as the strings read from the files.
final_df = pd.DataFrame(data_frames)

# Display the resulting DataFrame
final_df


In [None]:
# Persist the collected trackpoints as CSV, leaving out the DataFrame index column
output_csv = 'trackpoints.csv'
final_df.to_csv(output_csv, index=False)

In [None]:
# Sanity check: list every (user, activity) pair that has too many rows.
# The loader skips files longer than 2506 lines, but 6 of those lines are the
# header it discards, so a loaded activity can have at most 2500 rows. The
# threshold here is therefore 2500 data rows, not the 2506-line file limit.
rows_per_activity = final_df.groupby(['user', 'activity']).size().reset_index(name='counts')
rows_per_activity.query('counts > 2500')

In [None]:
# Count the distinct users in the loaded data. A bare expression in the middle
# of a cell is not displayed, so the count is printed explicitly.
print(final_df['user'].nunique())

# Compare the loaded user ids with the zero-padded ids 000-182 and collect
# (and print) the numbers that have no trackpoints in final_df.
# The set is built once so each lookup does not rescan the whole column.
known_users = set(final_df['user'])
match = []
for i in range(183):
    if str(i).zfill(3) not in known_users:
        match.append(i)
        print(i)
        

In [None]:
# Show every distinct user id, in order of first appearance in final_df
user_column = final_df['user']
user_column.unique()

In [None]:
# File with the ids of the labeled users. The directory is 'dataset', the same
# one the trajectory data is loaded from (the previous 'datasat' was a typo).
path = os.path.abspath('dataset/labeled_ids.txt')

with open(path) as f:
    # Parse the whitespace-separated ids as integers; int() already drops
    # leading zeros ('010' -> 10).
    parsed_ids = [int(token) for token in f.read().split()]

# Drop id 0 from the result.
# NOTE(review): this also removes user 000 if it is listed as labeled - confirm that is intended.
# NOTE: the name 'list' shadows the builtin; it is kept because the following
# cell reads it under this name.
list = [user_id for user_id in parsed_ids if user_id != 0]

print(list)

In [None]:
# Print the labeled user ids that have no trackpoints in the loaded data.
# Both 'match' and 'list' hold integers, so the ids are compared as integers;
# comparing a zero-padded string against the integer list could never match.
for i in match:
    if i in list:
        print(i)