### Import custom functions

In [None]:
from utils import *
from modules import *
from missing_sync import build_schema
%matplotlib widget

### 1. Get directories for faulty datasets

Datasets without GPS (13 in total):

- OE101001
- OE101002
- OE102002
- OE102003
- OE104003
- OE104004
- OE101012
- OE106005
- OE106011
- OE106010
- OE106016
- OE106021
- OE106032

In [None]:
# Import necessary modules
import os
import utils.for_setpath as path

# Participant codes of the acquisitions that were recorded without GPS.
participant_codes = [
    'OE101001',
    'OE101002',
    'OE102002',
    'OE102003',
    'OE104003',
    'OE104004',
    'OE101012',
    'OE106005',
    'OE106011',
    'OE106010',
    'OE106016',
    'OE106021',
    'OE106032',
]

# Session name -> session number.
sessions = [
    ('Baixa', 4),
    ('Belem', 1),
    ('Parque', 6),
    ('Gulbenkian', 3),
    ('Lapa', 2),
    ('Graça', 5)
]
session_name_to_num = dict(sessions)

# Root of the source data tree, as configured in utils.for_setpath.
sourcedata = path.sourcedata

# One dictionary per participant code whose session folder was found on disk.
participants_info = []

for participant_code in participant_codes:
    try:
        # Decode the participant code into its acquisition fields.
        participant_info = parse_participant_code(participant_code)
        city = participant_info['city']
        subject_id = participant_info['subject_id']
        session_name = participant_info['session']
        stimulus = participant_info['stimulus']  # parsed but not used below

        # Translate the session name into its session number.
        ses_num = session_name_to_num.get(session_name)
        if ses_num is None:
            print(f"Session name '{session_name}' not recognized for participant code '{participant_code}'.")
            continue  # Skip to the next participant code if session name is invalid

        # Subject folders are named "OE" + zero-padded 3-digit subject id.
        subject_id_str = f"{subject_id:03d}"
        subject_dir = os.path.join(sourcedata, 'data', f"OE{subject_id_str}")
        if not os.path.isdir(subject_dir):
            print(f"Subject directory not found for {participant_code}. Skipping...")
            continue

        # Pick the session folder whose name contains the session name
        # (case-insensitive). os.listdir returns entries in arbitrary order and
        # also lists plain files, so sort the names and keep directories only:
        # the choice is then reproducible and always a folder.
        session_key = session_name.lower()
        session_folder = next(
            (
                os.path.join(subject_dir, folder_name)
                for folder_name in sorted(os.listdir(subject_dir))
                if session_key in folder_name.lower()
                and os.path.isdir(os.path.join(subject_dir, folder_name))
            ),
            None,
        )

        if session_folder is None:
            print(f"Session folder containing '{session_name}' not found for participant {participant_code}.")
            continue

        # Report what was resolved for this participant code.
        print(f"Participant Code: {participant_code}")
        print(f"Subject ID: {subject_id}")
        print(f"Session Name: {session_name}")
        print(f"Session Folder: {session_folder}")
        print("----------------------------")

        # Keep the resolved information for the processing cell below.
        participants_info.append({
            'participant_code': participant_code,
            'city': city,
            'subject_id': subject_id,
            'session_name': session_name,
            'session_number': ses_num,
            'session_folder': session_folder,
        })

    except ValueError as ve:
        print(f"Error parsing participant code '{participant_code}': {ve}")
    except Exception as e:
        # Best-effort loop: report the problem and carry on with the next code.
        print(f"An unexpected error occurred for participant code '{participant_code}': {e}")



### Create and export geodata

In [None]:
# Import necessary modules
import os
import utils.for_setpath as path
import pandas as pd
from openpyxl import Workbook
from pythermalcomfort.models import utci
import numpy as np

# ---------------------------------------------------------------------
# Results log: an openpyxl workbook whose active sheet starts with a header row;
# process_session appends one row per session to `ws`.
log_header = ["Participant Name", "Session Name", "Status", "Radiant Temperature"]
wb = Workbook()
ws = wb.active
ws.append(log_header)

# Input data folder and output log folder, both below the configured sourcedata root
sourcedata = os.path.join(path.sourcedata, 'data')
logdata = os.path.join(path.sourcedata, 'supp', 'log')

# ---------------------------------------------------------------------
# FUNCTIONS
def process_session(session_path, participant_name, session_name):
    """Load one session, export its geodata to Excel and log the outcome.

    Parameters
    ----------
    session_path
        Path of the session to load; passed to ``create_datapicker``.
    participant_name
        Participant label used in the output folder/file names and in the log row.
    session_name
        Session label used in the output folder/file names and in the log row.

    Returns
    -------
    None
        The outcome is appended to the notebook-level worksheet ``ws`` as
        ``[participant_name, session_name, status, radiant_temp_status]``.
        ``status`` is 1 when every step succeeded and 0 otherwise.
        ``radiant_temp_status`` is 1 only when the session succeeded and the
        ``tk_thermocouple_temperature_value`` column exists with a non-zero sum.

    Notes
    -----
    Any exception raised while loading, processing or saving is caught,
    reported with ``print`` and recorded as a failed session; it is not
    re-raised.
    """
    radiant_column = 'tk_thermocouple_temperature_value'

    try:
        # Load the dataset behind the given session path.
        picker = create_datapicker(path=session_path, schema=build_schema, calibrate_ubx_to_harp=False)
        session_dataset = load_dataset(
            picker.selected_path,
            ubx=True,
            unity=False,
            calibrate_ubx_to_harp=False,
            schema=build_schema,
        )

        # Build the geodata table and run the shared post-processing helpers.
        frame = session_dataset.to_geoframe()
        frame['time'] = frame.index.to_pydatetime()
        frame = tidy_geodata(frame)
        frame = add_environmental_metrics(frame)

        # 1 when the radiant temperature column is present and does not sum to zero.
        radiant_flag = 1 if (radiant_column in frame.columns and frame[radiant_column].sum() != 0) else 0

        # Write the full table to <logdata>/sub-<participant>/ses-<session>/.
        out_dir = os.path.join(logdata, f"sub-{participant_name}", f"ses-{session_name}")
        os.makedirs(out_dir, exist_ok=True)
        out_file = os.path.join(out_dir, f"sub-{participant_name}_ses-{session_name}_geodata.xlsx")

        if not isinstance(frame, pd.DataFrame):
            frame = pd.DataFrame(frame)
        frame.to_excel(out_file, index=True)

    except Exception as e:
        print(f"An unexpected error occurred for participant '{participant_name}', session '{session_name}': {e}")
        print("Most likely needs to have an updated EEG .nedf file")
        # A failure at any step marks the whole session as failed.
        status, radiant_temp_status = 0, 0
    else:
        status, radiant_temp_status = 1, radiant_flag

    # One log row per session, whether it succeeded or not.
    ws.append([participant_name, session_name, status, radiant_temp_status])

# MAIN SCRIPT
# Process every session collected in `participants_info`; each call appends one
# row to the results worksheet.
for folder in participants_info:
    subj_folder = folder['session_folder']
    subj_name = f"OE{folder['subject_id']:03d}"
    session_name = folder['session_name']
    process_session(subj_folder, subj_name, session_name)

# Save the results workbook in the log directory.
# process_session only creates sub-folders of `logdata` for sessions that get
# as far as the export step, so the log directory itself may not exist yet
# (e.g. every session failed, or `participants_info` is empty).
os.makedirs(logdata, exist_ok=True)
result_file = os.path.join(logdata, "session_processing_results.xlsx")
wb.save(result_file)


# END

In [None]:
from modules import *
from pluma.schema.outdoor import build_schema
%matplotlib widget
# Create a dataset picker with an empty start path (UBX-to-Harp calibration
# switched off) and show it in the cell output.
datapicker = create_datapicker(path=r'',schema=build_schema, calibrate_ubx_to_harp=False)
display(datapicker)

### 1. Load dataset

In [None]:
# Create a dataset picker starting at subject OE003's data folder and show it
# in the cell output. NOTE: the path is a hard-coded Windows drive location.
datapicker = create_datapicker(path=r'Z:\Exp_4-outdoor_walk\lisbon\sourcedata\data\OE003', schema=build_schema)
display(datapicker)

### 2. Explore Dataset Maps

In [25]:
# Map the thermocouple temperature of the dataset loaded by the picker.
# The dataset is read from `datapicker.dataset`, as in the other cells; no
# notebook-level `dataset` variable is defined by the cells above.
exploremap(datapicker.dataset.to_geoframe(), column='tk_thermocouple_temperature_value', cmap='coolwarm')

Failed Stream Harp stream from device 		TK, stream GPS_Latitude(227): Input dataframe is empty.
Failed Stream Harp stream from device 		TK, stream GPS_Longitude(228): Input dataframe is empty.
Failed Stream Harp stream from device 		TK, stream GPS_Altitude(229): Input dataframe is empty.
Failed Stream Harp stream from device 		TK, stream GPS_HasFix(232): Input dataframe is empty.


### 3. Explore Dataset Streams

In [27]:
# List the attributes of the EEG stream's data object. The dataset is read
# from `datapicker.dataset`, as in the other cells; no notebook-level
# `dataset` variable is defined by the cells above.
dir(datapicker.dataset.streams.EEG.data)
# print(datapicker.dataset.streams.EEG.data)

# datapicker.dataset.streams.EEG.data["np_time"]



['__class__',
 '__contains__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_annotations',
 '_cals',
 '_check_bad_segment',
 '_comp',
 '_cropped_samp',
 '_dtype',
 '_dtype_',
 '_extra_attributes',
 '_filenames',
 '_first_samps',
 '_first_time',
 '_get_buffer_size',
 '_get_channel_positions',
 '_getitem',
 '_handle_tmin_tmax',
 '_init_kwargs',
 '_last_samps',
 '_last_time',
 '_orig_units',
 '_parse_get_set_params',
 '_pick_drop_channels',
 '_pick_projs',
 '_preload_data',
 '_projector',
 '_raw_extras',
 '_raw_lengths',
 '_read_comp_grade',
 '_read_picks',
 '_read_segment',
 '_read_

In [None]:
# Short alias for the EEG stream's data object, used by the cells below.
EEG = datapicker.dataset.streams.EEG.data

In [None]:
# Show the `np_time` attribute of the EEG data object.
EEG.np_time

In [None]:
# Call compute_psd() on the EEG data object and plot the result.
EEG.compute_psd().plot()

In [None]:
# Show the EEG stream's data object in the cell output.
datapicker.dataset.streams.EEG.data



In [None]:
# Signals handed to plot_traces, keyed by a short label.
streams = datapicker.dataset.streams
accelerometer = streams.Accelerometer.data

traces = {
    'accelX': accelerometer["Accl_X"],
    'temp (C)': streams.TK.AirQuality.Temperature.data / 100,
    'angleX': accelerometer["Orientation_X"],
    'altitude': datapicker.dataset.georeference.elevation,
    'iaq': streams.TK.AirQuality.IAQIndex.data,
    'eeg': streams.EEG.data.np_eeg,
}
plot_traces(traces)

## Export to IGOT

In [None]:
import os
import pandas as pd
from openpyxl import Workbook
from pythermalcomfort.models import utci
import numpy as np

# ---------------------------------------------------------------------
# Initialize the Excel workbook and sheet


# Path information
#logdata    = r'Z:\Exp_4-outdoor_walk\lisbon\sourcedata\supp\log'

# To store the mean values for each session
# all_means = []

# ---------------------------------------------------------------------
# FUNCTIONS

# Add derived environmental columns and UTCI to the picker's geodata, attach
# the GPS coordinates as plain columns, and flag whether the radiant
# temperature signal contains non-zero data.
radiant_temp_status = 0  # Default to 0 if not found or if all zeros

# Take the geodata table from the picker.
# NOTE(review): no copy is made here; if `datapicker.geodata` returns the same
# object on every access, the column assignments below also modify it - confirm.
geodata = datapicker.geodata  # Retrieve geodata






# Define custom parameters
# NOTE(review): `humidity` is passed as `rh` to utci() below; pythermalcomfort
# documents `rh` as relative humidity in % - confirm the raw sensor unit and
# whether "fraction" is the intended scale.
humidity = geodata['tk_humidity_humidity_value'] / 100  # in fraction
# Horizontal wind speed: magnitude of the north/east wind components.
# NOTE(review): pythermalcomfort documents `v` as wind speed 10 m above ground;
# the measurement height noted here is ~2.5 m - confirm whether a correction is needed.
wind_speed = np.sqrt(geodata['atmos_northwind_value']**2 + geodata['atmos_eastwind_value']**2)  # m/s (~2.5 m of elevation)
temp_atmos = geodata['atmos_airtemperature_value']  # in ºC
temp_tk = geodata['tk_airquality_temperature_value'] / 100  # in ºC
temp_tk_ptc = geodata['tk_ptc_airtemp_value'] / 100  # in ºC
temp_radiant = geodata['tk_thermocouple_temperature_value'] / 100  # in ºC

# Store the derived series as new columns of geodata
geodata['humidity'] = humidity
geodata['wind_speed'] = wind_speed
geodata['temp_atmos'] = temp_atmos
geodata['temp_tk'] = temp_tk
geodata['temp_tk_ptc'] = temp_tk_ptc
geodata['temp_radiant'] = temp_radiant

# Compute the UTCI from air temperature, radiant temperature, wind speed and humidity
geodata['utci'] = utci(tdb=temp_atmos, tr=temp_radiant, v=wind_speed, rh=humidity)








# Get GPS coordinates and integrate them into geodata:
# x/y/z of the geometry become longitude/latitude/elevation columns, and the
# geometry column itself is dropped. `geodata` is rebound to the joined result.
coords = datapicker.geodata.geometry.get_coordinates(include_z=True)
coords.rename(columns={'y': 'latitude', 'x': 'longitude', 'z': 'elevation'}, inplace=True)
geodata = geodata.join(coords).drop(columns=['geometry'])

# Check if the radiant temperature column exists and contains non-zero values.
# NOTE(review): the column is already read when `temp_radiant` is computed
# above, so a missing column would have raised there before reaching this check.
if 'tk_thermocouple_temperature_value' in geodata:
    if geodata['tk_thermocouple_temperature_value'].sum() != 0:
        radiant_temp_status = 1  # Non-zero values exist

# Save the full geodata to an Excel file in the log directory
#log_folder = os.path.join(logdata, f"sub-{participant_name}", f"ses-{session_name}")
#os.makedirs(log_folder, exist_ok=True)
# geodata_file = os.path.join(log_folder, f"sub-{participant_name}_ses-{session_name}_geodata.xlsx")
# geodata.to_excel(geodata_file, index=False)

# Compute the mean of all numerical columns
# numeric_means = geodata.mean(numeric_only=True).to_dict()
# numeric_means['Participant'] = participant_name
# numeric_means['Session'] = session_name
# all_means.append(numeric_means)  # Append to the global all_means list

# Log the result


# def create_geodata(geodata):
#    """Compute UTCI across the whole time series."""
    


# ---------------------------------------------------------------------
# MAIN SCRIPT
# processed_participants = 0

# for participant_folder in os.listdir(sourcedata):
#     if participant_folder.startswith("OE"):
#         participant_path = os.path.join(sourcedata, participant_folder)
#         for session_folder in os.listdir(participant_path):
#             session_path = os.path.join(participant_path, session_folder)
#             if os.path.isdir(session_path):
#process_session( participant_folder, session_folder)

        # processed_participants += 1
        # if processed_participants >= 2:
        #     break  # Stop after processing 2 participants

# Save the Excel file with the updated status in the log directory
# result_file = os.path.join(logdata, "session_processing_results.xlsx")
# wb.save(result_file)

# Save the mean values to a final CSV file
# if all_means:
#     final_means_df = pd.DataFrame(all_means)
#     final_means_file = os.path.join(logdata, "final_means.csv")
#     final_means_df.to_csv(final_means_file, index=False)
#     print(f"Final CSV file with mean values saved to: {final_means_file}")
# else:
#     print("No valid data found to compute mean values.")

In [None]:
# Show the picker's geodata table in the cell output.
datapicker.geodata

In [None]:
# Split the selected dataset path into its components and show the last one
# (the dataset folder name).
# Both "\" and "/" are accepted as separators and trailing separators are
# ignored, so the last component is neither empty nor the whole path.
import re

strs = re.split(r'[\\/]', datapicker.selected_path.rstrip('\\/'))
strs[-1]

In [None]:
# Export the picker's geodata as CSV, named after the last component of the
# selected dataset path.
# NOTE: this rebinds the notebook-level `logdata` to a local export folder.
logdata = r'c:\EXPORT'
# DataFrame.to_csv does not create missing directories, so create it up front.
os.makedirs(logdata, exist_ok=True)
final_means_file = os.path.join(logdata, strs[-1] + '.csv')
datapicker.geodata.to_csv(final_means_file, index=True)

### 4. Export Dataset to OGC API

In [None]:
# Descriptive metadata for the exported record. The <...> placeholders and the
# contact details are template values to be filled in per dataset.
record_contacts = [
    Contact(
        name='Your Name',
        institution='Your Institution',
        email='youremail@yourdomain.com'
    )
]
record_properties = RecordProperties(
    title='<City> Outdoor Walk: <CityRegion> Subject <ID>',
    description='Outdoor walk data collection',
    license='CC BY-NC 4.0',
    tool='Bonsai',
    keywords=['<City>', 'Outdoor', 'Walk', 'Microclimate', 'Biosignals'],
    contacts=record_contacts,
    themes=[]
)

# Bundle the loaded dataset, its geodata and the metadata into one record.
record = DatasetRecord(datapicker.dataset, datapicker.geodata, properties=record_properties)

In [None]:
# Write the geodata as GeoJSON and the record metadata as JSON, both named
# after the record id.
rpath = Path(record.id)
export_geoframe_to_geojson(datapicker.geodata, rpath.with_suffix('.geojson'))

# Serialise before opening the file so that a failure in to_json() does not
# leave an empty .json file behind, and write UTF-8 explicitly instead of
# relying on the platform's default encoding.
record_json = record.to_json()
with open(rpath.with_suffix('.json'), 'w', encoding='utf-8') as f:
    f.write(record_json)

# Errors

## Numpy version

The output of the following was: `2.1.3`
```python
import numpy as np
print(np.__version__)
```

The error was:
```python
{
	"name": "AttributeError",
	"message": "`np.string_` was removed in the NumPy 2.0 release. Use `np.bytes_` instead.",
	"stack": "---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
File c:\\Users\\joaop\\git\\emotional-cities\\notebooks\\.conda\\Lib\\site-packages\\ipyfilechooser\\filechooser.py:317, in FileChooser._on_select_click(self, _b)
    315 if self._callback is not None:
    316     try:
--> 317         self._callback(self)
    318     except TypeError:
    319         # Support previous behaviour of not passing self
    320         self._callback()

File c:\\Users\\joaop\\git\\emotional-cities\\notebooks\\src\\ingestion\\modules.py:46, in create_datapicker.<locals>.dataset_changed(chooser)
     44 display(chooser)
     45 print(f\"Loading dataset: {Path(chooser.selected_path).name}...\" )
---> 46 dataset = load_dataset(chooser.selected_path, ubx=ubx, unity=unity, calibrate_ubx_to_harp=calibrate_ubx_to_harp, schema=schema)
     47 print(f\"Dataset: {dataset} loaded successfully, and {'not' if not dataset.has_calibration else 'sucessfully'} calibrated.\")
     48 chooser.dataset = dataset

File c:\\Users\\joaop\\git\\emotional-cities\\notebooks\\src\\ingestion\\helpers.py:22, in load_dataset(root, schema, reload, ubx, unity, calibrate_ubx_to_harp, export_path)
     15 def load_dataset(root, schema, reload=True, ubx=True, unity=False, calibrate_ubx_to_harp=True, export_path=None):
     16     # Path to the dataset. Can be local or remote.
     17     dataset = Dataset(
     18         root=root,
     19         datasetlabel=\"FMUL_\" + root.split(\"\\\\\")[-1],
     20         georeference= Georeference(),
     21         schema=schema)  # Create a Dataset object that will contain the ingested data.
---> 22     dataset.populate_streams(autoload=False)  # Add the \"schema\" that we want to load to our Dataset. If we want to load the whole dataset automatically, set autoload to True.
     24     if reload:
     25         # We will just load every single stream at the same time. This might take a while if loading from AWS
     26         # Some warnings will be printed if some sensors were not acquired during the experiment. These are normal and can be usually ignored.
     27         dataset.reload_streams(force_load=True)

File c:\\Users\\joaop\\git\\emotional-cities\\notebooks\\.conda\\Lib\\site-packages\\pluma\\schema\\__init__.py:223, in Dataset.populate_streams(self, root, autoload)
    221     root = ComplexPath(root)
    222 root = ensure_complexpath(root)
--> 223 self.streams = self.schema(root=root, parent_dataset=self, autoload=autoload)

File c:\\Users\\joaop\\git\\emotional-cities\\notebooks\\src\\ingestion\\missing_sync.py:43, in build_schema(root, parent_dataset, autoload)
     41 # Pupil streams
     42 streams.PupilLabs.DecodedFrames =       HarpStream(209, device=\"PupilLabs\", streamlabel=\"Pupil_RawFrames\", **kwargs)
---> 43 streams.PupilLabs.RawFrames =           PupilWorldCameraStream(210, device=\"PupilLabs\", streamlabel=\"Pupil_RawFrames\", **kwargs)
     44 streams.PupilLabs.Counter.IMU =         HarpStream(211, device=\"PupilLabs\", streamlabel=\"Counter_IMU\", **kwargs)
     45 streams.PupilLabs.PupilGaze =           PupilGazeStream(212, device=\"PupilLabs\", streamlabel=\"Pupil_Gaze\", **kwargs)

File c:\\Users\\joaop\\git\\emotional-cities\\notebooks\\.conda\\Lib\\site-packages\\pluma\\stream\\zeromq.py:73, in PupilWorldCameraStream.__init__(self, eventcode, **kw)
     63 def __init__(self, eventcode: int, **kw):
     64     super(PupilWorldCameraStream, self).__init__(
     65         eventcode,
     66         streamtype=StreamType.PUPIL,
     67         filenames=[
     68             \"PupilLabs/WorldCamera_Frame0.bin\",
     69             \"PupilLabs/WorldCamera_Frame1.bin\",
     70             \"PupilLabs/WorldCamera_Frame2.bin\",
     71         ],
     72         dtypes=[
---> 73             [(\"SensorId\", np.string_, 36)],
     74             [
     75                 (\"Format\", np.uint32),
     76                 (\"Width\", np.uint32),
     77                 (\"Height\", np.uint32),
     78                 (\"Sequence\", np.uint32),
     79                 (\"Timestamp\", np.uint64),
     80                 (\"DataBytes\", np.uint32),
     81                 (\"Reserved\", np.uint32),
     82             ],
     83             None,
     84         ],
     85         **kw,
     86     )

File c:\\Users\\joaop\\git\\emotional-cities\\notebooks\\.conda\\Lib\\site-packages\\numpy\\__init__.py:400, in __getattr__(attr)
    397     raise AttributeError(__former_attrs__[attr], name=None)
    399 if attr in __expired_attributes__:
--> 400     raise AttributeError(
    401         f\"`np.{attr}` was removed in the NumPy 2.0 release. \"
    402         f\"{__expired_attributes__[attr]}\",
    403         name=None
    404     )
    406 if attr == \"chararray\":
    407     warnings.warn(
    408         \"`np.chararray` is deprecated and will be removed from \"
    409         \"the main namespace in the future. Use an array with a string \"
    410         \"or bytes dtype instead.\", DeprecationWarning, stacklevel=2)

AttributeError: `np.string_` was removed in the NumPy 2.0 release. Use `np.bytes_` instead."
}
```
Solution: pin NumPy below 2.0 in the conda environment file (here `numpy=1.24.4`):
```yaml
name: .conda
dependencies:
  - python=3.11
  - numpy=1.24.4
  - jupyter
  - matplotlib
  - geopandas
  - ipympl
  - pip
  - pip:
    - ipyfilechooser
    - opencv-python
    - git+https://github.com/emotional-cities/pluma-analysis.git@46cffe14b514d64a83483dbd07c93872ddc9577e
```
The fix was also confirmed by installing a pre-2.0 NumPy with `pip install "numpy<2.0"`.