In [None]:
# Convert NASA MAT files to CSV files

In [None]:
#imports
import os
import scipy.io
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.cluster import KMeans

In [None]:
# Title: Visualize Altitude by Cluster and Phase

# Description:
# Creates scatter plots showing altitude over record index, 
# colored by cluster assignments and separated by flight phase, 
# to help visualize altitude patterns across different flight states.


In [None]:
# Convert every .mat file found one level below `raw_data_folder` into a CSV
# written to `output_folder`, one CSV per MAT file.
raw_data_folder = 'raw_data'
output_folder = 'output'
os.makedirs(output_folder, exist_ok=True)

MAT_EXTENSION = '.mat'


def _unpack_mat_variables(mat):
    """Collect the numeric payload of each variable in a loaded MAT dict.

    Parameters
    ----------
    mat : dict
        Mapping of variable name -> value, as returned by ``scipy.io.loadmat``.

    Returns
    -------
    dict
        Variable name -> 1-D float ``numpy`` array. Keys starting with ``__``
        and values that are not ``numpy`` arrays are ignored. A variable whose
        payload cannot be extracted or converted to float is skipped with a
        printed message instead of aborting the whole conversion.
    """
    unpacked = {}
    for name, value in mat.items():
        if name.startswith('__') or not isinstance(value, np.ndarray):
            continue
        try:
            # .item() only succeeds for single-element arrays; anything else
            # raises and is reported by the except clause below.
            # NOTE(review): presumably each variable is a 1x1 struct whose
            # first field holds the samples -- confirm against the dataset.
            val = value.item()
            if isinstance(val, (int, float, np.number)):
                data = np.array([val])
            elif isinstance(val, np.ndarray):
                data = val.flatten()
            elif isinstance(val, tuple):
                first = val[0]
                if isinstance(first, np.ndarray):
                    data = first.flatten()
                elif isinstance(first, (int, float, np.number)):
                    data = np.array([first])
                else:
                    print(f" Skipping {name}: tuple contains {type(first)}")
                    continue
            else:
                print(f" Skipping {name}: unexpected type {type(val)}")
                continue
            # Convert inside the try block: a string/object payload raises
            # here and must only cost this one variable, not the whole run.
            unpacked[name] = data.astype(float)
        except Exception as e:
            print(f" Could not process {name}: {e}")
    return unpacked


def _build_frame(unpacked, filename):
    """Assemble equal-length columns into a DataFrame tagged with `filename`.

    Shorter arrays are right-padded with NaN up to the longest array so that
    all columns share one length. `unpacked` must be non-empty.
    """
    max_len = max(len(a) for a in unpacked.values())
    padded = {
        key: np.pad(arr, (0, max_len - len(arr)), constant_values=np.nan)
        for key, arr in unpacked.items()
    }
    frame = pd.DataFrame(padded)
    frame['filename'] = filename
    return frame


total_files = 0  # count csv files

# Loop through folders in 'raw_data' (sorted so runs are reproducible)
for folder in tqdm(sorted(os.listdir(raw_data_folder)), desc='Folders'):
    folder_path = os.path.join(raw_data_folder, folder)
    if not os.path.isdir(folder_path):
        continue

    # Loop through .mat files in the subfolder
    for file in tqdm(sorted(os.listdir(folder_path)), desc=f'Files in {folder}', leave=False):
        # Case-insensitive so that '.MAT' files are converted as well.
        if not file.lower().endswith(MAT_EXTENSION):
            continue

        input_file_path = os.path.join(folder_path, file)
        try:
            mat = scipy.io.loadmat(input_file_path)
        except FileNotFoundError:
            print(f" Skipping missing file: {file}")
            continue
        except Exception as e:
            print(f" Error loading {file}: {e}")
            continue

        unpacked = _unpack_mat_variables(mat)
        if not unpacked:
            # Nothing numeric could be extracted; no CSV is written.
            continue

        df_file = _build_frame(unpacked, file)

        # Swap only the trailing extension; str.replace would also rewrite
        # any '.mat' occurring earlier in the file name.
        csv_name = file[:-len(MAT_EXTENSION)] + '.csv'
        output_file_path = os.path.join(output_folder, f"{folder}_{csv_name}")
        df_file.to_csv(output_file_path, index=False)
        total_files += 1
        print(f" Saved {output_file_path}")

print(f" Finished processing. Total files saved: {total_files}")
