In [26]:
import pandas as pd

# Load the DataFrame from the pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source. 'your_dataframe.pkl' looks like a
# placeholder path -- confirm the real file name before running.
df = pd.read_pickle('your_dataframe.pkl')

In [27]:
def _rows_with_device_count(frame, count):
    """
    Return a copy of the rows of ``frame`` whose 'meta' dict reports ``count`` devices.

    A row is kept only when its 'meta' value is a dict that has a 'devices'
    key equal to ``count``; rows with a non-dict 'meta' or without that key
    are excluded.
    """
    def reports_count(meta):
        return (isinstance(meta, dict)
                and 'devices' in meta
                and meta['devices'] == count)

    # .copy() detaches the selection from ``frame`` so later edits to the
    # result do not touch (or warn about) the original DataFrame.
    return frame[frame['meta'].apply(reports_count)].copy()


# Split the data by the device count recorded in each row's 'meta'.
df_span1 = _rows_with_device_count(df, 10)
df_span2 = _rows_with_device_count(df, 9)

In [32]:
def flatten_dict(d, parent_key='', sep='_'):
    """
    Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined onto their ancestors' keys with
    ``sep``. When ``parent_key`` is non-empty it is used as the prefix for
    every top-level key of ``d``. Nested dictionaries that are empty
    contribute no entries.
    """
    flat = {}
    for key, value in d.items():
        full_key = key if not parent_key else f'{parent_key}{sep}{key}'
        if not isinstance(value, dict):
            flat[full_key] = value
            continue
        # Descend into the nested dictionary, carrying the joined key as prefix.
        flat.update(flatten_dict(value, full_key, sep=sep))
    return flat


def _flatten_field(value, prefix):
    """Flatten ``value`` under ``prefix`` when it is a dict; otherwise contribute nothing."""
    if isinstance(value, dict):
        return flatten_dict(value, parent_key=prefix)
    return {}


def flatten_row(row):
    """
    Flatten one record's 'measurements', 'meta' and 'time' fields into a Series.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must support ``row['measurements']``, ``row['meta']`` and ``row['time']``.

    Returns
    -------
    pd.Series
        * One entry per measurement, labelled ``'<sensor>.<key>'``.
        * The flattened 'meta' and 'time' dicts, labelled with ``'meta_'`` /
          ``'time_'`` prefixes (nested keys joined by ``'_'``).
        * An empty object-dtype Series when 'measurements' is not a dict
          (for example a float NaN) or when nothing could be flattened.

    Notes
    -----
    Sensor readings, 'meta' or 'time' values that are not dicts are skipped
    instead of raising, mirroring how a non-dict 'measurements' is handled.
    """
    measurements = row['measurements']
    if not isinstance(measurements, dict):
        # Explicit dtype: pandas 1.x warns when an empty Series is built
        # without one, and would default it to float64.
        return pd.Series(dtype=object)

    flattened = {}
    for sensor, readings in measurements.items():
        if not isinstance(readings, dict):
            # A missing/placeholder reading has no keys to expand.
            continue
        for reading_key, reading_value in readings.items():
            flattened[f'{sensor}.{reading_key}'] = reading_value

    # Later updates win on key collisions, matching measurements -> meta -> time.
    flattened.update(_flatten_field(row['meta'], 'meta'))
    flattened.update(_flatten_field(row['time'], 'time'))

    if not flattened:
        return pd.Series(dtype=object)
    return pd.Series(flattened)


# Flatten every row of the first span into one wide record per row
df_span1_flattened = df_span1.apply(flatten_row, axis='columns')

# Same row-wise flattening for the second span
df_span2_flattened = df_span2.apply(flatten_row, axis='columns')

In [34]:
# Display the column labels produced by flattening df_span1.
df_span1_flattened.columns

Index(['13.pv0', '13.pv1', '13.pv2', '13.pv3', '14.pv0', '14.pv1', '14.pv2',
       '14.pv3', '15.pv0', '15.pv1', '15.pv2', '15.pv3', '16.pv0', '16.pv1',
       '16.pv2', '16.pv3', '19.pv0', '19.pv1', '19.pv2', '19.pv3', '21.pv0',
       '21.pv1', '21.pv2', '21.pv3', '28.pv0', '28.pv1', '28.pv2', '28.pv3',
       '7.pv0', '7.pv1', '7.pv2', '7.pv3', '8.pv0', '8.pv1', '8.pv2', '8.pv3',
       '9.pv0', '9.pv1', '9.pv2', '9.pv3', 'meta_bootCount', 'meta_devices',
       'meta_errors', 'meta_location', 'meta_modem_localIP',
       'meta_modem_modemInfo', 'meta_modem_operator',
       'meta_modem_signalQuality', 'meta_modem_simCCID',
       'meta_power_battery_V', 'meta_power_solar_V', 'meta_uuid',
       'meta_version', 'time_server_UTC', 'time_server_epoch'],
      dtype='object')

In [35]:
# Display the column labels produced by flattening df_span2.
df_span2_flattened.columns

Index(['10.pv0', '10.pv1', '10.pv2', '10.pv3', '11.pv0', '11.pv1', '11.pv2',
       '11.pv3', '12.pv0', '12.pv1', '12.pv2', '12.pv3', '13.pv0', '13.pv1',
       '13.pv2', '13.pv3', '14.pv0', '14.pv1', '14.pv2', '14.pv3', '15.pv0',
       '15.pv1', '15.pv2', '15.pv3', '16.pv0', '16.pv1', '16.pv2', '16.pv3',
       '17.pv0', '17.pv1', '17.pv2', '17.pv3', '18.pv0', '18.pv1', '18.pv2',
       '18.pv3', '19.pv0', '19.pv1', '19.pv2', '19.pv3', '20.pv0', '20.pv1',
       '20.pv2', '20.pv3', '21.pv0', '21.pv1', '21.pv2', '21.pv3', '4.pv0',
       '4.pv1', '4.pv2', '4.pv3', '5.pv0', '5.pv1', '5.pv2', '5.pv3', '6.pv0',
       '6.pv1', '6.pv2', '6.pv3', '7.pv0', '7.pv1', '7.pv2', '7.pv3', '8.pv0',
       '8.pv1', '8.pv2', '8.pv3', '9.pv0', '9.pv1', '9.pv2', '9.pv3',
       'meta_bootCount', 'meta_devices', 'meta_errors',
       'meta_modem_signalQuality', 'meta_power_battery_V',
       'meta_power_solar_V', 'time_server_UTC', 'time_server_epoch'],
      dtype='object')