# Data Formatting

In [6]:
import pandas as pd
import numpy as np
import json
# Lift pandas' display limits: passing None removes the cap on both the
# per-cell text width and the number of columns shown.
for _display_option in ('display.max_colwidth', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [7]:
# Read the file as newline-delimited JSON: each line is parsed as its own
# JSON document with json.loads (pandas' `lines=True` reader is not used here).
# UTF-8 is requested explicitly so decoding does not depend on the platform's
# default encoding, and whitespace-only lines are skipped because json.loads
# would raise on them.
with open('./data/bbloData.json', 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

In [8]:
# Fields returned verbatim as strings; every other field is parsed as an integer.
STRING_FIELDS = ('Date', 'Time')

# Output columns, in the order they appear in each record.
RECORD_FIELDS = (
    'Date', 'Time', 'Total',
    'lvl1nswe', 'lvl2e', 'lvl3e', 'lvl3nsw', 'lvl4e', 'lvl4nsw',
)


def extract_field(attributes, field):
    """Return the parsed value of ``field`` from one item's attribute mapping.

    Defined once, outside the loop, so it is not re-created for every item.

    Args:
        attributes: Mapping of field name to a wrapper object; only wrappers
            that contain an ``'S'`` entry are read.
        field: Name of the field to extract.

    Returns:
        The raw string for fields listed in ``STRING_FIELDS``; an ``int`` for
        every other field (commas are removed before conversion); ``None``
        when the field is absent or its wrapper has no ``'S'`` entry.

    Raises:
        ValueError: If a numeric field's text is not a valid integer once
            commas have been removed.
    """
    # NOTE(review): the {'S': ...} wrappers look like DynamoDB-typed JSON; a
    # value stored under another key (e.g. 'N') is treated as missing here,
    # exactly as before -- confirm that is intended.
    if field not in attributes or 'S' not in attributes[field]:
        return None  # Missing field, or a wrapper without a string value

    text = attributes[field]['S']
    if field in STRING_FIELDS:
        return text

    # Strip thousands separators for every numeric field (not only 'Total'),
    # so a value such as "1,234" parses instead of raising ValueError.
    return int(text.replace(',', ''))


# Create a list of dictionaries, one per input item, with the desired structure
records = []
for item in data:
    curr_item = item['Item']
    records.append(
        {field: extract_field(curr_item, field) for field in RECORD_FIELDS}
    )

# Build the DataFrame from all parsed records in a single call
df = pd.DataFrame(data=records)

# Print the frame as plain text
print(df)

# Write the frame to CSV, omitting the index column, so it can be reused later
df.to_csv(path_or_buf='./cleanedData.csv', index=False)


              Date   Time   Total  lvl1nswe  lvl2e  lvl3e  lvl3nsw  lvl4e  \
0       2024-06-30  00:00  1800.0     388.0  172.0  143.0    426.0  152.0   
1       2024-06-30  00:01  1800.0     388.0  172.0  143.0    426.0  152.0   
2       2024-06-30  00:02  1800.0     388.0  172.0  143.0    426.0  152.0   
3       2024-06-30  00:03  1800.0     388.0  172.0  143.0    426.0  152.0   
4       2024-06-30  00:04  1800.0     388.0  172.0  143.0    426.0  152.0   
...            ...    ...     ...       ...    ...    ...      ...    ...   
660171  2024-09-01  23:55  1800.0     373.0  170.0  143.0    422.0  150.0   
660172  2024-09-01  23:56  1800.0     373.0  170.0  143.0    422.0  150.0   
660173  2024-09-01  23:57  1800.0     373.0  170.0  143.0    422.0  150.0   
660174  2024-09-01  23:58  1800.0     373.0  170.0  143.0    422.0  150.0   
660175  2024-09-01  23:59  1800.0     373.0  170.0  143.0    422.0  150.0   

        lvl4nsw  
0         356.0  
1         356.0  
2         356.0  
3  