# Visualize Logged Data

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import os

# Settings
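Edit these settings before running the cells below:
- `data_directory`: folder containing the `dataLog*.TXT` files to load
- `adjustment_timezone`: time zone the UTC GPS timestamps are converted to for display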

In [None]:
# Folder that is scanned for the dataLog*.TXT files of the run to visualize.
# Alternative run, kept for quick switching:
#   data_directory = './data/run_down_0607'
data_directory = "./data/run_down_0610"

# Time zone name the UTC GPS timestamps are converted to for display.
adjustment_timezone = "America/Los_Angeles"

In [None]:
# Read every dataLog*.TXT file in data_directory and stack the rows into one
# DataFrame, combining the GPS date and time columns into a single
# 'datetimeUTC' column.
#
# The file names are sorted because os.listdir() returns entries in arbitrary
# order, which would otherwise make the row order differ between machines.
log_names = sorted(
    name for name in os.listdir(data_directory)
    if name.startswith('dataLog') and name.endswith('.TXT')
)
if not log_names:
    # Fail with a clear message instead of pd.concat's
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        f'No dataLog*.TXT files found in {data_directory!r}'
    )

dfs = []
for name in log_names:
    df = pd.read_csv(
        os.path.join(data_directory, name),
        # Keep the raw GPS date/time text so it can be joined and parsed below.
        dtype={'gps_Date': str, 'gps_Time': str},
    )
    # Explicit replacement for read_csv(parse_dates={'datetimeUTC': [...]}),
    # which is deprecated in recent pandas releases. As before, the two
    # source columns are removed and the combined column is placed first.
    combined = pd.to_datetime(df.pop('gps_Date') + ' ' + df.pop('gps_Time'))
    df.insert(0, 'datetimeUTC', combined)
    dfs.append(df)
data = pd.concat(dfs, ignore_index=True)
data.info()

## Prepare the Data
Drop rows that have no GPS data (rows where `gps_Lat` is 0).
This is done because we rely on the GPS date-time.

Create a datetime column converted to the `adjustment_timezone` time zone so the timestamps are easier to read.

In [None]:
# Drop rows without GPS data (identified by gps_Lat == 0); the rest of the
# notebook relies on the GPS date-time.
rows_without_gps = data[data["gps_Lat"] == 0].index
data.drop(rows_without_gps, inplace=True)

# Mark the GPS timestamps as UTC. The tz check keeps this cell safe to re-run:
# tz_localize() raises TypeError on a column that is already tz-aware.
if data['datetimeUTC'].dt.tz is None:
    data['datetimeUTC'] = data['datetimeUTC'].dt.tz_localize('UTC')
# Create the display column, converted to the configured time zone.
data['datetime_adjusted'] = data['datetimeUTC'].dt.tz_convert(adjustment_timezone)

data.describe()

# Data Summary
- start time
- end time
- elapsed time
- average output rate (Hz)
- number of records
- expected record count (elapsed seconds × average Hz)

In [None]:
# Summarize the run: time span, logging rate and record counts.
adjusted_times = data["datetime_adjusted"]
first_timestamp = adjusted_times.min()
last_timestamp = adjusted_times.max()
print(f'Start Time: {first_timestamp}')
print(f'  End Time: {last_timestamp}')

elapsed_time = last_timestamp - first_timestamp
elapsed_time_seconds = elapsed_time.total_seconds()
print(f'   Elapsed: {elapsed_time} ({elapsed_time_seconds} s)')

# The expected count is the elapsed seconds multiplied by the mean output rate.
mean_output_hz = data["output_Hz"].mean()
record_count = data.shape[0]
expected_count = elapsed_time_seconds * mean_output_hz
print(f'    Average Hz: {mean_output_hz:.4f}')
print(f'  Record Count: {record_count}')
print(f'Expected Count: {expected_count:.1f}')

## Time Series Plots
Plot the data fields over time. Skip the Lat, Long and Heading fields.

In [None]:
# Columns of `data` to plot, in display order. Each name is used verbatim as a
# column key and as the plot title.
fields = [
    "humidity_%",
    "humidity_%.1",
    "hPa",
    "pressure_Pa",
    "degC",
    "temp_degC",
    "altitude_m",
    "gps_Alt",
    "gps_SIV",
    "gps_GroundSpeed",
    "tvoc_ppb",
    "co2_ppm",
]

def plot_it(field):
    """Plot one column of the global ``data`` frame against adjusted time.

    Creates a new 20x6 inch figure with a grid, major ticks every day
    (labelled ``'%B %d'``) and minor ticks every two hours (labelled
    ``'%H:%M'``), and uses the column name as the title.

    Args:
        field: Name of the column in ``data`` to draw on the y axis.

    Returns:
        None. The figure is left open for the notebook backend to display.
    """
    timestamps = data['datetime_adjusted']
    # Matplotlib date locators/formatters fall back to rcParams["timezone"]
    # (UTC by default) when no tz is given, which would label the axis in UTC
    # even though the column holds adjusted times. Pass the column's own time
    # zone; this is None (the matplotlib default) for a tz-naive column.
    axis_tz = getattr(timestamps.dtype, 'tz', None)

    fig, ax = plt.subplots(figsize=(20, 6))
    ax.grid(True)
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=1, tz=axis_tz))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%B %d', tz=axis_tz))
    ax.xaxis.set_minor_locator(mdates.HourLocator(interval=2, tz=axis_tz))
    ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M', tz=axis_tz))
    ax.plot(timestamps, data[field])
    # Set the title on this axes explicitly instead of via pyplot's
    # "current axes" state.
    ax.set_title(field)
    fig.autofmt_xdate()
    

# Draw one time-series figure per selected column.
for column_name in fields:
    plot_it(column_name)