In [38]:
from src.subpipe.validate import ValidateBusData
import logging, os, sys, json
import pandas as pd

from src.utils.utils import (
    DATA_MONTH_DAY,
    SUBSCRIBER_DATA_PATH_JSON,
    SUBSCRIBER_FOLDER,
    curr_time_micro,
    sub_logger,
)

# Route root-logger output (INFO and above) to a per-day notebook log file,
# appending to whatever earlier runs already wrote.
# NOTE(review): an empty `format` string presumably falls back to the
# message-only default -- confirm this is the intended log layout.
logging.basicConfig(
    filename=f"logs/notebook-{DATA_MONTH_DAY}.log",
    filemode="a",
    encoding="utf-8",
    level=logging.INFO,
    format="",
)

In [39]:
# Load the subscriber file "04-12.json" into a DataFrame and order the rows
# by vehicle first, then by ACT_TIME within each vehicle.
df = (
    pd.read_json(os.path.join(SUBSCRIBER_FOLDER, "04-12.json"))
    .sort_values(by=["VEHICLE_ID", "ACT_TIME"], ascending=True)
)

In [40]:
# Range check: every GPS longitude must lie in the band (-124, -122].
# Only the column's min and max are inspected; if both are inside the band,
# every value in between is too.
longitude_min = df['GPS_LONGITUDE'].min()
longitude_max = df['GPS_LONGITUDE'].max()

# An explicit condition replaces `assert` inside a bare `except:`: asserts are
# stripped under `python -O`, and the bare except hid unrelated errors.
# A missing min/max (e.g. an all-NaN column) is reported as BAD.
longitude_in_range = bool(
    pd.notna(longitude_min)
    and pd.notna(longitude_max)
    and longitude_min > -124   # strictly greater than -124
    and longitude_max <= -122  # -122 or less
)

if longitude_in_range:
    sub_logger(
        f"{curr_time_micro()} LONGITUDE GOOD! Longitude sits within -122 and -124! Min and max vals are: "
        + f"{longitude_min}, {longitude_max}."
    )
else:
    sub_logger(
        f"{curr_time_micro()} LONGITUDE BAD!!!!! Longitude had the following min and max values: "
        + f"{longitude_min}, {longitude_max}."
    )

[05-07-2024-17:57:10.472] LONGITUDE GOOD! Longitude sits within -122 and -124! Min and max vals are: -123.115868, -122.372643.


In [41]:
# Range check: every GPS latitude must lie in the band [45, 46).
# Only the column's min and max are inspected; if both are inside the band,
# every value in between is too.
latitude_lowest_min = df['GPS_LATITUDE'].min()
latitude_highest_max = df['GPS_LATITUDE'].max()

# An explicit condition replaces `assert` inside a bare `except:`: asserts are
# stripped under `python -O`, and the bare except hid unrelated errors.
# A missing min/max (e.g. an all-NaN column) is reported as BAD.
latitude_in_range = bool(
    pd.notna(latitude_lowest_min)
    and pd.notna(latitude_highest_max)
    and latitude_lowest_min >= 45   # 45 or more
    and latitude_highest_max < 46   # strictly less than 46
)

if latitude_in_range:
    sub_logger(
        f"{curr_time_micro()} LATITUDE GOOD! Latitude sits within 45 and 46! Min and max vals are: "
        + f"{latitude_lowest_min}, {latitude_highest_max}."
    )
else:
    sub_logger(
        f"{curr_time_micro()} LATITUDE BAD!!!!! Latitude had the following min and max values: "
        + f"{latitude_lowest_min}, {latitude_highest_max}."
    )

[05-07-2024-17:57:10.485] LATITUDE GOOD! Latitude sits within 45 and 46! Min and max vals are: 45.318728, 45.639137.


In [42]:
# Check that every row whose HDOP lies in [4, 23.1) has BOTH longitude and
# latitude missing (NaN).
gathered_HDOPs = df[(df['GPS_HDOP'] >= 4) & (df['GPS_HDOP'] < 23.1)]
these_HDOPs_nan = (
    gathered_HDOPs['GPS_LONGITUDE'].isna()
    & gathered_HDOPs['GPS_LATITUDE'].isna()
)

# Note: `.all()` on an empty selection is True, so "no rows in the HDOP band"
# is reported as GOOD.
if these_HDOPs_nan.all():
    sub_logger(
        f"{curr_time_micro()} HDOP GOOD! All HDOP values 4 upto (not including) 23.1 are NaN on lat and long: "
        + "."
    )
else:
    # Log only the rows that break the rule. The previous
    # `df[gathered_HDOPs.notna()]` masked the whole frame cell-by-cell instead
    # of selecting the offending rows.
    offending_rows = gathered_HDOPs[~these_HDOPs_nan]
    sub_logger(
        f"{curr_time_micro()} HDOP BAD!!!!! There were some HDOPs with non-nan values on lat and long: "
        + f"\n{offending_rows}"
    )

[05-07-2024-17:57:10.494] HDOP GOOD! All HDOP values 4 upto (not including) 23.1 are NaN on lat and long: .


In [43]:
# Check that the smallest GPS_SATELLITES value in the data is exactly 0.
sat_min = df['GPS_SATELLITES'].min()

# An explicit condition replaces `assert` inside a bare `except:`: asserts are
# stripped under `python -O`, and the bare except hid unrelated errors.
# A missing minimum (empty or all-NaN column) is reported as BAD.
if pd.notna(sat_min) and sat_min == 0:
    sub_logger(
        f"{curr_time_micro()} GPS Min Sats GOOD! Minimum number of satellites was {sat_min}!"
    )
else:
    sub_logger(
        f"{curr_time_micro()} GPS Min Sats BAD!!!!! The minimum number of satellites were: "
        + f"\n{sat_min}"
    )

[05-07-2024-17:57:10.501] GPS Min Sats GOOD! Minimum number of satellites was 0!


In [44]:
# Check that the largest GPS_SATELLITES value in the data is exactly 12.
sat_max = df['GPS_SATELLITES'].max()

# An explicit condition replaces `assert` inside a bare `except:`: asserts are
# stripped under `python -O`, and the bare except hid unrelated errors.
# A missing maximum (empty or all-NaN column) is reported as BAD.
# The messages now say "maximum"; they previously said "minimum", copied from
# the min-satellites check.
if pd.notna(sat_max) and sat_max == 12:
    sub_logger(
        f"{curr_time_micro()} GPS Max Sats GOOD! Maximum number of satellites was {sat_max}!"
    )
else:
    sub_logger(
        f"{curr_time_micro()} GPS Max Sats BAD!!!!! The maximum number of satellites were: "
        + f"\n{sat_max}"
    )

[05-07-2024-17:57:10.508] GPS Max Sats GOOD! Minimum number of satellites was 12!


In [54]:
# Check that at least one record reporting 0 satellites still carries a
# latitude, and at least one carries a longitude.
zero_sats = df[df['GPS_SATELLITES'] == 0]
some_have_lat = zero_sats['GPS_LATITUDE'].notna().any()
some_have_long = zero_sats['GPS_LONGITUDE'].notna().any()

# An explicit condition replaces `assert` inside a bare `except:`: asserts are
# stripped under `python -O`, and the bare except hid unrelated errors.
# If there are no 0-satellite rows at all, `.any()` is False and the check
# reports BAD, as before.
if some_have_lat and some_have_long:
    sub_logger(
        f"{curr_time_micro()} ZERO SATELLITES ASSERT GOOD! It seems that SOME "
        + "0 GPS satellite vehicles HAVE a lat and long"
    )
else:
    sub_logger(
        f"{curr_time_micro()} ZERO SATELLITES ASSERT BAD!!!! It seems that all "
        + "0 GPS satellite vehicles are missing lat and long"
    )

[05-07-2024-18:03:37.733] ZERO SATELLITES ASSERT GOOD! It seems that SOME 0 GPS satellite vehicles HAVE a lat and long


In [56]:
# Check that every record reporting 12 satellites has both a latitude and a
# longitude.
twelve_sats = df[df['GPS_SATELLITES'] == 12]

# Evaluate notna() on ALL 12-satellite rows. The previous version first
# filtered the rows down to those that already had a value and then asserted
# notna().all() on that subset, which is always True, so the check could
# never report a missing coordinate.
all_have_lat = twelve_sats['GPS_LATITUDE'].notna().all()
all_have_long = twelve_sats['GPS_LONGITUDE'].notna().all()

# Note: with no 12-satellite rows, `.all()` is vacuously True and the check
# reports GOOD.
if all_have_lat and all_have_long:
    sub_logger(
        f"{curr_time_micro()} TWELVE SATELLITES ASSERT GOOD! It seems that ALL "
        + "12 GPS satellite vehicles HAVE a lat and long"
    )
else:
    sub_logger(
        f"{curr_time_micro()} TWELVE SATELLITES ASSERT BAD!!!! It seems that some "
        + "12 GPS satellite vehicles are missing lat and long"
    )

[05-07-2024-18:08:47.025] TWELVE SATELLITES ASSERT GOOD! It seems that ALL 12 GPS satellite vehicles HAVE a lat and long


In [64]:
# Quick inspection: prints True only if EVERY ACT_TIME value is missing.
# A False here does not prove that all values are present.
print(df['ACT_TIME'].isna().all())

False


In [68]:
# Check that every record has an ACT_TIME value.
# The previous predicate, `isna().all() == False`, only proved that not ALL
# values were missing, so a frame with a single populated row passed while the
# log claimed "ALL records HAVE an event activity time".
act_time_all_present = df['ACT_TIME'].notna().all()

# An explicit condition replaces `assert` inside a bare `except:`: asserts are
# stripped under `python -O`, and the bare except hid unrelated errors.
if act_time_all_present:
    sub_logger(
        f"{curr_time_micro()} ACTIVITY RECORD ASSERT GOOD! It seems that ALL "
        + "records HAVE an event activity time."
    )
else:
    sub_logger(
        f"{curr_time_micro()} ACTIVITY RECORD ASSERT BAD!!!! It seems that some "
        + "records are missing an event activity time."
    )

[05-07-2024-19:07:04.853] ACTIVITY RECORD ASSERT GOOD! It seems that ALL records HAVE an event activity time.


In [67]:
# Check that every record has a METERS value.
# The previous predicate, `isna().all() == False`, only proved that not ALL
# values were missing, so a frame with a single populated row passed while the
# log claimed that ALL records have a meters metric.
meters_bool = df['METERS'].notna().all()

# An explicit condition replaces `assert` inside a bare `except:`: asserts are
# stripped under `python -O`, and the bare except hid unrelated errors.
if meters_bool:
    sub_logger(
        f"{curr_time_micro()} METERS RECORD ASSERT GOOD! It seems that ALL "
        + "records HAVE an a meters metric."
    )
else:
    sub_logger(
        f"{curr_time_micro()} METERS RECORD ASSERT BAD!!!! It seems that some "
        + "records are missing an a meters metric."
    )

[05-07-2024-19:07:01.659] METERS RECORD ASSERT GOOD! It seems that ALL records HAVE an a meters metric.
