Importing libs

In [153]:
import gpxpy
import gpxpy.gpx
import pandas as pd
import pytz
from pytz import all_timezones

Loading the GPX file

In [154]:
# Path of the GPX recording (the spaces in the file name are intentional).
gpx_file = "danielle GPX .GPX"

# Parse the recording into a gpxpy object; the handle is closed on leaving the block.
with open(gpx_file, mode="r") as gpx_handle:
    gpx = gpxpy.parse(gpx_handle)

In [155]:
# Display the repr of the parsed GPX object (tracks -> segments -> points).
gpx

GPX(tracks=[GPXTrack(segments=[GPXTrackSegment(points=[...])])])

In [156]:
# Peek at the first ten points of the first segment of the first track.
gpx.tracks[0].segments[0].points[:10]

[GPXTrackPoint(34.021526, -118.288752, elevation=90.12919518072158),
 GPXTrackPoint(34.021566, -118.28889, elevation=91.4642141405493),
 GPXTrackPoint(34.02147, -118.288567, elevation=92.40057468786836),
 GPXTrackPoint(34.021553, -118.288315, elevation=91.63135995343328),
 GPXTrackPoint(34.021452, -118.288464, elevation=91.29162885062397),
 GPXTrackPoint(34.021578, -118.288401, elevation=91.90533726289868),
 GPXTrackPoint(34.021503, -118.288488, elevation=91.72100736759603),
 GPXTrackPoint(34.021594, -118.288463, elevation=91.78886562027037),
 GPXTrackPoint(34.02156, -118.288531, elevation=93.04319773428142),
 GPXTrackPoint(34.021578, -118.288662, elevation=93.90022918581963)]

In [157]:
import xml.etree.ElementTree as ET
import csv
from datetime import datetime


# Function to parse custom time format
def parse_custom_time(time_str):
    """Split a ``MM/DD/YYYY, hh:mm:ss AM/PM`` timestamp into date and time strings.

    Parameters
    ----------
    time_str : str
        Timestamp such as ``"04/16/2024, 03:55:47 PM"`` (12-hour clock).

    Returns
    -------
    tuple of (str, str)
        The date as ``YYYY-MM-DD`` and the time as 24-hour ``HH:MM:SS``.

    Raises
    ------
    ValueError
        If ``time_str`` does not match the expected format.
    """
    parsed = datetime.strptime(time_str, "%m/%d/%Y, %I:%M:%S %p")
    date_part = parsed.strftime("%Y-%m-%d")
    clock_part = parsed.strftime("%H:%M:%S")
    return date_part, clock_part

Data extraction from the GPX file

In [158]:
gpx_file_path = "danielle GPX .GPX"
tree = ET.parse(gpx_file_path)
root = tree.getroot()

# GPX 1.1 elements live in this XML namespace; "default" is only the local
# alias used in the path expressions below.
namespaces = {"default": "http://www.topografix.com/GPX/1/1"}

# Build one record per <trkpt>. Coordinates and elevation are kept as the raw
# strings found in the XML; the timestamp is split into date and time strings.
data = []
for trkpt in root.findall(".//default:trkpt", namespaces):
    # <ele> and <time> are optional children of a track point, so use findtext
    # (None when absent) instead of .find(...).text, which raises
    # AttributeError on a missing element.
    ele = trkpt.findtext("default:ele", default=None, namespaces=namespaces)
    raw_time = trkpt.findtext("default:time", default=None, namespaces=namespaces)

    if raw_time:
        # Keep the raw and the parsed value under separate names.
        date, time_of_day = parse_custom_time(raw_time)
    else:
        date, time_of_day = None, None

    data.append(
        {
            "latitude": trkpt.get("lat"),
            "longitude": trkpt.get("lon"),
            "altitude (m)": ele or None,  # normalise an empty <ele/> to None
            "date": date,
            "time": time_of_day,
        }
    )

In [159]:
# Inspect the first two extracted records.
data[:2]

[{'latitude': '34.021526',
  'longitude': '-118.288752',
  'altitude (m)': '90.12919518072158',
  'date': '2024-04-16',
  'time': '15:55:47'},
 {'latitude': '34.021566',
  'longitude': '-118.288890',
  'altitude (m)': '91.4642141405493',
  'date': '2024-04-16',
  'time': '15:55:50'}]

In [160]:
# Turn the list of per-point dicts into a DataFrame (one row per record) and
# preview it. The values are whatever the extraction step stored in `data`.
gpx_data = pd.DataFrame(data)
gpx_data.head()

Unnamed: 0,latitude,longitude,altitude (m),date,time
0,34.021526,-118.288752,90.12919518072158,2024-04-16,15:55:47
1,34.021566,-118.28889,91.4642141405493,2024-04-16,15:55:50
2,34.02147,-118.288567,92.40057468786836,2024-04-16,15:55:54
3,34.021553,-118.288315,91.63135995343328,2024-04-16,15:56:01
4,34.021452,-118.288464,91.29162885062397,2024-04-16,15:56:26


Export GPX data to CSV

In [161]:
# Write the track points to danielle.csv, omitting the DataFrame index column.
gpx_data.to_csv("danielle.csv", index=False)

Bounding-box calculation adapted from: https://gist.github.com/pianosnake/b4a45ef6bdf2ffb2e1b44bbcca107298

In [195]:
import math

# Earth circumference in meters, and the number of degrees of arc that one
# meter along that circumference corresponds to.
EARTH_CIR_METERS = 40075016.686
degreesPerMeter = 360 / EARTH_CIR_METERS


def toRadians(degrees):
    """Convert an angle from degrees to radians."""
    return math.pi * degrees / 180


def latLngToBounds(lat, lng, zoom, width, height):
    """Return the bounding box of a map view centred on a point.

    Parameters
    ----------
    lat, lng : float
        Centre of the view in decimal degrees.
    zoom : int or float
        Zoom level; the ground resolution halves with each level
        (circumference / 2 ** (zoom + 8) meters per pixel).
    width, height : int or float
        Size of the view in pixels.

    Returns
    -------
    str
        ``"minX,minY,maxX,maxY"`` with longitudes formatted to 4 decimals and
        latitudes to 6 decimals.
    """
    # Resolution shared by both axes; the north-south figure is additionally
    # scaled by cos(latitude).
    meters_per_pixel = EARTH_CIR_METERS / math.pow(2, zoom + 8)
    meters_per_pixel_ns = meters_per_pixel * math.cos(toRadians(lat))

    # Half of the view extent on each axis, first in meters and then in degrees.
    half_width = width / 2
    half_height = height / 2
    half_span_lng = half_width * meters_per_pixel * degreesPerMeter
    half_span_lat = half_height * meters_per_pixel_ns * degreesPerMeter

    west = lng - half_span_lng
    south = lat - half_span_lat
    east = lng + half_span_lng
    north = lat + half_span_lat

    return f"{west:.4f},{south:.6f},{east:.4f},{north:.6f}"

In [266]:
# Convert the coordinate columns to floats, then derive a bounding box for
# every track point.

for coord_col in ("latitude", "longitude"):
    gpx_data[coord_col] = gpx_data[coord_col].astype(float)


def _row_to_bbox(point):
    """Return the bbox string for one row (zoom 12, 400x400 pixel view)."""
    return latLngToBounds(point["latitude"], point["longitude"], 12, 400, 400)


gpx_data["bbox"] = gpx_data.apply(_row_to_bbox, axis=1)

In [267]:
# Preview the first rows of the track-point frame.
gpx_data.head()

Unnamed: 0,latitude,longitude,altitude (m),date,time,bbox
0,34.021526,-118.288752,90.12919518072158,2024-04-16,15:55:47,"-118.3574,33.964615,-118.2201,34.078437"
1,34.021566,-118.28889,91.4642141405493,2024-04-16,15:55:50,"-118.3576,33.964655,-118.2202,34.078477"
2,34.02147,-118.288567,92.40057468786836,2024-04-16,15:55:54,"-118.3572,33.964559,-118.2199,34.078381"
3,34.021553,-118.288315,91.63135995343328,2024-04-16,15:56:01,"-118.3570,33.964642,-118.2197,34.078464"
4,34.021452,-118.288464,91.29162885062397,2024-04-16,15:56:26,"-118.3571,33.964541,-118.2198,34.078363"


In [293]:
import requests
import csv
from io import StringIO


def get_air_quality(gpx_data, api_key=None, timeout=30):
    """Fetch AirNow PM2.5/PM10 AQI observations around one track point.

    Parameters
    ----------
    gpx_data : mapping or pandas.Series
        A single row providing ``"date"`` (``YYYY-MM-DD``), ``"time"``
        (``HH:MM:SS``) and ``"bbox"`` (``minX,minY,maxX,maxY``).
    api_key : str, optional
        AirNow API key. When omitted, the ``AIRNOW_API_KEY`` environment
        variable is used, so the key never has to be stored in the notebook.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    str
        The response body as returned by the service (CSV text was requested).

    Raises
    ------
    RuntimeError
        If no API key was passed and ``AIRNOW_API_KEY`` is not set.
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    import os

    if api_key is None:
        api_key = os.environ.get("AIRNOW_API_KEY")
    if not api_key:
        raise RuntimeError(
            "AirNow API key missing: pass api_key= or set the AIRNOW_API_KEY "
            "environment variable."
        )

    # The query is made at hour resolution, so only the hour of the timestamp
    # is used; start and end of the window are the same hour.
    # NOTE(review): the AirNow date parameters are presumably UTC, while the
    # time zone of the row's timestamp is not visible here — confirm.
    hour = gpx_data["time"].split(":")[0]
    window = f"{gpx_data['date']}T{hour}"

    params = {
        "startDate": window,
        "endDate": window,
        "parameters": "PM25,PM10",
        "BBOX": gpx_data["bbox"],
        "dataType": "A",
        "format": "text/csv",
        "verbose": 0,
        "monitorType": 0,
        "includerawconcentrations": 0,
        "API_KEY": api_key,
    }

    # Let requests build and escape the query string; bound the wait and fail
    # loudly on HTTP errors so an error page is never parsed as CSV downstream.
    response = requests.get(
        "https://www.airnowapi.org/aq/data/", params=params, timeout=timeout
    )
    response.raise_for_status()
    return response.text

In [295]:
import pandas as pd
from io import StringIO

# Query the air-quality service once per track point for the first 400 rows;
# each call returns the response body as text.
results = [get_air_quality(row) for _, row in gpx_data.head(400).iterrows()]

# Concatenate the per-point responses and parse them together as a single
# headerless CSV.
results_str = "\n".join(results)
data_io = StringIO(results_str)
df_temp = pd.read_csv(data_io, header=None)

# Label the six parsed columns.
df_temp.columns = ["Latitude", "Longitude", "UTC", "Parameter", "AQI", "Category"]

# Save the labelled observations (header row included, index omitted).
df_temp.to_csv("air_quality.csv", index=False)

In [297]:
# Continue processing rows 401 to 700 (positions 400-699). The responses are
# appended to `results`, which already holds the responses collected earlier.
for _, row in gpx_data.iloc[400:700].iterrows():
    results.append(get_air_quality(row))

# Join everything gathered so far into a single string and parse it as one
# headerless CSV.
results_str = "\n".join(results)

data_io = StringIO(results_str)
df_temp = pd.read_csv(data_io, header=None)

# Set the column names
df_temp.columns = [
    "Latitude",
    "Longitude",
    "UTC",
    "Parameter",
    "AQI",
    "Category",
]

# `df_temp` contains the earlier batch as well as the new rows, so appending it
# to the existing file would write the earlier observations a second time.
# Rewrite the file from the complete frame instead.
df_temp.to_csv("air_quality.csv", index=False)

Data extraction from survey data

In [165]:
# Load one survey file (Ping6.csv) from the SurveyData folder and preview the
# first rows.
survey_data = pd.read_csv("SurveyData/Ping6.csv")
survey_data.head()

Unnamed: 0,mbl_cod,rsp_id,instance_id,scheduled_start_local,timezone_offset,actual_start_local,HAPPY_JOYFUL_9376046,ENERGETIC_9376047,IRRITABLE_ANGRY_9376048,FRUSTRATED_9376049,...,RESPIRATION,BODY_BATTERY,STEPS,CALORIES,FLOORS,INTENSITY_MINUTES,LONGITUDE,LATITUDE,AVG_AMP,VOX_ACTV
0,27181246,50695,1712628000,2024-04-08 19:00:00,-420,2024-04-08 19:10:26,7,6,5,4,...,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0
1,27181246,50695,1712800800,2024-04-10 19:00:00,-420,2024-04-10 19:34:53,7,5,4,5,...,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0
2,27181246,50695,1712887200,2024-04-11 19:00:00,-420,2024-04-11 19:00:08,7,6,4,4,...,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0
3,27181246,50695,1712973600,2024-04-12 19:00:00,-420,2024-04-12 19:30:07,7,7,7,7,...,0.0,0.0,0,0,0,0,34.025203,-118.279196,0.0,0.0
4,27181246,50695,1713060000,2024-04-13 19:00:00,-420,2024-04-13 19:04:30,7,5,4,4,...,0.0,0.0,0,0,0,0,32.706276,-117.157111,0.0,0.0


In [166]:
# Select the track points whose coordinates are exactly (34.025203, -118.279196).
lat_matches = gpx_data["latitude"].eq(34.025203)
lon_matches = gpx_data["longitude"].eq(-118.279196)
filtered_rows = gpx_data.loc[lat_matches & lon_matches]
filtered_rows

Unnamed: 0,latitude,longitude,altitude (m),date,time,bbox
1726,34.025203,-118.279196,60.28713939525187,2024-04-17,20:04:33,"-118.29636213769531,34.01097585069514,-118.262..."


In [167]:
# Show the Ping6.csv survey row at position 3, i.e. the fourth row
# (iloc positions start at 0).
survey_data.iloc[3]

mbl_cod                             27181246
rsp_id                                 50695
instance_id                       1712973600
scheduled_start_local    2024-04-12 19:00:00
timezone_offset                         -420
                                ...         
INTENSITY_MINUTES                          0
LONGITUDE                          34.025203
LATITUDE                         -118.279196
AVG_AMP                                  0.0
VOX_ACTV                                 0.0
Name: 3, Length: 107, dtype: object

In [181]:
# Print every survey row whose coordinates exactly equal those of a GPX track
# point, once per matching track point.
#
# NOTE(review): the survey columns look swapped — "LONGITUDE" is read as the
# latitude and "LATITUDE" as the longitude below, which is what makes the
# comparison line up with the GPX data. Confirm against the survey export.
gpx_lat = gpx_data["latitude"]
gpx_long = gpx_data["longitude"]

for _survey_idx, survey_row in survey_data.iterrows():
    survey_lat = survey_row["LONGITUDE"]
    survey_long = survey_row["LATITUDE"]

    # Compare against all track points at once instead of a nested Python loop
    # (the old inner loop also rebound the outer loop's `index` variable).
    match_count = int(((gpx_lat == survey_lat) & (gpx_long == survey_long)).sum())

    for _match in range(match_count):
        print(survey_row)

mbl_cod                             27181246
rsp_id                                 50695
instance_id                       1712973600
scheduled_start_local    2024-04-12 19:00:00
timezone_offset                         -420
                                ...         
INTENSITY_MINUTES                          0
LONGITUDE                          34.025203
LATITUDE                         -118.279196
AVG_AMP                                  0.0
VOX_ACTV                                 0.0
Name: 3, Length: 107, dtype: object
