### Patagonia trip - extract meteo

In [15]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import os
import numpy as np
# Notebook configuration.
# Folder holding the Garmin .gpx exports. The PATAGONIA_GPX_DIR environment
# variable overrides it, so the notebook can run on another machine without
# editing this cell; the original location is kept as the default.
ROOT_GPX_FILES = os.environ.get(
    'PATAGONIA_GPX_DIR',
    '/Users/filippomichelon/Documents/PersonalCode/garmin_patagonia_gpx_files',
)


In [16]:
# Define the function that reads every .gpx file in a folder and extracts its track points
import gpxpy
import gpxpy.gpx
import pandas as pd
import os
from typing import List

def extract_gpx_files_from_folder(folder_path: str) -> pd.DataFrame:
    """
    Read every .gpx file in a folder and flatten all track points into one DataFrame.

    Parameters:
    folder_path (str): Path to the folder containing .gpx files. Only entries
        whose name ends in '.gpx' are read; sub-folders are not searched.

    Returns:
    pd.DataFrame: One row per track point with the columns latitude, longitude,
        elevation, time and file (base name of the source file). The columns
        are always present, even when no track point was found, so callers can
        rely on e.g. df['time'] existing.

    Raises:
    OSError: If the folder cannot be listed or a file cannot be opened.
    """
    columns = ['latitude', 'longitude', 'elevation', 'time', 'file']

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order reproducible from one run (and one machine) to the next.
    gpx_names = sorted(name for name in os.listdir(folder_path) if name.endswith('.gpx'))

    rows = []
    for gpx_name in gpx_names:
        # Explicit encoding so parsing does not depend on the platform's
        # locale default.
        with open(os.path.join(folder_path, gpx_name), 'r', encoding='utf-8') as gpx_file:
            gpx = gpxpy.parse(gpx_file)

        for track in gpx.tracks:
            for segment in track.segments:
                for point in segment.points:
                    rows.append({
                        'latitude': point.latitude,
                        'longitude': point.longitude,
                        'elevation': point.elevation,
                        'time': point.time,
                        'file': gpx_name,
                    })

    # Passing the column list guarantees the schema for an empty result.
    return pd.DataFrame(rows, columns=columns)

# Load every track point found under ROOT_GPX_FILES into a single DataFrame
df = extract_gpx_files_from_folder(ROOT_GPX_FILES)

### Resample the GPS track into 1-hour blocks for the precipitation analysis

In [17]:
# Index the track points by timestamp.
# Guarded so the cell can be re-run: once 'time' has become the index it is no
# longer a column, and an unconditional conversion + set_index would raise
# KeyError on the second execution.
if 'time' in df.columns:
    df = df.assign(time=pd.to_datetime(df['time'])).set_index('time')

# Resample to 1-hour blocks and take the mean position within each hour.
# A Timedelta is used as the rule instead of the 'H' string alias, which newer
# pandas releases deprecate.
df_resampled = df.resample(pd.Timedelta(hours=1)).agg({'latitude': 'mean', 'longitude': 'mean'})

# Hours without any GPS point come out as NaN: drop them, then bring 'time'
# back as a regular column.
df_resampled_clean = (
    df_resampled
    .dropna(subset=['latitude', 'longitude'])
    .reset_index()
)

# Display the resulting cleaned DataFrame
df_resampled_clean

Unnamed: 0,time,latitude,longitude
0,2025-02-10 12:00:00+00:00,-48.445761,-72.567532
1,2025-02-10 13:00:00+00:00,-48.430517,-72.598926
2,2025-02-10 14:00:00+00:00,-48.377317,-72.661148
3,2025-02-10 15:00:00+00:00,-48.327832,-72.662870
4,2025-02-10 16:00:00+00:00,-48.284016,-72.678773
...,...,...,...
77,2025-02-25 17:00:00+00:00,-44.577181,-72.428295
78,2025-02-25 18:00:00+00:00,-44.526495,-72.527851
79,2025-02-25 19:00:00+00:00,-44.471645,-72.575331
80,2025-02-25 20:00:00+00:00,-44.393469,-72.580054


In [7]:
def get_meteo_dataset(lat, lon, time):
    """Fetch the hourly Open-Meteo table for one location on one calendar day.

    Only the calendar date of `time` is sent to the API (as both start_date and
    end_date), so the result is not restricted to the hour of `time`.

    Parameters:
    lat: Latitude, interpolated into the request URL as-is.
    lon: Longitude, interpolated into the request URL as-is.
    time: Anything pandas.Timestamp accepts (Timestamp, datetime, ISO string).
        The date is taken in the timestamp's own timezone.

    Returns:
    pd.DataFrame: The parsed CSV response. Column names are cut at the first
        space (presumably dropping the unit suffix - confirm against a live
        response), columns whose shortened name repeats are dropped, and
        'time' is converted to datetime.

    Raises:
    Whatever pandas.read_csv raises when the request or the parsing fails.
    """
    # Normalising through pd.Timestamp lets callers pass strings or datetimes,
    # not only objects that happen to expose .strftime.
    day = pd.Timestamp(time).strftime('%Y-%m-%d')

    hourly_variables = "temperature_2m,precipitation,wind_speed_10m,winddirection_10m"

    # The hourly variable list is sent once (it used to appear twice in the
    # query string).
    # NOTE(review): the 'current' block is kept because skiprows=6 below
    # presumably depends on the preamble it adds to the CSV - confirm before
    # removing it.
    url = (
        "https://api.open-meteo.com/v1/forecast"
        f"?latitude={lat}&longitude={lon}"
        "&current=temperature_2m,wind_speed_10m"
        f"&hourly={hourly_variables}"
        f"&start_date={day}&end_date={day}"
        "&format=csv"
    )

    # Skip the first 6 rows of the response so that parsing starts at the
    # header of the hourly table.
    weather = pd.read_csv(url, skiprows=6)

    # Keep only the part of each column name before the first space.
    weather.columns = [name.split(" ")[0] for name in weather.columns]

    # If shortening produced repeated names, keep the first occurrence only.
    weather = weather.loc[:, ~weather.columns.duplicated()]

    weather["time"] = pd.to_datetime(weather["time"])

    return weather

# Fetch the weather table for every hourly track position.
# One request is made per row of df_resampled_clean; each request is built from
# that row's mean latitude/longitude and its timestamp.
# A comprehension over itertuples is used so that no loop variables
# (time, lat, lon, row, ...) are left behind in the notebook namespace.
# NOTE(review): get_meteo_dataset only uses the date of the timestamp, so rows
# falling on the same day fetch that day again for a slightly different
# position - consider keeping only the row matching each track hour.
all_weather_data = [
    get_meteo_dataset(point.latitude, point.longitude, point.time)
    for point in df_resampled_clean.itertuples(index=False)
]

# Combine all the data into a single DataFrame.
# pd.concat raises ValueError on an empty list, so an empty track yields an
# empty frame with the expected weather columns instead.
if all_weather_data:
    weather_data_combined = pd.concat(all_weather_data, ignore_index=True)
else:
    weather_data_combined = pd.DataFrame(
        columns=['time', 'temperature_2m', 'precipitation', 'wind_speed_10m', 'winddirection_10m']
    )

# Display the combined weather data
weather_data_combined


Unnamed: 0,time,temperature_2m,precipitation,wind_speed_10m,winddirection_10m
0,2025-02-10 00:00:00,10.0,2.1,7.0,55
1,2025-02-10 01:00:00,9.5,2.1,4.9,73
2,2025-02-10 02:00:00,9.1,1.4,5.2,326
3,2025-02-10 03:00:00,9.6,0.4,9.5,279
4,2025-02-10 04:00:00,8.7,0.0,5.2,304
...,...,...,...,...,...
1963,2025-02-25 19:00:00,14.0,0.2,5.7,252
1964,2025-02-25 20:00:00,14.3,0.0,4.5,256
1965,2025-02-25 21:00:00,14.6,0.0,1.8,270
1966,2025-02-25 22:00:00,14.6,0.0,0.4,270


##### work on plots

In [22]:
# Inspect the hourly timestamps of the resampled track
df_resampled_clean["time"]


0    2025-02-10 12:00:00+00:00
1    2025-02-10 13:00:00+00:00
2    2025-02-10 14:00:00+00:00
3    2025-02-10 15:00:00+00:00
4    2025-02-10 16:00:00+00:00
                ...           
77   2025-02-25 17:00:00+00:00
78   2025-02-25 18:00:00+00:00
79   2025-02-25 19:00:00+00:00
80   2025-02-25 20:00:00+00:00
81   2025-02-25 21:00:00+00:00
Name: time, Length: 82, dtype: datetime64[ns, SimpleTZ('Z')]

In [85]:
# Display the combined weather DataFrame
weather_data_combined

Unnamed: 0,time,temperature_2m,precipitation,wind_speed_10m,winddirection_10m
0,2025-02-10 00:00:00,2.9,3.3,5.0,360
1,2025-02-10 01:00:00,3.1,2.8,10.3,306
2,2025-02-10 02:00:00,3.1,1.5,19.0,299
3,2025-02-10 03:00:00,3.0,0.3,12.1,282
4,2025-02-10 04:00:00,2.4,1.3,10.0,300
...,...,...,...,...,...
307,2025-02-25 19:00:00,12.4,0.2,5.7,252
308,2025-02-25 20:00:00,12.7,0.0,4.5,256
309,2025-02-25 21:00:00,12.9,0.0,1.8,270
310,2025-02-25 22:00:00,12.9,0.0,0.4,270


In [None]:
# TODO: try doing something with wind speed and precipitation