In [None]:
import pandas as pd
import csv
import gpxpy
import os
import numpy as np

In [None]:
# Report the directory the kernel is currently running in.
current_dir = os.getcwd()
print("Current Working Directory:", current_dir)

Current Working Directory: /home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU


In [None]:
def gpx_to_csv(gpx_file_path, csv_folder_path):
    """Convert every ``.gpx`` file in a folder into a CSV of its track points.

    For each ``<name>.gpx`` found directly in ``gpx_file_path`` a file named
    ``<name>WARGPX.csv`` is written to ``csv_folder_path``. It contains the
    header ``Name, Latitude, Longitude, Elevation, DateTime`` followed by one
    row per point of every segment of every track.

    Args:
        gpx_file_path: Directory that is scanned (non-recursively) for files
            ending in ``.gpx``.
        csv_folder_path: Directory the CSV files are written to. It is created
            (including parents) when it does not exist yet.

    Returns:
        None. The result is the set of CSV files written to disk.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(csv_folder_path, exist_ok=True)
    gpx_files = [f for f in os.listdir(gpx_file_path) if f.endswith('.gpx')]

    for gpx_file_name in gpx_files:
        gpx_file_full_path = os.path.join(gpx_file_path, gpx_file_name)
        csv_file_name = os.path.splitext(gpx_file_name)[0] + "WARGPX.csv"
        csv_file_full_path = os.path.join(csv_folder_path, csv_file_name)

        # Parse first and close the input before the output file is opened.
        with open(gpx_file_full_path, 'r') as gpx_file:
            gpx = gpxpy.parse(gpx_file)

        # newline='' lets the csv module control line endings itself.
        with open(csv_file_full_path, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(["Name", "Latitude", "Longitude", "Elevation","DateTime" ])

            for track in gpx.tracks:
                for segment in track.segments:
                    for point in segment.points:
                        csv_writer.writerow([point.name, point.latitude, point.longitude, point.elevation, point.time])

if __name__ == "__main__":
    # Raw GPX tracks are read from GPX_RAW; one CSV per track is written to GPX_CSV.
    gpx_folder_path, csv_folder_path = (
        "/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/GPX_RAW",
        "/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/GPX_CSV",
    )
    gpx_to_csv(gpx_folder_path, csv_folder_path)


In [None]:
# Load every GPX-derived CSV into a dict of DataFrames keyed by file name.
directory = "/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/GPX_CSV"
output_dict = {}
for csv_file_name in os.listdir(directory):
    if csv_file_name.endswith(".csv"):
        csv_file_path = os.path.join(directory, csv_file_name)
        GPX_df = pd.read_csv(csv_file_path)
        GPX_df['DateTime'] = pd.to_datetime(GPX_df['DateTime'])
        # Shift the recorded timestamps by +3 hours.
        # NOTE(review): presumably this converts UTC to local time (UTC+3); the
        # shifted values keep the offset label of DateTime -- confirm.
        GPX_df['Actual_Datetime'] = GPX_df['DateTime'] + pd.Timedelta(hours=3)
        output_dict[csv_file_name] = GPX_df

# Display once, after the loop: inside the loop body the bare expression was
# evaluated on every iteration but never shown.
output_dict

{'(5)WARGPX.csv':      Name  Latitude  Longitude  Elevation                  DateTime  \
 0     NaN -1.269449  36.758399    1792.52 2024-02-08 11:05:55+00:00   
 1     NaN -1.269358  36.758379    1775.69 2024-02-08 11:06:15+00:00   
 2     NaN -1.269376  36.758372    1778.58 2024-02-08 11:06:35+00:00   
 3     NaN -1.269358  36.758367    1776.65 2024-02-08 11:06:55+00:00   
 4     NaN -1.269365  36.758357    1776.65 2024-02-08 11:07:15+00:00   
 ..    ...       ...        ...        ...                       ...   
 681   NaN -1.269387  36.758484    1787.71 2024-02-08 14:53:14+00:00   
 682   NaN -1.269369  36.758438    1783.38 2024-02-08 14:53:34+00:00   
 683   NaN -1.269413  36.758500    1782.42 2024-02-08 14:53:54+00:00   
 684   NaN -1.269450  36.758475    1783.38 2024-02-08 14:54:14+00:00   
 685   NaN -1.269373  36.758460    1782.42 2024-02-08 14:54:34+00:00   
 
               Actual_Datetime  
 0   2024-02-08 14:05:55+00:00  
 1   2024-02-08 14:06:15+00:00  
 2   2024-02-08 14

In [None]:
# Keep only the GPX points whose shifted timestamp falls on the survey date.
desired_date = pd.to_datetime('2024-02-08').date()
filtered_date = {}
for file_name, df in output_dict.items():
    on_desired_date = df['Actual_Datetime'].dt.date == desired_date
    # .copy() gives an independent frame, so columns can be added later
    # without pandas raising SettingWithCopyWarning.
    filtered_date[file_name] = df[on_desired_date].copy()

# Look the sample file up only after the loop: inside the loop this raised
# KeyError whenever '(1)WARGPX.csv' had not been processed yet.
filtered_date['(1)WARGPX.csv']

Unnamed: 0,Name,Latitude,Longitude,Elevation,DateTime,Actual_Datetime
0,,-1.269488,36.758411,1823.76,2024-02-08 11:16:31+00:00,2024-02-08 14:16:31+00:00
1,,-1.269424,36.758349,1811.74,2024-02-08 11:16:42+00:00,2024-02-08 14:16:42+00:00
2,,-1.269413,36.758348,1804.05,2024-02-08 11:16:52+00:00,2024-02-08 14:16:52+00:00
3,,-1.269387,36.758343,1798.28,2024-02-08 11:17:02+00:00,2024-02-08 14:17:02+00:00
4,,-1.269381,36.758332,1797.32,2024-02-08 11:17:12+00:00,2024-02-08 14:17:12+00:00
...,...,...,...,...,...,...
1323,,-1.269396,36.758539,1806.45,2024-02-08 14:57:02+00:00,2024-02-08 17:57:02+00:00
1324,,-1.269394,36.758564,1804.53,2024-02-08 14:57:12+00:00,2024-02-08 17:57:12+00:00
1325,,-1.269397,36.758561,1800.69,2024-02-08 14:57:22+00:00,2024-02-08 17:57:22+00:00
1326,,-1.269405,36.758543,1798.76,2024-02-08 14:57:32+00:00,2024-02-08 17:57:32+00:00


In [None]:
# FILTERING TIME: keeping only 15:10 to 16:50 (both minutes inclusive)
def filtered_Time(df, start=(15, 10), end=(16, 50)):
    """Return the rows of ``df`` whose time of day lies inside a minute window.

    Only the hour and minute of ``df['Actual_Datetime']`` are compared, so the
    whole ``end`` minute is kept (16:50:59 is in, 16:51:00 is out). The date
    part is ignored.

    Args:
        df: DataFrame with a datetime-like ``Actual_Datetime`` column.
        start: ``(hour, minute)`` of the first minute to keep. Defaults to 15:10.
        end: ``(hour, minute)`` of the last minute to keep. Defaults to 16:50.

    Returns:
        A new DataFrame (a copy, original index preserved) holding the rows
        inside the window. Returning a copy lets callers add or drop columns
        without triggering SettingWithCopyWarning.
    """
    stamps = df['Actual_Datetime'].dt
    minute_of_day = stamps.hour * 60 + stamps.minute
    first_minute = start[0] * 60 + start[1]
    last_minute = end[0] * 60 + end[1]
    in_window = (minute_of_day >= first_minute) & (minute_of_day <= last_minute)
    return df[in_window].copy()
# Apply the time-of-day window to every date-filtered GPX frame.
filtered_time = {}
for file_name, df in filtered_date.items():
    filtered_time[file_name] = filtered_Time(df)

# Inspect one file after the loop; inside the loop the lookup raised KeyError
# until '(1)WARGPX.csv' had been inserted, and its value was never displayed.
filtered_time['(1)WARGPX.csv']

Unnamed: 0,Name,Latitude,Longitude,Elevation,DateTime,Actual_Datetime
321,,-1.262708,36.753655,1829.53,2024-02-08 12:10:02+00:00,2024-02-08 15:10:02+00:00
322,,-1.262698,36.753742,1829.05,2024-02-08 12:10:12+00:00,2024-02-08 15:10:12+00:00
323,,-1.262696,36.753832,1829.53,2024-02-08 12:10:22+00:00,2024-02-08 15:10:22+00:00
324,,-1.262777,36.753869,1829.05,2024-02-08 12:10:32+00:00,2024-02-08 15:10:32+00:00
325,,-1.262880,36.753849,1829.53,2024-02-08 12:10:42+00:00,2024-02-08 15:10:42+00:00
...,...,...,...,...,...,...
922,,-1.263543,36.751334,1840.58,2024-02-08 13:50:12+00:00,2024-02-08 16:50:12+00:00
923,,-1.263648,36.751328,1840.10,2024-02-08 13:50:22+00:00,2024-02-08 16:50:22+00:00
924,,-1.263737,36.751344,1837.70,2024-02-08 13:50:32+00:00,2024-02-08 16:50:32+00:00
925,,-1.263763,36.751433,1836.26,2024-02-08 13:50:42+00:00,2024-02-08 16:50:42+00:00


In [None]:
# Give every remaining GPX point a running id (0, 1, 2, ...) within its file.
Filtered = {}

for file_name, df in filtered_time.items():
    # assign() returns a new frame instead of writing into a filtered slice,
    # which is what produced the SettingWithCopyWarning; it also leaves the
    # frames in filtered_time untouched, so the cell can be re-run safely.
    Filtered[file_name] = df.assign(uID=range(len(df)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["uID"] = range(len(df))  # Add a 'uID' column to each DataFrame
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["uID"] = range(len(df))  # Add a 'uID' column to each DataFrame
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["uID"] = range(len(df))  # Add a 'uID' column to each DataFrame
A va

In [None]:
# Remove the 'Name' column from every GPX frame.
for file_name, df in Filtered.items():
    # A non-inplace drop avoids mutating a slice (SettingWithCopyWarning), and
    # errors='ignore' keeps the cell re-runnable once 'Name' is already gone.
    Filtered[file_name] = df.drop(columns=['Name'], errors='ignore')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['Name'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['Name'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['Name'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['Name'], inplace=True)
A value 

In [None]:
# `output_directory` was not defined anywhere in the notebook, so this cell
# failed with NameError on a fresh kernel.
# NOTE(review): the path is inferred from the merge step, which reads
# '(8)WARGPX_filtered.csv' from this GPX_FILTERED folder -- confirm.
output_directory = "/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/GPX_FILTERED"
os.makedirs(output_directory, exist_ok=True)

# Write each filtered GPX frame as '<original name>_filtered.csv'.
for file_name, df in Filtered.items():
    csv_file_path = os.path.join(output_directory, file_name.replace('.csv', '_filtered.csv'))
    df.to_csv(csv_file_path, index=False)


In [None]:
##Filtering for KES

In [None]:
# Read every CSV in the KESTREL_RAW folder into KESdfs, keyed by the file name
# without its extension.
directory = '/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/KESTREL_RAW'
KESdfs = {}
for entry in os.listdir(directory):
    if not entry.endswith('.csv'):
        continue
    stem, _extension = os.path.splitext(entry)
    KESdfs[stem] = pd.read_csv(os.path.join(directory, entry))

In [None]:
# Drop the first data row of every Kestrel frame.
# NOTE(review): presumably that row holds the units line of the Kestrel
# export -- confirm. Re-running this cell removes one more row each time.
for df_name, df in KESdfs.items():
    # .copy() detaches the result from the original frame so that the column
    # assignments in the following cells do not raise SettingWithCopyWarning.
    KESdfs[df_name] = df.iloc[1:].copy()

# Display once, after the loop (inside the loop the expression was a no-op).
KESdfs['1_Feb_10_2024_2_56_40_PM']

Unnamed: 0,FORMATTED DATE_TIME,Temperature,Relative Humidity,Heat Index,Dew Point,Data Type,Record name,Start time,Duration (H:M:S),Location description,Location address,Location coordinates,Notes
1,2/2/2024 15:15:20,95.2,34.6,99.1,63,point,,,,PHONE/TABLET LOCATION,,,
2,2/2/2024 15:15:30,95.5,36.3,100.4,64.5,point,,,,PHONE/TABLET LOCATION,,,
3,2/2/2024 15:15:40,94.9,32.5,97.5,60.9,point,,,,PHONE/TABLET LOCATION,,,
4,2/2/2024 15:15:50,95.2,30.9,97.2,59.7,point,,,,PHONE/TABLET LOCATION,,,
5,2/2/2024 15:16:00,95.6,31.3,97.9,60.4,point,,,,PHONE/TABLET LOCATION,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5832,2/10/2024 14:56:00,90.6,67.1,107.4,78.2,point,,,,PHONE/TABLET LOCATION,,,
5833,2/10/2024 14:56:10,90.6,67,107.2,78.2,point,,,,PHONE/TABLET LOCATION,,,
5834,2/10/2024 14:56:20,90.6,66.9,107.2,78.1,point,,,,PHONE/TABLET LOCATION,,,
5835,2/10/2024 14:56:30,90.7,66.9,107.8,78.2,point,,,,PHONE/TABLET LOCATION,,,


In [None]:
# Parse the Kestrel timestamp strings into a datetime column.
for df_name, df in KESdfs.items():
    # assign() builds a new frame rather than writing through
    # KESdfs[df_name][...], which is chained assignment on a slice.
    KESdfs[df_name] = df.assign(Actual_Datetime=pd.to_datetime(df['FORMATTED DATE_TIME']))

# Display once, after the loop (inside the loop the expression was a no-op).
KESdfs['1_Feb_10_2024_2_56_40_PM'].head(5)

Unnamed: 0,FORMATTED DATE_TIME,Temperature,Relative Humidity,Heat Index,Dew Point,Data Type,Record name,Start time,Duration (H:M:S),Location description,Location address,Location coordinates,Notes,Actual_Datetime
1,2/2/2024 15:15:20,95.2,34.6,99.1,63.0,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:20
2,2/2/2024 15:15:30,95.5,36.3,100.4,64.5,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:30
3,2/2/2024 15:15:40,94.9,32.5,97.5,60.9,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:40
4,2/2/2024 15:15:50,95.2,30.9,97.2,59.7,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:50
5,2/2/2024 15:16:00,95.6,31.3,97.9,60.4,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:16:00


In [None]:
# Convert the measurement columns to numbers; values that cannot be parsed
# become NaN (errors='coerce').
numeric_columns = ['Temperature', 'Heat Index', 'Dew Point']
for df_name, df in KESdfs.items():
    # assign() returns a new frame, avoiding chained assignment on a slice.
    KESdfs[df_name] = df.assign(
        **{column: pd.to_numeric(df[column], errors='coerce') for column in numeric_columns}
    )

# Display once, after the loop (inside the loop the expression was a no-op).
KESdfs['1_Feb_10_2024_2_56_40_PM'].head(5)

Unnamed: 0,FORMATTED DATE_TIME,Temperature,Relative Humidity,Heat Index,Dew Point,Data Type,Record name,Start time,Duration (H:M:S),Location description,Location address,Location coordinates,Notes,Actual_Datetime
1,2/2/2024 15:15:20,95.2,34.6,99.1,63.0,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:20
2,2/2/2024 15:15:30,95.5,36.3,100.4,64.5,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:30
3,2/2/2024 15:15:40,94.9,32.5,97.5,60.9,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:40
4,2/2/2024 15:15:50,95.2,30.9,97.2,59.7,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:15:50
5,2/2/2024 15:16:00,95.6,31.3,97.9,60.4,point,,,,PHONE/TABLET LOCATION,,,,2024-02-02 15:16:00


In [None]:
# Add Celsius versions of the three measurement columns using
# (value - 32) * 5/9, rounded to 2 decimals.
celsius_columns = {
    'Temperature°C': 'Temperature',
    'HeatIndex°C': 'Heat Index',
    'DewPoint°C': 'Dew Point',
}
for df_name, df in KESdfs.items():
    # assign() returns a new frame; writing through KESdfs[df_name][...] on a
    # slice is what triggered the SettingWithCopyWarning.
    KESdfs[df_name] = df.assign(
        **{
            target: ((df[source] - 32) * 5/9).round(2)
            for target, source in celsius_columns.items()
        }
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  KESdfs[df_name]['Temperature°C'] = ((df['Temperature'] - 32) * 5/9).round(2)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  KESdfs[df_name]['HeatIndex°C'] = ((df['Heat Index'] - 32) * 5/9).round(2)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  KESdfs[df_name]['DewPoint°C'] = ((df['Dew Point'] - 32

In [None]:
# Keep only the Kestrel readings taken on the survey date.
desired_date = pd.to_datetime('2024/02/08').normalize()
wanted_day = desired_date.date()  # loop-invariant, computed once
filt_date = {}
for df_name, df in KESdfs.items():
    # .copy() so later column drops act on an independent frame.
    filt_date[df_name] = df.loc[df['Actual_Datetime'].dt.date == wanted_day].copy()

# Look the sample frame up after the loop: inside the loop this raised KeyError
# until '1_Feb_10_2024_2_56_40_PM' had been processed.
filt_date['1_Feb_10_2024_2_56_40_PM']

Unnamed: 0,FORMATTED DATE_TIME,Temperature,Relative Humidity,Heat Index,Dew Point,Data Type,Record name,Start time,Duration (H:M:S),Location description,Location address,Location coordinates,Notes,Actual_Datetime,Temperature°C,HeatIndex°C,DewPoint°C
4158,2/8/2024 11:03:50,79.8,50.9,79.7,60.0,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 11:03:50,26.56,26.50,15.56
4159,2/8/2024 11:04:00,80.0,50.8,79.7,60.1,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 11:04:00,26.67,26.50,15.61
4160,2/8/2024 11:04:10,80.0,50.5,79.7,60.0,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 11:04:10,26.67,26.50,15.56
4161,2/8/2024 11:04:20,80.2,50.2,80.1,60.0,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 11:04:20,26.78,26.72,15.56
4162,2/8/2024 11:04:30,80.2,50.1,80.1,59.9,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 11:04:30,26.78,26.72,15.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5815,2/8/2024 17:40:00,80.0,48.6,79.5,58.9,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 17:40:00,26.67,26.39,14.94
5816,2/8/2024 17:40:10,80.0,48.6,79.5,58.9,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 17:40:10,26.67,26.39,14.94
5817,2/8/2024 17:40:20,80.0,48.6,79.5,58.8,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 17:40:20,26.67,26.39,14.89
5818,2/8/2024 17:40:30,79.8,48.5,79.3,58.7,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 17:40:30,26.56,26.28,14.83


In [None]:
# FILTERING TIME: keeping 15:10 to 16:50 (both minutes inclusive)
def filt_Time(df, start=(15, 10), end=(16, 50)):
    """Return the rows of ``df`` whose time of day lies inside a minute window.

    Only the hour and minute of ``df['Actual_Datetime']`` are compared, so the
    whole ``end`` minute is kept (16:50:59 is in, 16:51:00 is out). The date
    part is ignored.

    Args:
        df: DataFrame with a datetime-like ``Actual_Datetime`` column.
        start: ``(hour, minute)`` of the first minute to keep. Defaults to 15:10.
        end: ``(hour, minute)`` of the last minute to keep. Defaults to 16:50.

    Returns:
        A new DataFrame (a copy, original index preserved) holding the rows
        inside the window. Returning a copy lets callers add or drop columns
        without triggering SettingWithCopyWarning.
    """
    stamps = df['Actual_Datetime'].dt
    minute_of_day = stamps.hour * 60 + stamps.minute
    first_minute = start[0] * 60 + start[1]
    last_minute = end[0] * 60 + end[1]
    in_window = (minute_of_day >= first_minute) & (minute_of_day <= last_minute)
    return df[in_window].copy()
# Apply the time-of-day window to every date-filtered Kestrel frame.
filt_time = {}
for df_name, df in filt_date.items():
    filt_time[df_name] = filt_Time(df)

# Inspect one frame after the loop; inside the loop the lookup raised KeyError
# until '1_Feb_10_2024_2_56_40_PM' had been inserted.
filt_time['1_Feb_10_2024_2_56_40_PM']

Unnamed: 0,FORMATTED DATE_TIME,Temperature,Relative Humidity,Heat Index,Dew Point,Data Type,Record name,Start time,Duration (H:M:S),Location description,Location address,Location coordinates,Notes,Actual_Datetime,Temperature°C,HeatIndex°C,DewPoint°C
4915,2/8/2024 15:10:00,81.4,39.1,79.7,54.1,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 15:10:00,27.44,26.50,12.28
4916,2/8/2024 15:10:10,82.5,39.2,81.0,55.2,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 15:10:10,28.06,27.22,12.89
4917,2/8/2024 15:10:20,83.4,39.1,82.2,56.0,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 15:10:20,28.56,27.89,13.33
4918,2/8/2024 15:10:30,83.4,39,82.2,55.9,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 15:10:30,28.56,27.89,13.28
4919,2/8/2024 15:10:40,82.0,38.8,80.1,54.5,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 15:10:40,27.78,26.72,12.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5516,2/8/2024 16:50:10,75.0,52.8,73.9,56.6,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 16:50:10,23.89,23.28,13.67
5517,2/8/2024 16:50:20,75.1,53,74.1,56.8,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 16:50:20,23.94,23.39,13.78
5518,2/8/2024 16:50:30,74.9,53,73.9,56.6,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 16:50:30,23.83,23.28,13.67
5519,2/8/2024 16:50:40,75.9,53.2,75.0,57.7,point,,,,PHONE/TABLET LOCATION,,,,2024-02-08 16:50:40,24.39,23.89,14.28


In [None]:
# Columns not needed after the Celsius conversion (defined once, outside the loop).
columns_to_drop = ['Notes', 'Data Type', 'Temperature', 'Heat Index', 'Dew Point',
                   'Location coordinates', 'Location address', 'Location description',
                   'Duration (H:M:S)', 'Start time', 'Record name']
for df_name, df in filt_time.items():
    # A non-inplace drop avoids mutating a filtered slice, and errors='ignore'
    # keeps the cell re-runnable once the columns are already gone.
    filt_time[df_name] = df.drop(columns=columns_to_drop, errors='ignore')

# Display once, after the loop (inside the loop the expression was a no-op).
filt_time['1_Feb_10_2024_2_56_40_PM']

Unnamed: 0,FORMATTED DATE_TIME,Relative Humidity,Actual_Datetime,Temperature°C,HeatIndex°C,DewPoint°C
4915,2/8/2024 15:10:00,39.1,2024-02-08 15:10:00,27.44,26.50,12.28
4916,2/8/2024 15:10:10,39.2,2024-02-08 15:10:10,28.06,27.22,12.89
4917,2/8/2024 15:10:20,39.1,2024-02-08 15:10:20,28.56,27.89,13.33
4918,2/8/2024 15:10:30,39,2024-02-08 15:10:30,28.56,27.89,13.28
4919,2/8/2024 15:10:40,38.8,2024-02-08 15:10:40,27.78,26.72,12.50
...,...,...,...,...,...,...
5516,2/8/2024 16:50:10,52.8,2024-02-08 16:50:10,23.89,23.28,13.67
5517,2/8/2024 16:50:20,53,2024-02-08 16:50:20,23.94,23.39,13.78
5518,2/8/2024 16:50:30,53,2024-02-08 16:50:30,23.83,23.28,13.67
5519,2/8/2024 16:50:40,53.2,2024-02-08 16:50:40,24.39,23.89,14.28


In [None]:
# `Output_path_KES` was not defined anywhere in the notebook, so this cell
# failed with NameError on a fresh kernel.
# NOTE(review): the path is inferred from the merge step, which reads its
# Kestrel input from this KES_FILT folder -- confirm.
Output_path_KES = '/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/KES_FILT'
os.makedirs(Output_path_KES, exist_ok=True)

# Write each filtered Kestrel frame; a '_PM' in the key is replaced by
# 'WARTEMPFILT.csv' to form the file name.
# NOTE(review): a key without '_PM' is written unchanged, i.e. without a
# '.csv' extension -- confirm that every key contains '_PM'.
for file_name, df in filt_time.items():
    csv_file_path = os.path.join(Output_path_KES, file_name.replace('_PM','WARTEMPFILT.csv'))
    df.to_csv(csv_file_path, index=False)

In [None]:
## MERGING KESTREL TO GPX DATA

In [None]:
# Filtered Kestrel readings for track 8.
path1, name1 = (
    '/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/KES_FILT',
    '8WARTEMPFILT.csv',
)
kestrel_csv = os.path.join(path1, name1)
KERSTREL_df = pd.read_csv(kestrel_csv)

# Filtered GPX points for track 8.
path2, name2 = (
    '/home/jovyan/Thesis /PREPROCESSING AT /Filtering and Matching/WARUKU/GPX_FILTERED',
    '(8)WARGPX_filtered.csv',
)
gpx_csv = os.path.join(path2, name2)
GPX_df = pd.read_csv(gpx_csv)


In [None]:
# The CSVs store Actual_Datetime as text; turn it back into datetimes in both frames.
for frame in (KERSTREL_df, GPX_df):
    frame['Actual_Datetime'] = pd.to_datetime(frame['Actual_Datetime'])
KERSTREL_df

Unnamed: 0,FORMATTED DATE_TIME,Relative Humidity,Actual_Datetime,Temperature°C,HeatIndex°C,DewPoint°C,TimeSecs
0,2/8/2024 15:10:00,39.8,2024-02-08 15:10:00,31.61,32.50,16.28,54600
1,2/8/2024 15:10:10,36.8,2024-02-08 15:10:10,31.17,31.22,14.67,54610
2,2/8/2024 15:10:20,35.4,2024-02-08 15:10:20,31.00,30.78,13.94,54620
3,2/8/2024 15:10:30,35.9,2024-02-08 15:10:30,31.22,31.22,14.33,54630
4,2/8/2024 15:10:40,35.9,2024-02-08 15:10:40,31.33,31.39,14.44,54640
...,...,...,...,...,...,...,...
601,2/8/2024 16:50:10,45.4,2024-02-08 16:50:10,24.50,23.50,11.89,60610
602,2/8/2024 16:50:20,45.5,2024-02-08 16:50:20,24.83,23.89,12.22,60620
603,2/8/2024 16:50:30,45.5,2024-02-08 16:50:30,24.72,23.78,12.17,60630
604,2/8/2024 16:50:40,45.4,2024-02-08 16:50:40,24.56,23.61,11.94,60640


In [None]:
# For both frames: strip any timezone from Actual_Datetime (wall-clock time is
# kept) and add TimeSecs, the number of seconds since midnight.
for frame in (KERSTREL_df, GPX_df):
    naive_stamps = frame['Actual_Datetime'].dt.tz_localize(None)
    frame['Actual_Datetime'] = naive_stamps
    clock = naive_stamps.dt
    frame['TimeSecs'] = clock.hour * 3600 + clock.minute * 60 + clock.second
KERSTREL_df

Unnamed: 0,FORMATTED DATE_TIME,Relative Humidity,Actual_Datetime,Temperature°C,HeatIndex°C,DewPoint°C,TimeSecs
0,2/8/2024 15:10:00,39.8,2024-02-08 15:10:00,31.61,32.50,16.28,54600
1,2/8/2024 15:10:10,36.8,2024-02-08 15:10:10,31.17,31.22,14.67,54610
2,2/8/2024 15:10:20,35.4,2024-02-08 15:10:20,31.00,30.78,13.94,54620
3,2/8/2024 15:10:30,35.9,2024-02-08 15:10:30,31.22,31.22,14.33,54630
4,2/8/2024 15:10:40,35.9,2024-02-08 15:10:40,31.33,31.39,14.44,54640
...,...,...,...,...,...,...,...
601,2/8/2024 16:50:10,45.4,2024-02-08 16:50:10,24.50,23.50,11.89,60610
602,2/8/2024 16:50:20,45.5,2024-02-08 16:50:20,24.83,23.89,12.22,60620
603,2/8/2024 16:50:30,45.5,2024-02-08 16:50:30,24.72,23.78,12.17,60630
604,2/8/2024 16:50:40,45.4,2024-02-08 16:50:40,24.56,23.61,11.94,60640


In [None]:

# merge_asof requires both frames to be sorted by the join key.
KERSTREL_df = KERSTREL_df.sort_values('Actual_Datetime')
GPX_df = GPX_df.sort_values('Actual_Datetime')  # stray trailing "s" removed (was a SyntaxError)
# For every Kestrel reading take the GPX row closest in time (earlier or later).
# Both frames carry a TimeSecs column, so the result has TimeSecs_x (Kestrel)
# and TimeSecs_y (GPX).
merged_df = pd.merge_asof(KERSTREL_df, GPX_df, on='Actual_Datetime', direction = 'nearest')

In [None]:
# Inspect the Kestrel readings joined to their nearest-in-time GPX row.
merged_df

Unnamed: 0,FORMATTED DATE_TIME,Relative Humidity,Actual_Datetime,Temperature°C,HeatIndex°C,DewPoint°C,TimeSecs_x,Latitude,Longitude,Elevation,DateTime,uID,TimeSecs_y
0,2/8/2024 15:10:00,39.8,2024-02-08 15:10:00,31.61,32.50,16.28,54600,-1.282197,36.752509,1806.94,2024-02-08 12:10:03+00:00,0,54603
1,2/8/2024 15:10:10,36.8,2024-02-08 15:10:10,31.17,31.22,14.67,54610,-1.282197,36.752509,1806.94,2024-02-08 12:10:03+00:00,0,54603
2,2/8/2024 15:10:20,35.4,2024-02-08 15:10:20,31.00,30.78,13.94,54620,-1.282183,36.752488,1808.86,2024-02-08 12:10:23+00:00,1,54623
3,2/8/2024 15:10:30,35.9,2024-02-08 15:10:30,31.22,31.22,14.33,54630,-1.282183,36.752488,1808.86,2024-02-08 12:10:23+00:00,1,54623
4,2/8/2024 15:10:40,35.9,2024-02-08 15:10:40,31.33,31.39,14.44,54640,-1.282201,36.752480,1809.82,2024-02-08 12:10:43+00:00,2,54643
...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,2/8/2024 16:50:10,45.4,2024-02-08 16:50:10,24.50,23.50,11.89,60610,-1.281987,36.755657,1799.24,2024-02-08 13:50:03+00:00,300,60603
602,2/8/2024 16:50:20,45.5,2024-02-08 16:50:20,24.83,23.89,12.22,60620,-1.282018,36.755557,1799.73,2024-02-08 13:50:23+00:00,301,60623
603,2/8/2024 16:50:30,45.5,2024-02-08 16:50:30,24.72,23.78,12.17,60630,-1.282018,36.755557,1799.73,2024-02-08 13:50:23+00:00,301,60623
604,2/8/2024 16:50:40,45.4,2024-02-08 16:50:40,24.56,23.61,11.94,60640,-1.282050,36.755458,1801.17,2024-02-08 13:50:43+00:00,302,60643


In [None]:
# Inverse-time-distance weight: a Kestrel reading counts more the closer it is
# in time to the GPX fix it was matched with.
time_diff_seconds = (merged_df['TimeSecs_y'] - merged_df['TimeSecs_x']).abs()
# An exact timestamp match has a difference of 0; 1/0 gives inf, and an inf
# weight turns the weighted means of that group into NaN. Differences are
# therefore floored at 1 second, so an exact match gets weight 1.
merged_df['weight'] = 1 / time_diff_seconds.clip(lower=1)
merged_df

Unnamed: 0,FORMATTED DATE_TIME,Relative Humidity,Actual_Datetime,Temperature°C,HeatIndex°C,DewPoint°C,TimeSecs_x,Latitude,Longitude,Elevation,DateTime,uID,TimeSecs_y,weight
0,2/8/2024 15:10:00,39.8,2024-02-08 15:10:00,31.61,32.50,16.28,54600,-1.282197,36.752509,1806.94,2024-02-08 12:10:03+00:00,0,54603,0.333333
1,2/8/2024 15:10:10,36.8,2024-02-08 15:10:10,31.17,31.22,14.67,54610,-1.282197,36.752509,1806.94,2024-02-08 12:10:03+00:00,0,54603,0.142857
2,2/8/2024 15:10:20,35.4,2024-02-08 15:10:20,31.00,30.78,13.94,54620,-1.282183,36.752488,1808.86,2024-02-08 12:10:23+00:00,1,54623,0.333333
3,2/8/2024 15:10:30,35.9,2024-02-08 15:10:30,31.22,31.22,14.33,54630,-1.282183,36.752488,1808.86,2024-02-08 12:10:23+00:00,1,54623,0.142857
4,2/8/2024 15:10:40,35.9,2024-02-08 15:10:40,31.33,31.39,14.44,54640,-1.282201,36.752480,1809.82,2024-02-08 12:10:43+00:00,2,54643,0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,2/8/2024 16:50:10,45.4,2024-02-08 16:50:10,24.50,23.50,11.89,60610,-1.281987,36.755657,1799.24,2024-02-08 13:50:03+00:00,300,60603,0.142857
602,2/8/2024 16:50:20,45.5,2024-02-08 16:50:20,24.83,23.89,12.22,60620,-1.282018,36.755557,1799.73,2024-02-08 13:50:23+00:00,301,60623,0.333333
603,2/8/2024 16:50:30,45.5,2024-02-08 16:50:30,24.72,23.78,12.17,60630,-1.282018,36.755557,1799.73,2024-02-08 13:50:23+00:00,301,60623,0.142857
604,2/8/2024 16:50:40,45.4,2024-02-08 16:50:40,24.56,23.61,11.94,60640,-1.282050,36.755458,1801.17,2024-02-08 13:50:43+00:00,302,60643,0.333333


In [None]:
def weighted_mean(group):
    """Collapse one group of merged rows into a single summary row.

    Args:
        group: DataFrame with the columns 'Temperature°C', 'HeatIndex°C',
            'DewPoint°C' and 'weight', plus any number of other columns.

    Returns:
        A Series holding the weight-averaged temperature, heat index and dew
        point ('WMean_Temp', 'WMean_HeatInd', 'WMean_Dew'), followed by the
        first-row value of every other column except 'Latitude', 'Longitude',
        the three averaged columns and 'weight'.
    """
    averaged = {
        'WMean_Temp': 'Temperature°C',
        'WMean_HeatInd': 'HeatIndex°C',
        'WMean_Dew': 'DewPoint°C',
    }
    skipped = {'Latitude', 'Longitude', 'weight', *averaged.values()}

    summary = {
        label: np.average(group[source], weights=group['weight'])
        for label, source in averaged.items()
    }
    # Every remaining column is represented by its value in the group's first row.
    summary.update(
        (column, group[column].iloc[0])
        for column in group.columns
        if column not in skipped
    )
    return pd.Series(summary)
# One summary row per distinct (Latitude, Longitude) pair.
per_point = merged_df.groupby(['Latitude', 'Longitude']).apply(weighted_mean)
grouped_df = per_point.reset_index()
grouped_df.drop(
    columns=['DateTime', 'uID', 'TimeSecs_x', 'FORMATTED DATE_TIME'],
    inplace=True,
)
# The time-sorted frame is only displayed; grouped_df itself is not reordered.
grouped_df.sort_values(by='Actual_Datetime')

Unnamed: 0,Latitude,Longitude,WMean_Temp,WMean_HeatInd,WMean_Dew,FORMATTED DATE_TIME,Relative Humidity,Actual_Datetime,TimeSecs_x,Elevation,DateTime,uID,TimeSecs_y
24,-1.282197,36.752509,31.478,32.116,15.797,2/8/2024 15:10:00,39.8,2024-02-08 15:10:00,54600,1806.94,2024-02-08 12:10:03+00:00,0,54603
26,-1.282183,36.752488,31.066,30.912,14.057,2/8/2024 15:10:20,35.4,2024-02-08 15:10:20,54620,1808.86,2024-02-08 12:10:23+00:00,1,54623
23,-1.282201,36.752480,31.165,31.123,14.308,2/8/2024 15:10:40,35.9,2024-02-08 15:10:40,54640,1809.82,2024-02-08 12:10:43+00:00,2,54643
27,-1.282172,36.752454,30.172,29.696,13.407,2/8/2024 15:11:00,35.9,2024-02-08 15:11:00,54660,1811.74,2024-02-08 12:11:03+00:00,3,54663
16,-1.282250,36.752452,30.399,30.194,14.066,2/8/2024 15:11:20,36.9,2024-02-08 15:11:20,54680,1813.18,2024-02-08 12:11:23+00:00,4,54683
...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,-1.281914,36.755856,25.077,24.110,12.482,2/8/2024 16:49:20,45.5,2024-02-08 16:49:20,60560,1797.32,2024-02-08 13:49:23+00:00,298,60563
38,-1.281955,36.755745,24.958,24.033,12.476,2/8/2024 16:49:40,45.8,2024-02-08 16:49:40,60580,1798.28,2024-02-08 13:49:43+00:00,299,60583
36,-1.281987,36.755657,24.542,23.577,12.009,2/8/2024 16:50:00,45.6,2024-02-08 16:50:00,60600,1799.24,2024-02-08 13:50:03+00:00,300,60603
35,-1.282018,36.755557,24.797,23.857,12.205,2/8/2024 16:50:20,45.5,2024-02-08 16:50:20,60620,1799.73,2024-02-08 13:50:23+00:00,301,60623


In [None]:
# Save the per-coordinate weighted means without the index column.
# NOTE(review): `directory` was last assigned in the Kestrel loading cell
# (the KESTREL_RAW folder), so this file is written next to the raw Kestrel
# exports, and that cell loads every '.csv' it finds there -- confirm this is
# the intended output location.
grouped_df.to_csv(os.path.join(directory, '8WARMERGEDFIN.csv'), index=False)