# Importing Libraries

In [1]:
from shapely.geometry import Point, LineString, shape
import matplotlib.pyplot as plt # Importing Libraries
import geopandas as gpd
import contextily as ctx
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
import os, sys
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Coding/Python/'))
# sys.path

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
from lonelyboy.geospatial import group_patterns_v2 as gsgp

get_ipython().magic('matplotlib qt')

In [3]:
PLT_FIG_WIDTH = 3.787
PLT_FIG_HEIGHT = PLT_FIG_WIDTH / 1.618

## Make a Toy Example Dataset

In [4]:
date_rng = [datetime(2019, 5, 16, 10, 4, 59),
             datetime(2019, 5, 16, 10, 6, 40),
             datetime(2019, 5, 16, 10, 7, 5),
             datetime(2019, 5, 16, 10, 9, 52),
             datetime(2019, 5, 16, 10, 11, 45),
             datetime(2019, 5, 16, 10, 14, 55),
             datetime(2019, 5, 16, 10, 18, 42),
             datetime(2019, 5, 16, 10, 21, 35)]

df = pd.DataFrame(date_rng, columns=['date'])
df['lat'] = [1,2,3,4,5,6,7,8]
df['lon'] = [9,10,11,12,13,14,15,16]
df['ts'] = pd.to_datetime(df['date'], unit='unix')
df.head(15)

Unnamed: 0,date,lat,lon,ts
0,2019-05-16 10:04:59,1,9,2019-05-16 10:04:59
1,2019-05-16 10:06:40,2,10,2019-05-16 10:06:40
2,2019-05-16 10:07:05,3,11,2019-05-16 10:07:05
3,2019-05-16 10:09:52,4,12,2019-05-16 10:09:52
4,2019-05-16 10:11:45,5,13,2019-05-16 10:11:45
5,2019-05-16 10:14:55,6,14,2019-05-16 10:14:55
6,2019-05-16 10:18:42,7,15,2019-05-16 10:18:42
7,2019-05-16 10:21:35,8,16,2019-05-16 10:21:35


In [76]:
rule=f'{5*60}S'
method='linear'

#resample and interpolate using the method given. Linear is suggested
upsampled = df.resample(rule, on='date', loffset=True, kind='timestamp').first()
interpolated = upsampled.interpolate(method=method)
# reset the index to normal and use the old index as new timestamp
interpolated['datetime'] = interpolated.index
interpolated.reset_index(drop=True, inplace=True)

df_resampled = pd.DataFrame(interpolated)

# 1-Minute Resampling

In [76]:
# df_resampled.loc[df_resampled.datetime.isin(interpolated60S)]
df_resampled

Unnamed: 0,date,lat,lon,datetime
0,2019-05-16 10:01:20,1.0,5.0,2019-05-16 10:01:00
1,NaT,1.333333,5.333333,2019-05-16 10:02:00
2,NaT,1.666667,5.666667,2019-05-16 10:03:00
3,2019-05-16 10:04:40,2.0,6.0,2019-05-16 10:04:00
4,NaT,2.333333,6.333333,2019-05-16 10:05:00
5,NaT,2.666667,6.666667,2019-05-16 10:06:00
6,2019-05-16 10:07:05,3.0,7.0,2019-05-16 10:07:00
7,NaT,3.5,7.5,2019-05-16 10:08:00
8,2019-05-16 10:09:52,4.0,8.0,2019-05-16 10:09:00


# 3-Minute Resampling

In [73]:
df_resampled

Unnamed: 0,date,lat,lon,datetime
0,2019-05-16 10:01:20,1,5,2019-05-16 10:00:00
1,2019-05-16 10:04:40,2,6,2019-05-16 10:03:00
2,2019-05-16 10:07:05,3,7,2019-05-16 10:06:00
3,2019-05-16 10:09:52,4,8,2019-05-16 10:09:00


# 5-Minute Resampling

In [10]:
df_resampled

Unnamed: 0,date,lat,lon,datetime
0,2019-05-16 10:01:20,1,5,2019-05-16 10:00:00
1,2019-05-16 10:07:05,3,7,2019-05-16 10:05:00


In [307]:
def resample_geospatial_V2(df, rule='60S', method='linear', crs={'init': 'epsg:4326'}, drop_lon_lat=False):
    df['datetime'] = pd.to_datetime(df['ts'], unit='s')
    x = df['datetime'].values.astype(np.int64)
    y = df[['lat', 'lon']].values

    f = interp1d(x, y, kind=method, axis=0)
    xnew_V2 = pd.date_range(start=df['datetime'].min().replace(second=0), end=df['datetime'].max().replace(second=0), freq=rule, closed='right')
    
    df_RESAMPLED = pd.DataFrame(f(xnew_V2), columns=['lon', 'lat'])
    df_RESAMPLED['datetime'] = pd.DataFrame(xnew_V2).reset_index(drop=True)
    df_RESAMPLED['geom'] = df_RESAMPLED[['lon', 'lat']].apply(lambda x: Point(x[0], x[1]), axis=1)

    #drop lat and lon if u like
    if drop_lon_lat:
        df_RESAMPLED = df_RESAMPLED.drop(['lat', 'lon'], axis=1)
    return gpd.GeoDataFrame(df_RESAMPLED, crs=crs, geometry='geom')

In [311]:
resample_geospatial_V2(df, rule=f'{1/2*60}S', method='linear')

Unnamed: 0,lon,lat,datetime
0,1.05,5.05,2019-05-16 10:01:30
1,1.2,5.2,2019-05-16 10:02:00
2,1.35,5.35,2019-05-16 10:02:30
3,1.5,5.5,2019-05-16 10:03:00
4,1.65,5.65,2019-05-16 10:03:30
5,1.8,5.8,2019-05-16 10:04:00
6,1.95,5.95,2019-05-16 10:04:30
7,2.137931,6.137931,2019-05-16 10:05:00
8,2.344828,6.344828,2019-05-16 10:05:30
9,2.551724,6.551724,2019-05-16 10:06:00


# LonelyBoy READY

In [5]:
from scipy.interpolate import interp1d
from shapely.geometry import Point

# def resample_geospatial_V3(df, features=['lat', 'lon'], rule='60S', method='linear', crs={'init': 'epsg:4326'}, drop_lon_lat=False):
def resample_geospatial_V4(df, features=['lat', 'lon'], rate=1, method='linear', crs={'init': 'epsg:4326'}, drop_lon_lat=False):
    df['datetime'] = pd.to_datetime(df['ts'], unit='s')
    x = df['datetime'].values.astype(np.int64)
    y = df[features].values

    # scipy interpolate needs at least 2 records 
    if (len(df) <= 1):
        return df.iloc[0:0]

    dt_start = df['datetime'].min().replace(second=0)
    dt_end = df['datetime'].max().replace(second=0)
    
    f = interp1d(x, y, kind=method, axis=0)
#     xnew_V2 = pd.date_range(start=df['datetime'].min().replace(second=0), end=df['datetime'].max().replace(second=0), freq=rule, closed='right')
    xnew_V3 = pd.date_range(start=dt_start.replace(minute=rate*(dt_start.minute//rate)), end=dt_end, freq=f'{rate*60}S', closed='right') 
    df_RESAMPLED = pd.DataFrame(f(xnew_V3), columns=features)
    df_RESAMPLED['datetime'] = pd.DataFrame(xnew_V3).reset_index(drop=True)
    df_RESAMPLED['geom'] = df_RESAMPLED[['lon', 'lat']].apply(lambda x: Point(x[0], x[1]), axis=1)

    #drop lat and lon if u like
    if drop_lon_lat:
        df_RESAMPLED = df_RESAMPLED.drop(['lat', 'lon'], axis=1)
    return gpd.GeoDataFrame(df_RESAMPLED, crs=crs, geometry='geom')

In [6]:
resample_geospatial_V4(df, rate=5, method='linear')

Unnamed: 0,lat,lon,datetime,geom
0,1.009901,9.009901,2019-05-16 10:05:00,POINT (9.009900990099009 1.00990099009901)
1,4.070796,12.070796,2019-05-16 10:10:00,POINT (12.07079646017699 4.070796460176991)
2,6.022026,14.022026,2019-05-16 10:15:00,POINT (14.02202643171806 6.022026431718062)
3,7.450867,15.450867,2019-05-16 10:20:00,POINT (15.45086705202312 7.450867052023121)


# SOME TESTS

In [95]:
df

Unnamed: 0,date,lat,lon,ts
0,2019-05-16 10:04:59,1,9,2019-05-16 10:04:59
1,2019-05-16 10:06:40,2,10,2019-05-16 10:06:40
2,2019-05-16 10:07:05,3,11,2019-05-16 10:07:05
3,2019-05-16 10:09:52,4,12,2019-05-16 10:09:52
4,2019-05-16 10:11:45,5,13,2019-05-16 10:11:45
5,2019-05-16 10:14:55,6,14,2019-05-16 10:14:55
6,2019-05-16 10:18:42,7,15,2019-05-16 10:18:42
7,2019-05-16 10:21:35,8,16,2019-05-16 10:21:35


In [18]:
# import datetime
# interpolated['datetime'].apply(datetime.datetime.timestamp).values

In [89]:
from scipy.interpolate import interp1d

x = df['date'].values.astype(np.int64)
y = df[['lat', 'lon']].values

f = interp1d(x, y, kind='linear', axis=0)

In [93]:
#resample and interpolate using the method given. Linear is suggested -- OLD V2
# xnew_V2 = pd.date_range(start=df['date'].min().replace(second=0), end=df['date'].max().replace(second=0), freq=f'{5*60}S', closed='right')
# xnew_V2

#resample and interpolate using the method given. Linear is suggested -- NEW V3
rate = 5

dt_start = df['date'].min().replace(second=0)
dt_end = df['date'].max().replace(second=0)

xnew_V3 = pd.date_range(start=dt_start.replace(minute=rate*(dt_start.minute//rate)),
                        end=dt_end, freq=f'{rate*60}S', closed='right') 
xnew_V3

DatetimeIndex(['2019-05-16 10:05:00', '2019-05-16 10:10:00',
               '2019-05-16 10:15:00', '2019-05-16 10:20:00'],
              dtype='datetime64[ns]', freq='300S')

In [96]:
# df_RESAMPLED = pd.DataFrame(f(xnew_V2), columns=['lon', 'lat'])
# df_RESAMPLED['datetime'] = pd.DataFrame(xnew_V2).reset_index(drop=True)
# df_RESAMPLED

df_RESAMPLED = pd.DataFrame(f(xnew_V3), columns=['lon', 'lat'])
df_RESAMPLED['datetime'] = pd.DataFrame(xnew_V3).reset_index(drop=True)
df_RESAMPLED

Unnamed: 0,lon,lat,datetime
0,1.009901,9.009901,2019-05-16 10:05:00
1,4.070796,12.070796,2019-05-16 10:10:00
2,6.022026,14.022026,2019-05-16 10:15:00
3,7.450867,15.450867,2019-05-16 10:20:00


# GROUND TRUTH

In [78]:
# interpolated60S = interpolated.loc[1:,'datetime']
# interpolated60S

1   2019-05-16 10:02:00
2   2019-05-16 10:03:00
3   2019-05-16 10:04:00
4   2019-05-16 10:05:00
5   2019-05-16 10:06:00
6   2019-05-16 10:07:00
7   2019-05-16 10:08:00
8   2019-05-16 10:09:00
Name: datetime, dtype: datetime64[ns]

In [145]:
xnew = interpolated60S
df_RESAMPLED = pd.DataFrame(f(xnew))
df_RESAMPLED['dt'] = pd.DataFrame(xnew).reset_index(drop=True)
df_RESAMPLED

Unnamed: 0,0,1,dt
0,1.2,5.2,2019-05-16 10:02:00
1,1.5,5.5,2019-05-16 10:03:00
2,1.8,5.8,2019-05-16 10:04:00
3,2.137931,6.137931,2019-05-16 10:05:00
4,2.551724,6.551724,2019-05-16 10:06:00
5,2.965517,6.965517,2019-05-16 10:07:00
6,3.329341,7.329341,2019-05-16 10:08:00
7,3.688623,7.688623,2019-05-16 10:09:00
