# imports

In [None]:
import math
import os
import sys
from itertools import combinations

import figurefirst as fifi
import geopy
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from geopy import distance
from numpy import (isscalar, r_, log, around, unique, asarray, zeros,
                   arange, sort, amin, amax, atleast_1d, sqrt, array,
                   compress, pi, exp, ravel, count_nonzero, sin, cos,
                   arctan2, hypot)
from scipy import optimize
from scipy import special
from scipy import stats

# functions that really should be in a separate file

In [None]:
def split_df(df, chunk_size=6000):
    """Split ``df`` into consecutive row chunks so each can be averaged separately.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; rows are sliced positionally.
    chunk_size : int, default 6000
        Maximum number of rows per chunk (the original note equates the
        default of 6000 rows with 10 minutes of data).

    Returns
    -------
    list of pandas.DataFrame
        Chunks in row order. Only the last chunk may be shorter than
        ``chunk_size``. An empty ``df`` yields a single empty chunk so that
        callers can still read ``chunks[0].columns``.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # Ceiling division: an exact multiple of chunk_size must not produce an
    # extra empty trailing chunk (its mean would be an all-NaN column).
    num_chunks = max(1, -(-len(df) // chunk_size))
    return [df[start:start + chunk_size]
            for start in range(0, num_chunks * chunk_size, chunk_size)]

def avg_df(chunks):
    """Collect the per-column mean of every chunk into one frame.

    The result has one row per column of ``chunks[0]`` and one column per
    chunk, labelled with the chunk's position (0, 1, 2, ...).
    """
    # Empty frame whose row labels are the column names of the first chunk.
    result = pd.DataFrame(index=chunks[0].columns)
    for position, chunk in enumerate(chunks):
        chunk_mean = pd.DataFrame(chunk.mean(), columns=[position])
        result = pd.concat([result, chunk_mean], axis=1)
    return result

def avg_df_direction(chunks):
    """Collect the per-column circular mean (in degrees) of every chunk.

    Parameters
    ----------
    chunks : sequence of pandas.DataFrame
        Chunks of directional data expressed in degrees.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``chunks[0]`` and one column per chunk,
        labelled with the chunk's position. Values are circular means in
        degrees, as returned by ``scipy.stats.circmean`` after conversion.
    """
    result = pd.DataFrame(index=chunks[0].columns)
    for position, chunk in enumerate(chunks):
        angles = np.deg2rad(np.asarray(chunk, dtype=float))
        # axis=0 gives one mean per column; without it circmean collapses the
        # whole chunk to a single scalar, which cannot be turned into a column.
        means = np.rad2deg(scipy.stats.circmean(angles, axis=0))
        column = pd.DataFrame(means, index=chunk.columns, columns=[position])
        result = pd.concat([result, column], axis=1)
    return result

def std_df(chunks):
    """Return a list with ``np.std`` of each chunk, in chunk order."""
    return [np.std(chunk) for chunk in chunks]

def dir_vs_speed(directionaldf, speeddf):
    """Average both frames per time chunk and label direction averages by speed.

    Both inputs are split with ``split_df`` (default chunk size) and averaged
    with ``avg_df``. The columns of the averaged direction frame are then
    relabelled with the mean of each averaged speed column.

    NOTE(review): a second ``dir_vs_speed`` is defined further down this file
    and replaces this one once that definition has run.
    """
    direction_chunks = split_df(directionaldf)
    speed_chunks = split_df(speeddf)
    direction_means = avg_df(direction_chunks)
    speed_means = avg_df(speed_chunks)
    # One overall mean speed per chunk becomes that chunk's column label.
    direction_means.columns = list(np.array(speed_means.mean()))
    return direction_means


def spatialchanges_directional(direction, speed, gps, radians=False):
    """Compare every pair of sensors: direction difference and mean speed.

    All inputs are expected to be ordered consistently, alphabetically or
    numerically from sensor A (1) to I (9), so that column ``n`` of each frame
    and ``gps[n]`` describe the same sensor.

    Parameters
    ----------
    direction : pandas.DataFrame
        One column of directional data per sensor from a single data
        collection, assumed to lie in the range 0-360 degrees (or to be in
        radians when ``radians`` is true).
    speed : pandas.DataFrame
        One column of horizontal speed data per sensor.
    gps : sequence of (lat, lon) tuples
        Sensor positions matching the columns of ``direction`` and ``speed``.
    radians : bool, default False
        Set to True when ``direction`` is in radians; it is then converted to
        degrees before differencing.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(direction_difference, mean_speed)``. Each frame has one row per
        input row and one column per sensor pair. Columns are labelled with
        the pair separation from ``geopy`` ``distance.distance(...).m``,
        rounded to 2 decimals and sorted ascending. Direction differences
        are folded to at most 180 degrees.
    """
    def pair_table(frame, reduce_pair):
        # Apply reduce_pair to every unordered pair of columns and label the
        # result with the separation between the two sensors.
        rows = []
        separations = []
        for first, second in combinations(range(frame.shape[1]), 2):
            rows.append(reduce_pair(frame.iloc[:, first], frame.iloc[:, second]))
            separations.append(distance.distance(gps[first], gps[second]).m)
        table = pd.DataFrame(rows, index=np.round(separations, decimals=2))
        # Stable sort keeps equally distant pairs in a deterministic order.
        return table.sort_index(ascending=True, kind="mergesort").T

    def angular_difference(first, second):
        delta = (first - second).abs()
        # The largest meaningful difference is 180 degrees; anything above
        # is replaced by the smaller angle on the other side of the circle.
        return delta.mask(delta > 180, 360 - delta)

    def mean_speed(first, second):
        return (first + second).abs() / 2

    if radians:
        direction = direction * 180 / np.pi

    newdir_df = pair_table(direction, angular_difference)
    newspeed_df = pair_table(speed, mean_speed)
    return (newdir_df, newspeed_df)


def dir_vs_speed(directionaldf, speeddf, chunk_size=6000):
    """Average pairwise direction and speed frames over equal time segments.

    Takes the two frames returned by the ``spatialchanges_*`` functions,
    splits both into row chunks with ``split_df`` and averages each chunk
    with ``avg_df``.

    Parameters
    ----------
    directionaldf : pandas.DataFrame
        Pairwise direction frame (columns labelled by sensor separation).
    speeddf : pandas.DataFrame
        Pairwise speed frame with the same layout.
    chunk_size : int, default 6000
        Rows per time segment (the original note equates the default with
        10 minutes).

    Returns
    -------
    pandas.DataFrame
        Row index is the distance between the two sensors of a pair (in
        meters). Column labels are the average speed over each time segment.
        Values are the average of ``directionaldf`` over that segment.
    """
    dir_chunks = split_df(directionaldf, chunk_size)
    speeds = split_df(speeddf, chunk_size)
    avg_dir = avg_df(dir_chunks)
    avg_speed = avg_df(speeds)
    # Collapse each segment's per-pair speed averages to one label per segment.
    avgspeeds = list(np.array(avg_speed.mean()))
    avg_dir.columns = avgspeeds
    return avg_dir

def spatialchanges_speed(dir_df, speed_df, latlons, radians=False):
    """Compare every pair of sensors: mean direction and speed difference.

    Counterpart of ``spatialchanges_directional``: this one returns the
    circular mean direction of each sensor pair and the signed difference in
    speed (first sensor minus second sensor).

    Parameters
    ----------
    dir_df : pandas.DataFrame
        One column of directional data per sensor, in degrees unless
        ``radians`` is true.
    speed_df : pandas.DataFrame
        One column of horizontal speed data per sensor.
    latlons : sequence of (lat, lon) tuples
        Sensor positions matching the columns of ``dir_df`` and ``speed_df``.
    radians : bool, default False
        Set to True when ``dir_df`` is already in radians.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(mean_direction, speed_difference)``. Each frame has one row per
        input row and one column per sensor pair. Columns are labelled with
        the pair separation from ``geopy`` ``distance.distance(...).m``,
        rounded to 2 decimals and sorted ascending. Mean directions are
        in degrees.
    """
    def pair_table(frame, reduce_pair):
        # Apply reduce_pair to every unordered pair of columns and label the
        # result with the separation between the two sensors.
        rows = []
        separations = []
        for first, second in combinations(range(frame.shape[1]), 2):
            rows.append(reduce_pair(frame.iloc[:, first], frame.iloc[:, second]))
            separations.append(distance.distance(latlons[first], latlons[second]).m)
        table = pd.DataFrame(rows, index=np.round(separations, decimals=2))
        # Stable sort keeps equally distant pairs in a deterministic order.
        return table.sort_index(ascending=True, kind="mergesort").T

    def mean_direction(first, second):
        pair = np.array([first, second])
        # Mean of the two sensors at each time recording, back in degrees.
        degrees = scipy.stats.circmean(pair, axis=0) * 180 / math.pi
        # Keep the input's row labels so this frame lines up with the speed
        # frame, which is built from Series that carry the same index.
        return pd.Series(degrees, index=first.index)

    def speed_difference(first, second):
        return first - second

    if not radians:
        # circmean's default range is [0, 2*pi], so it needs radians.
        dir_df = dir_df * np.pi / 180

    newdir_df = pair_table(dir_df, mean_direction)
    newspeed_df = pair_table(speed_df, speed_difference)
    return (newdir_df, newspeed_df)

# read in data

In [None]:
# speed and direction pandas df
winddf = pd.read_hdf('/test')
# corresponding latitude and longitude pandas df
latlondf = pd.read_hdf('/test2')
# data from sagehen station; the names below replace the spreadsheet's column labels
RAWS = pd.read_excel(
    '/Users/Desktop/5.1.22_RAWS.xls',
    names=[
        'Date time',
        'Mean Wind Speed',
        'Wind Vector Magnitude',
        'Mean Wind Direction',
        'Std Deviation of Wind Direction',
        'Mean Wind Speed 100ft',
        'Wind Vector Speed 100ft',
        'Wind Direction 100ft',
        'Std Dev of Wind Direction 100ft',
    ],
)

In [None]:
#find the most frequently occurring gps location - gives a more accurate approximation than the mean
# NOTE(review): Series.mode() returns a Series (it can hold several modes), so
# each element of cords is a Series rather than a scalar - confirm this is
# what downstream code expects.
cords = (latlondf['lat_1'].mode(), latlondf['lon_1'].mode())

In [None]:
#calculate the difference in direction between each pair of sensors, also calculate the avg speed between each pair
# NOTE(review): df_dir, df_speeds and df_latlons are not defined in the visible
# part of this file - confirm where they come from. Missing samples are
# replaced with 0 before differencing.
b_diff = spatialchanges_directional(df_dir.fillna(0), df_speeds.fillna(0), df_latlons)

#split into discrete time segments and find the average values over each time segment
b_dvs = dir_vs_speed(b_diff[0], b_diff[1])

In [None]:
# Reshape b_dvs into long form: one block of rows per sensor-pair distance,
# where each row keeps its b_dvs column label as index and holds that
# distance plus the direction value.
# NOTE(review): the loop originally iterated over `b4_dvs`, which is not
# defined in this file; `b_dvs` is the frame the loop body reads from.
reorg_ddf = pd.DataFrame(columns=['distance', 'speed', 'direction', 'control_D_25', 'control_S2_25', 'control_D_100', 'control_S2_100'])
per_distance = [
    pd.DataFrame({'distance': k,
                  'direction': b_dvs.iloc[i, :]})
    for i, k in enumerate(b_dvs.index)
]
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
reorg_ddf = pd.concat([reorg_ddf, *per_distance])

In [None]:
# give avg speed its own column: copy the current index labels into 'speed',
# then move the index itself into a regular column
reorg_ddf['speed'] = reorg_ddf.index
reorg_ddf.reset_index(inplace=True)
reorg_ddf

In [None]:
#rename the 'index' column to 'environment' and assign its value based on environment
#(.001 - Black Rock, .04 - Lemmon Valley, .4 - Tahoe Forest, 1 - urban)
reorg_ddf.rename(columns={'index': 'environment'}, inplace=True)
reorg_ddf['environment'] = 1

# spatial analysis
'''
function for determining distance between sensors
function for finding difference in direction, speed, etc

'''

#spatial plots

# temporal analysis 

'''

function for std and avgs

'''


# temporal plots

# regression/supplementary plots