In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import datetime as dt
import os
import pytz
from pytz import timezone
from scipy.stats import pearsonr

In [2]:
# Fixed-offset zone used to interpret the recorded timestamps; per the original
# note, this data uses GMT-6 rather than standard CST/CDT.
# NOTE(review): tz-database 'Etc/GMT±N' names follow the POSIX sign convention,
# so 'Etc/GMT-6' resolves to UTC+6 (six hours ahead of UTC) — confirm that this,
# and not 'Etc/GMT+6' (UTC-6), is the zone that is wanted here.
tz = timezone('Etc/GMT-6')


def toUTC(d):
    """Localize `d` to `tz`, normalize it, and return the result converted to UTC."""
    localized = tz.localize(d)
    in_zone = tz.normalize(localized)
    return in_zone.astimezone(pytz.utc)

In [24]:
def string2datetime(stringDTSeries):
    """Parse date/time strings and pass every parsed value through toUTC.

    The layouts listed below are tried in order, each one against the whole
    input; the first layout that parses every string is used.  When a layout
    raises ValueError the next one is tried, and if the last layout fails too
    its ValueError propagates to the caller.

    Returns a list holding one toUTC(...) result per input string.
    """
    layouts = (
        '%Y/%m/%d %I:%M:%S %p',  # year first, 12-hour clock with AM/PM
        '%Y/%m/%d %H:%M:%S',     # year first, 24-hour clock
        '%m/%d/%Y %I:%M:%S %p',  # month first, 12-hour clock with AM/PM
        '%m/%d/%Y %H:%M:%S',     # month first, 24-hour clock
    )

    def convert_all(layout):
        # One layout for the entire input: a single mismatch rejects the layout.
        return [toUTC(dt.datetime.strptime(DT, layout)) for DT in stringDTSeries]

    for layout in layouts[:-1]:
        try:
            return convert_all(layout)
        except ValueError:
            continue
    # Last candidate: let its ValueError surface if it does not fit either.
    return convert_all(layouts[-1])

In [4]:
# Directory that holds the summary spreadsheets read in the cells below.
# NOTE(review): absolute path to a mounted volume — it only resolves on a
# machine that has this volume mounted at the same location.
path = '/Volumes/RT&O/C Brace/GPS Analysis/UIC_GPSData/'

In [5]:
# Show the file names found in the data directory.
os.listdir(path)

['CBR08_Final_Summary_File.xlsx',
 'CBR11_Final_Summary_File.xls',
 'CBR15_Final_Summary_File.xlsx',
 'CBR16_Final_Summary_File.xlsx',
 'CBR19_Final_Summary_File.xlsx',
 'CBR21_Final_Summary_File.xlsx',
 'CBR22_Final_Summary_File.xlsx',
 'CBR24_Final_SummaryRLU.xlsx',
 'CBR25_Final_Summary_Rname.xlsx',
 'CBR26_Final_Summary.xlsx',
 'CBR27_Final_Summary_Rname.xlsx',
 'CBR28_Final_Summary_Rname.xlsx',
 'CBR30_Final_Summary_Rnames.xlsx',
 '~$CBR08_Final_Summary_File.xlsx']

## Add LandUse coordinates from GPS Data

In [16]:
# Read the CBR15 summary workbook from the data directory into a DataFrame.
Baseline = pd.read_excel(path+'CBR15_Final_Summary_File.xlsx')

In [17]:
# Distinct values present in the LandUse column.
Baseline.LandUse.unique()

array(['trip', 'COM_URBMIX', 'home', 'RES_SF_DETACHED', 'COM_CULT_ENT',
       'OS_REC', 'Non-Parcel Road ROW', 'TCU_PARKING', 'COM_REGIONAL',
       'COM_OFFICE', 'RES_MF', 'COM_BIGBOX', 'COM_MALL', 'COM_HOTEL',
       'INST_MEDICAL'], dtype=object)

In [18]:
# Read the CBR15 baseline GPS log (CSV) into a DataFrame.
# NOTE(review): this is a hard-coded 'Z:/' drive path, while `path` points at
# '/Volumes/...' — confirm both locations resolve on the machine running this.
Gpsdata = pd.read_csv('Z:/C Brace/GPS Analysis/GPSWT/CBR15_Baseline.csv')

In [19]:
# Preview the first three rows of the summary table.
Baseline.head(3)

Unnamed: 0,Date,Sequence,BeginTime,EndTime,Mode,LandUse,MeanSpeed,Numberpoints,TotalGap,Duration
0,2015-07-02,1,11:12:13,12:47:30,Car,trip,77.185558,518,9.116665,95.283333
1,2015-07-02,2,12:47:35,12:54:18,,COM_URBMIX,2.076743,31,1.633333,6.716667
2,2015-07-02,3,12:54:23,13:00:53,Car,trip,33.312281,79,0.0,6.5


In [20]:
# Preview the first three rows of the GPS log.
Gpsdata.head(3)

Unnamed: 0.1,Unnamed: 0,INDEX,TRACK ID,VALID,UTC DATE,UTC TIME,LOCAL DATE,LOCAL TIME,MS,LATITUDE,N/S,LONGITUDE,E/W,ALTITUDE,SPEED,HEADING,G-X,G-Y,G-Z,Wear/Non-Wear
0,0,1,1,FIXED,7/2/2015,16:12:13,7/2/2015,10:12:13,0,41.895511,N,87.618302,W,189.558014,0.0,0.0,0,0,0,Wear
1,1,2,1,FIXED,7/2/2015,16:12:14,7/2/2015,10:12:14,0,41.895511,N,87.618302,W,189.558014,0.0,0.0,0,0,0,Wear
2,2,3,1,FIXED,7/2/2015,16:12:15,7/2/2015,10:12:15,0,41.895511,N,87.618302,W,189.558014,0.0,0.0,0,0,0,Wear


In [25]:
# One "date time" string per GPS fix, built from the 'UTC DATE' and 'UTC TIME' columns.
x = Gpsdata['UTC DATE'] + ' ' + Gpsdata['UTC TIME']

# Parse the strings and wrap the resulting list in a Series so it can be compared element-wise.
# NOTE(review): string2datetime sends every parsed value through toUTC, which
# localizes it to `tz` and then converts to UTC; the inputs here come from the
# columns already labelled UTC — confirm this additional shift is intended.
GpsDates = pd.Series(string2datetime(x))

In [122]:
# Combine the 'LOCAL DATE' and 'LOCAL TIME' text columns into a single datetime
# column holding the local date/time of each fix (no time zone attached).
# The earlier `Gpsdata.apply(dt.datetime.strptime)` passed each whole column to
# strptime as its only argument (no format string), which raises TypeError.
# The format matches the rows shown by Gpsdata.head(), e.g. '7/2/2015' and
# '10:12:13'.
# NOTE(review): other GPS exports may use a different date layout (compare the
# formats accepted by string2datetime) — adjust `format` if this raises.
Gpsdata['Full Local Date'] = pd.to_datetime(
    Gpsdata['LOCAL DATE'] + ' ' + Gpsdata['LOCAL TIME'],
    format='%m/%d/%Y %H:%M:%S',
)

'2014/11/20'

In [26]:
# For each summary segment, select the GPS fixes whose timestamp lies strictly
# between the segment's begin and end time, then record the mean position of
# those fixes and the share of them flagged as 'Wear'.
loc_lat = []    # mean LATITUDE of the fixes inside each segment
loc_lon = []    # mean LONGITUDE of the fixes inside each segment
wear_frac = []  # fraction of in-segment fixes with Wear/Non-Wear == 'Wear'

# Only the first 10 segments are processed for now (extending this to every row
# is on the notebook's TODO list); the min() keeps shorter tables from raising
# when indexing past the last row.
for i in range(min(10, len(Baseline))):
    start = Baseline.BeginTime[i]
    end = Baseline.EndTime[i]
    d = Baseline.Date[i]
    d = d.strftime('%Y-%m-%d')
    startdate = dt.datetime.strptime(d+' '+start,'%Y-%m-%d %H:%M:%S')
    enddate = dt.datetime.strptime(d+' '+end,'%Y-%m-%d %H:%M:%S')

    # NOTE(review): startdate/enddate carry no time zone, whereas the GpsDates
    # values were produced by toUTC and are UTC-aware — confirm that this
    # comparison is valid, and means what is intended, on the pandas version in use.
    ind = (GpsDates > startdate) & (GpsDates < enddate)
    meanLat = np.mean(Gpsdata['LATITUDE'][ind])
    meanLon = np.mean(Gpsdata['LONGITUDE'][ind])
    loc_lat.append(meanLat)
    loc_lon.append(meanLon)

    wear_ind = (Gpsdata['Wear/Non-Wear']=='Wear') & ind
    # Count with the vectorised Series.sum() instead of the built-in sum(),
    # which walks the whole Series element by element in Python.
    n_in_window = int(ind.sum())
    n_wear = int(wear_ind.sum())
    # A segment without any GPS fix has no defined wear fraction: store NaN
    # rather than failing with ZeroDivisionError on 0/0.
    wear_frac.append(n_wear / n_in_window if n_in_window else float('nan'))
    


In [27]:
# Mean latitude computed for each processed segment.
loc_lat

[41.57038914259569,
 41.49936256249998,
 41.49935257692305,
 41.49933590072069,
 41.50239988325773,
 41.53169000000001,
 41.510051351405494,
 41.499313397727256,
 41.56202825443331,
 41.81887991338584]

In [28]:
# Wear fraction computed for each processed segment.
wear_frac

[1.0,
 1.0,
 1.0,
 0.78613886429745794,
 0.17995444191343962,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0]

# TODO:
* Handle daylight saving time
* Add the wear-time fraction for every row (the loop currently covers only the first 10)
* Loop through all files and generate the data for each brace (the xls sheets need to be renamed first)