# Script to get the volume data by sensor
Created by: Apoorba

Date: March 22, 2019

In [1]:
import boto3 #Need to read and write files to aws s3 bucket
import pandas as pd
from io import BytesIO as StringIO 

    Note: For Python 2.7 we import BytesIO under the name StringIO.
    This aliasing is not needed for Python 3.x;
    there, just use `from io import StringIO`.

## Read the 5 min aggregate data 

The data is read from the "Vol_dat.csv" file, which was produced by the "Vol-Lamarie-Cheyenne-V1" script. 

In [2]:
# Source data originally came from a report; it is stored as Vol_dat.csv in the bucket.
client = boto3.client('s3')
obj = client.get_object(
    Key="abibeka/Vol_dat.csv",
    Bucket='prod-sdc-tti-911061262852-us-east-1-bucket',
)
# The 'Body' entry of the response is passed straight to pandas for CSV parsing.
dat = pd.read_csv(obj['Body'])
# Last expression of the cell: display the first rows.
dat.head()

Unnamed: 0,t1.controller,t1.time5m,t1.nrec,t2.direction,t2.milepost,t2.2015_adt
0,384,2018-02-01 00:05:00,20,I,325.8,13890
1,384,2018-02-01 00:10:00,9,I,325.8,13890
2,384,2018-02-01 00:15:00,17,I,325.8,13890
3,384,2018-02-01 00:20:00,17,I,325.8,13890
4,384,2018-02-01 00:25:00,10,I,325.8,13890


## Read the index file for the speed sensors.
The index file for the speed sensors is "SpeedSen_MP_314_360.csv". It was taken from one of the WyDOT reports.

In [3]:
# Fetch the speed-sensor index table (SpeedSen_MP_314_360.csv) from the bucket.
client = boto3.client('s3')
obj = client.get_object(
    Key="abibeka/uploaded_files/SpeedSen_MP_314_360.csv",
    Bucket='prod-sdc-tti-911061262852-us-east-1-bucket',
)
# Parse the 'Body' entry of the response as CSV.
AB_SS = pd.read_csv(obj['Body'])
# Last expression of the cell: display the first rows.
AB_SS.head()

Unnamed: 0,DEVICEID,SITENAME,MP,Sensor_Loc
0,396,Laramie East,317.68,EB
1,3911,Telephone Canyon,320.7,WB
2,395,Summit,322.05,WB
3,2146,Summit,322.6,WB
4,2147,Summit,323.85,EB


## Process 5 min aggregate data
Remove the table prefix ("t1.", "t2.") from the column names. Also retain only the relevant columns.

In [4]:
# Map every column name to the part after its table prefix ("t1.x" -> "x").
# Names without a dot are kept unchanged, so re-running this cell after the
# rename no longer fails with IndexError on the already-stripped names.
Rename1 ={x:(x.split('.')[1] if '.' in x else x) for x in dat}
# index=str also converts the row labels to strings (kept from the original
# call); the row index is replaced by set_index() later in the notebook.
dat=dat.rename(index=str,columns=Rename1)
# Keep only the sensor id, the 5-minute time bin and the record count.
dat =dat[['controller','time5m','nrec']]

## Create continuous date indices
Add a row for every 5-minute interval that has no record for a sensor, i.e. the intervals where the volume was 0.

In [16]:
# Work on a copy so the trimmed 5-minute table `dat` is left untouched.
dat1 =dat.copy()
# Sensor (controller) ids present in the data
dev=dat1['controller'].unique()
# Parse the 5-minute bin label into a real timestamp column
dat1['DateTime']=pd.to_datetime(dat1['time5m'],format='%Y-%m-%d %H:%M:%S')
# Index the observations by (sensor, timestamp)
dat1=dat1.set_index(['controller','DateTime']) 
# Complete 5-minute grid, open on the left: 2018-02-01 00:05 ... 2018-02-28 00:00.
# The left endpoint is dropped by slicing instead of closed='right', because the
# `closed` argument of date_range was deprecated in pandas 1.4 and removed in 2.0;
# slicing gives the same index on every pandas version.
# NOTE(review): the grid stops at 2018-02-28 00:00, so any records later on
# Feb 28 would be dropped by the reindex below - confirm this is intended.
index = pd.date_range('2018-02-01','2018-02-28',freq='5min')[1:]
# Every (sensor, timestamp) combination; the first level is named 'deviceId'.
multi_index=pd.MultiIndex.from_product([dev,index],names=['deviceId','DateTime'])
# Reindex onto the full grid. Combinations with no observation become NaN rows.
dat1=dat1.reindex(multi_index)

In [17]:
# Preview the first rows after reindexing onto the (deviceId, DateTime) grid.
dat1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,time5m,nrec
deviceId,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1
384,2018-02-01 00:05:00,2018-02-01 00:05:00,20.0
384,2018-02-01 00:10:00,2018-02-01 00:10:00,9.0
384,2018-02-01 00:15:00,2018-02-01 00:15:00,17.0
384,2018-02-01 00:20:00,2018-02-01 00:20:00,17.0
384,2018-02-01 00:25:00,2018-02-01 00:25:00,10.0


In [18]:
# Preview the last rows to see where the reindexed grid ends.
dat1.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,time5m,nrec
deviceId,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1
3911,2018-02-27 23:40:00,2018-02-27 23:40:00,13.0
3911,2018-02-27 23:45:00,2018-02-27 23:45:00,16.0
3911,2018-02-27 23:50:00,2018-02-27 23:50:00,16.0
3911,2018-02-27 23:55:00,2018-02-27 23:55:00,12.0
3911,2018-02-28 00:00:00,2018-02-28 00:00:00,10.0


## Reset the index and merge data with index table

In [19]:
# Add information about each speed sensor by merging with the index table.
# merge() works on flat columns, so turn the MultiIndex into columns, left-join
# AB_SS on the sensor id, then restore the original index levels.
dat1= dat1.reset_index().merge(AB_SS,how="left",left_on=['deviceId'],right_on="DEVICEID").set_index(list(dat1.index.names))
# Drop columns that duplicate information already held in the index
dat1=dat1.drop(columns=['time5m','DEVICEID'])
# Intervals with no records have NaN volume after the reindex: count them as 0.
# Only 'nrec' is filled. A blanket dat1.fillna(0) would also write 0 into
# SITENAME / MP / Sensor_Loc for any sensor missing from AB_SS, turning
# "unknown metadata" into a bogus site name and milepost.
dat1['nrec']=dat1['nrec'].fillna(0).astype(int)
# Rename columns
dat1=dat1.rename(columns={'nrec':'Volume','MP':"Milepost","Sensor_Loc":"Direction"})

In [20]:
# Preview the first rows after the merge and the column renames.
dat1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Volume,SITENAME,Milepost,Direction
deviceId,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
384,2018-02-01 00:05:00,20,Summit East,325.8,WB
384,2018-02-01 00:10:00,9,Summit East,325.8,WB
384,2018-02-01 00:15:00,17,Summit East,325.8,WB
384,2018-02-01 00:20:00,17,Summit East,325.8,WB
384,2018-02-01 00:25:00,10,Summit East,325.8,WB


In [21]:
# Preview the last rows after the merge and the column renames.
dat1.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Volume,SITENAME,Milepost,Direction
deviceId,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3911,2018-02-27 23:40:00,13,Telephone Canyon,320.7,WB
3911,2018-02-27 23:45:00,16,Telephone Canyon,320.7,WB
3911,2018-02-27 23:50:00,16,Telephone Canyon,320.7,WB
3911,2018-02-27 23:55:00,12,Telephone Canyon,320.7,WB
3911,2018-02-28 00:00:00,10,Telephone Canyon,320.7,WB


In [22]:
# Spot check: select the rows for deviceId 384 (first level of the index).
dat1.loc[384]

Unnamed: 0_level_0,Volume,SITENAME,Milepost,Direction
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-02-01 00:05:00,20,Summit East,325.8,WB
2018-02-01 00:10:00,9,Summit East,325.8,WB
2018-02-01 00:15:00,17,Summit East,325.8,WB
2018-02-01 00:20:00,17,Summit East,325.8,WB
2018-02-01 00:25:00,10,Summit East,325.8,WB
2018-02-01 00:30:00,11,Summit East,325.8,WB
2018-02-01 00:35:00,16,Summit East,325.8,WB
2018-02-01 00:40:00,11,Summit East,325.8,WB
2018-02-01 00:45:00,14,Summit East,325.8,WB
2018-02-01 00:50:00,12,Summit East,325.8,WB


In [23]:
#Can unstack the DateTime index level into columns to reshape the data
#dat1.unstack(1).head()

## Write the final output to the Z drive

In [19]:
# Destination on the mapped Z: drive for the processed volume table.
local_csv_path = 'Z:/Apoorb/Process_dat.csv'
# Write the frame with pandas' default CSV settings.
dat1.to_csv(local_csv_path)

## Write the final output to a folder in bucket

In [28]:
# Serialise the frame, index levels included, into an in-memory buffer.
# StringIO here is the BytesIO alias imported at the top of the notebook.
csv_buffer = StringIO()
dat1.to_csv(csv_buffer, index=True, sep=",")
# Upload the buffer contents as ProcessVolDat.csv; the put() response is the cell output.
s3 = boto3.resource('s3')
s3.Object(
    'prod-sdc-tti-911061262852-us-east-1-bucket',
    'abibeka/Data/ProcessVolDat.csv',
).put(Body=csv_buffer.getvalue())

{u'ETag': '"ddd71d35aebd07a5704e6d23d9a733fa"',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '0',
   'date': 'Thu, 21 Mar 2019 21:01:24 GMT',
   'etag': '"ddd71d35aebd07a5704e6d23d9a733fa"',
   'server': 'AmazonS3',
   'x-amz-id-2': 'sSoMFwRbRrKQqNNElqnJjhw0zBEQcZZbESDCoK6XXkvgrIS4bcRGol7Kb7/U9bZCbmJLBMUcjBs=',
   'x-amz-request-id': '1853024077C4F2B3'},
  'HTTPStatusCode': 200,
  'HostId': 'sSoMFwRbRrKQqNNElqnJjhw0zBEQcZZbESDCoK6XXkvgrIS4bcRGol7Kb7/U9bZCbmJLBMUcjBs=',
  'RequestId': '1853024077C4F2B3',
  'RetryAttempts': 0}}

## Read the file created above

In [29]:
# Read the uploaded ProcessVolDat.csv back from the bucket as a check on the upload.
client = boto3.client('s3')
obj = client.get_object(
    Key='abibeka/Data/ProcessVolDat.csv',
    Bucket='prod-sdc-tti-911061262852-us-east-1-bucket',
)
# Parse the 'Body' entry of the response as CSV.
dat_test = pd.read_csv(obj['Body'])
# Last expression of the cell: display the first rows.
dat_test.head()

Unnamed: 0,deviceId,DateTime,Volume,SITENAME,Milepost,Direction
0,384,2018-02-01 00:05:00,20,Summit East,325.8,WB
1,384,2018-02-01 00:10:00,9,Summit East,325.8,WB
2,384,2018-02-01 00:15:00,17,Summit East,325.8,WB
3,384,2018-02-01 00:20:00,17,Summit East,325.8,WB
4,384,2018-02-01 00:25:00,10,Summit East,325.8,WB
