<img src="https://news.illinois.edu/files/6367/543635/116641.jpg" alt="University of Illinois" width="250"/>

# Descriptive Statistics #
By Richard Sowers
* <r-sowers@illinois.edu>
* <https://publish.illinois.edu/r-sowers/>

Copyright 2021 University of Illinois Board of Trustees. All Rights Reserved.

### Explanation ###
Descriptive Statistics for Greek Driver project

# imports and graphics configurations #

In [1]:
import pandas
import numpy
import re
import datetime

In [2]:
# Directory holding the input CSV and the file's stem (no extension).
datadirectory, csv_fname = "./", "20181024_d1_0830_0900"

# How many observations to show in the previews and exported tables.
N_observations = 5

# Process the first data line

In [3]:
# Read only the first two lines of the CSV: the column header and the
# first data line, each stripped of surrounding whitespace.
csv_path = datadirectory + csv_fname + ".csv"
with open(csv_path, "r") as csv_file:
    header, data_raw = (csv_file.readline().strip() for _line in range(2))

In [4]:
# Show just the first 100 characters of the raw data line.
data_raw[0:100]

'1; Car; 48.85; 9.770344; 37.977391; 23.737688; 4.9178; 0.0518; -0.0299; 0.000000; 37.977391; 23.7376'

In [5]:
def split_leading_fields(line, field_counts, sep=";"):
    """Cut consecutive groups of separator-terminated fields off the front of ``line``.

    Parameters
    ----------
    line : str
        Text to split.
    field_counts : iterable of int
        Number of fields in each successive group.
    sep : str
        Field terminator; it is kept at the end of every field.

    Returns
    -------
    (groups, rest) : (list of str, str)
        ``groups[i]`` spans ``field_counts[i]`` fields, each including its
        trailing ``sep``.  ``rest`` is the text following the last complete
        group.  If ``line`` runs out of separators, the groups completed so
        far are returned (with the unfinished tail in ``rest``) instead of
        raising.
    """
    groups = []
    start = 0
    for count in field_counts:
        end = start
        for _field in range(count):
            hit = line.find(sep, end)
            if hit < 0:
                # Not enough fields left for a full group: stop here.
                return groups, line[start:]
            end = hit + len(sep)
        groups.append(line[start:end])
        start = end
    return groups, line[start:]


# The line opens with one group of per-track fields, followed by
# fixed-size groups of per-observation fields (see the header line).
N_AGGREGATE_FIELDS = 4      # track_id; type; traveled_d; avg_speed
N_OBSERVATION_FIELDS = 6    # lat; lon; speed; lon_acc; lat_acc; time

# Scan only as far as needed: the aggregate group plus N_observations
# observation groups.  The remainder of the line is left untouched in `rest`.
parts, rest = split_leading_fields(
    data_raw,
    [N_AGGREGATE_FIELDS] + [N_OBSERVATION_FIELDS] * N_observations,
)

# Ellipsis markers: one for the truncated line, one for the omitted lines.
parts.append(" ...")
out = "\n".join([header] + parts + ["..."])
out

'track_id; type; traveled_d; avg_speed; lat; lon; speed; lon_acc; lat_acc; time\n1; Car; 48.85; 9.770344;\n 37.977391; 23.737688; 4.9178; 0.0518; -0.0299; 0.000000;\n 37.977391; 23.737688; 4.9207; -0.0124; -0.0354; 0.040000;\n 37.977391; 23.737688; 4.9160; -0.0519; -0.0413; 0.080000;\n 37.977390; 23.737688; 4.9057; -0.0914; -0.0478; 0.120000;\n 37.977390; 23.737689; 4.8871; -0.1679; -0.0550; 0.160000;\n ...\n...'

In [6]:
# Wrap the preview text in a LaTeX verbatim environment and save it.
# Doubled braces in the template are literal braces for str.format.
verbatim_template = r"\begin{{verbatim}}{0:s}\end{{verbatim}}"
firstline = verbatim_template.format(out)
with open("firstlines.tex", "w") as tex_file:
    tex_file.write(firstline)

In [7]:
def _to_number(token):
    """Return ``token`` converted by ``pandas.to_numeric``, or ``token`` itself.

    Tokens that cannot be parsed as numbers (e.g. the vehicle type) are
    returned unchanged.  This replaces ``errors='ignore'``, which pandas has
    deprecated in favour of catching the exception explicitly.
    """
    try:
        return pandas.to_numeric(token)
    except (ValueError, TypeError):
        return token


def _split_fields(line):
    """Drop all spaces, trim leading/trailing ';' and split ``line`` on ';'."""
    return line.replace(" ", "").strip(";").split(";")


# Column names from the header line, and the first data line's values with
# numeric-looking tokens converted to numbers.
headerlist = _split_fields(header)
datalist = [_to_number(token) for token in _split_fields(data_raw)]
print(headerlist)
print(datalist[:10])

['track_id', 'type', 'traveled_d', 'avg_speed', 'lat', 'lon', 'speed', 'lon_acc', 'lat_acc', 'time']
[1, 'Car', 48.85, 9.770344, 37.977391, 23.737688000000002, 4.9178, 0.0518, -0.0299, 0.0]


In [8]:
# The first BREAK entries are the per-track (aggregate) fields; everything
# after them belongs to the observations.
BREAK = 4

aggregate_header = headerlist[:BREAK]
observation_header = headerlist[BREAK:]

aggregate_data = datalist[:BREAK]
observation_data = datalist[BREAK:]

# Aggregate values labelled by their column names.
aggregate = pandas.Series(data=aggregate_data, index=aggregate_header)
aggregate

track_id            1
type              Car
traveled_d      48.85
avg_speed     9.77034
dtype: object

In [9]:
# Export the aggregate values as a LaTeX table, without a header row.
aggregate.to_latex(
    buf="aggregate.tex",
    header=False,
    bold_rows=True,
)

In [10]:
# The observation values repeat in groups of len(observation_header);
# reshape them to one row per group.  reshape raises if the count of values
# is not a whole number of groups.
observation_values = numpy.array(observation_data).reshape(-1, len(observation_header))
observation_frame = pandas.DataFrame(observation_values, columns=observation_header)
observation_frame = observation_frame.set_index("time", drop=True, append=False)

# Base timestamp from the file name, read as "_"-separated tokens: the first
# token is parsed as %Y%m%d and the third as %H%M.  Splitting on "_" (rather
# than slicing at fixed character positions) keeps this working when the
# middle token is not exactly two characters long.
name_tokens = csv_fname.split("_")
dt = datetime.datetime.strptime(name_tokens[0] + name_tokens[2], "%Y%m%d%H%M")

# The "time" values are added to the base timestamp as seconds.
observation_frame.index = observation_frame.index.map(
    lambda t: dt + datetime.timedelta(seconds=t)
)

# Label every row with the track's id and type as well as its timestamp.
observation_frame.index = pandas.MultiIndex.from_product(
    [[aggregate["track_id"]], [aggregate["type"]], observation_frame.index],
    names=["track_id", "type", "time"],
)

In [11]:
# Display the first N_observations rows of the observation table.
observation_frame.head(n=N_observations)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lat,lon,speed,lon_acc,lat_acc
track_id,type,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Car,2018-10-24 08:30:00.000,37.977391,23.737688,4.9178,0.0518,-0.0299
1,Car,2018-10-24 08:30:00.040,37.977391,23.737688,4.9207,-0.0124,-0.0354
1,Car,2018-10-24 08:30:00.080,37.977391,23.737688,4.916,-0.0519,-0.0413
1,Car,2018-10-24 08:30:00.120,37.97739,23.737688,4.9057,-0.0914,-0.0478
1,Car,2018-10-24 08:30:00.160,37.97739,23.737689,4.8871,-0.1679,-0.055


In [12]:
# Export the same leading rows as a LaTeX table.
observation_preview = observation_frame.head(N_observations)
observation_preview.to_latex(buf="observationframe.tex", bold_rows=True)