Table RAS45003
Reported road casualties by severity (estimates): Great Britain, quarterly and annual 

In [1]:
from databaker.framework import *
import pandas as pd 
from io import BytesIO
import pyexcel
from pyexcel_ods import get_data

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# Location of the published RAS45003 spreadsheet (ODS format).
inputURL = 'https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/679968/ras45003.ods'

# Wrap a plain requests session in CacheControl, backed by a file cache under
# '.cache' and using the LastModified heuristic.
session = CacheControl(
    requests.Session(),
    cache=FileCache('.cache'),
    heuristic=LastModified(),
)

# Make sure the local 'in' folder exists (no error if it is already there).
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/679968/ras45003.ods

In [3]:
# Download the spreadsheet through the caching session and hold it in memory.
response = session.get(inputURL)
# Stop here on an HTTP error status (e.g. 404/500) instead of passing an error
# page on to the ODS parser, where the failure would be much harder to read.
response.raise_for_status()
ods_file = BytesIO(response.content)

In [4]:
# Echo the buffer object; the repr only shows that a BytesIO exists, not its contents.
ods_file

<_io.BytesIO at 0x251173b5570>

In [5]:
# Load a fixed window of the 'ras45003' sheet from the in-memory ODS file:
# starting at row index 6 / column index 0, limited to 12 rows and 7 columns.
data = pyexcel.get_sheet(
    file_content=ods_file,
    file_type='ods',
    sheet_name='ras45003',
    start_row=6,
    row_limit=12,
    start_column=0,
    column_limit=7,
)

In [6]:
# Display the loaded sheet as a text table.
data

ras45003:
+-------------------+---+--------+-------------------+----------+------------+--------------------------+
| Year and quarter  |   | Killed | Killed or         | Slightly | All        | Motor traffic 1          |
|                   |   |        | seriously injured | injured  | casualties | (billion vehicle miles)  |
+-------------------+---+--------+-------------------+----------+------------+--------------------------+
| 2010-2014 average |   | 1798.6 | 24455.6           | 172838   | 197293.6   | 304.99                   |
+-------------------+---+--------+-------------------+----------+------------+--------------------------+
| 2007              |   | 2946   | 30720             | 217060   | 247780     | 314.07                   |
+-------------------+---+--------+-------------------+----------+------------+--------------------------+
| 2008              |   | 2538   | 28572             | 202333   | 230905     | 311.04                   |
+-------------------+---+--------+--

In [7]:
# Turn the sheet's rows into a DataFrame. The spreadsheet header is still
# data row 0 here, and the column labels are the default integers 0..6.
df = pd.DataFrame(data=data.get_array())
df

Unnamed: 0,0,1,2,3,4,5,6
0,Year and quarter,,Killed,Killed or\nseriously injured,Slightly\ninjured,All\ncasualties,Motor traffic 1\n(billion vehicle miles)
1,2010-2014 average,,1798.6,24455.6,172838,197294,304.99
2,2007,,2946,30720,217060,247780,314.07
3,2008,,2538,28572,202333,230905,311.04
4,2009,,2222,26912,195234,222146,308.09
5,2010,,1850,24510,184138,208648,303.19
6,2011,,1901,25023,178927,203950,303.78
7,2012,,1754,24793,170930,195723,302.64
8,2013,,1713,23370,160300,183670,303.7
9,2014,,1775,24582,169895,194477,311.62


In [8]:
# Promote the first row (the spreadsheet header) to column labels and remove
# it from the data rows.
# Built as a chain of non-mutating calls so `df` is left untouched and no
# in-place operation runs on an object sliced from `df` (which can trigger
# SettingWithCopyWarning). Re-running the cell always yields the same frame.
observations = (
    df
    .rename(columns=df.iloc[0])   # map integer labels 0..n to the header strings
    .drop(index=df.index[0])      # the header row is no longer an observation
)

In [9]:
# Preview the first five rows after the header row was promoted to column labels.
observations.head()

Unnamed: 0,Year and quarter,Unnamed: 2,Killed,Killed or seriously injured,Slightly injured,All casualties,Motor traffic 1 (billion vehicle miles)
1,2010-2014 average,,1798.6,24455.6,172838,197294,304.99
2,2007,,2946.0,30720.0,217060,247780,314.07
3,2008,,2538.0,28572.0,202333,230905,311.04
4,2009,,2222.0,26912.0,195234,222146,308.09
5,2010,,1850.0,24510.0,184138,208648,303.19


In [10]:
# List the raw column labels (iterating a DataFrame yields its column names).
list(observations)

['Year and quarter',
 '',
 'Killed',
 'Killed or\nseriously injured',
 'Slightly\ninjured',
 'All\ncasualties',
 'Motor traffic 1\n(billion vehicle miles) ']

In [11]:
# Remove the blank column whose header is the empty string.
# Rebinding the result (rather than dropping in place) together with
# errors='ignore' makes the cell safe to re-run: a second run is a no-op
# instead of raising KeyError because the column is already gone.
observations = observations.drop(columns=[''], errors='ignore')

In [12]:
# List the column labels again, now without the empty-string column.
list(observations)

['Year and quarter',
 'Killed',
 'Killed or\nseriously injured',
 'Slightly\ninjured',
 'All\ncasualties',
 'Motor traffic 1\n(billion vehicle miles) ']

In [13]:
# Replace the raw headers (which contain line breaks, a footnote marker and a
# trailing space) with clean labels, in positional order.
# Assigning to `.columns` installs a fresh Index. Writing into
# `columns.values[i]` instead mutates the Index's backing array, which pandas
# does not support: label lookups can go stale and the array may be read-only.
# A column-count mismatch now raises ValueError rather than going unnoticed.
observations.columns = [
    'Year',
    'Killed',
    'Killed or seriously injured',
    'Slightly injured',
    'All casualties',
    'Motor traffic(billion vehicle miles)',
]

In [14]:
# List the column labels after renaming.
list(observations)

['Year',
 'Killed',
 'Killed or seriously injured',
 'Slightly injured',
 'All casualties',
 'Motor traffic(billion vehicle miles)']

In [15]:
# Preview the first five rows with the cleaned column labels.
observations.head()

Unnamed: 0,Year,Killed,Killed or seriously injured,Slightly injured,All casualties,Motor traffic(billion vehicle miles)
1,2010-2014 average,1798.6,24455.6,172838,197294,304.99
2,2007,2946.0,30720.0,217060,247780,314.07
3,2008,2538.0,28572.0,202333,230905,311.04
4,2009,2222.0,26912.0,195234,222146,308.09
5,2010,1850.0,24510.0,184138,208648,303.19


In [16]:
# Reshape from wide to long: 'Year' stays as the identifier, every other
# column becomes a ('Reported road casualties', 'Value') pair on its own row.
new_table = observations.melt(
    id_vars=['Year'],
    var_name="Reported road casualties",
    value_name="Value",
)

In [17]:
# Count the non-null entries in each column of the long table.
new_table.count()

Year                        55
Reported road casualties    55
Value                       55
dtype: int64

In [18]:
# Attach the constant descriptor columns to every row.
# NOTE(review): these labels are also applied to the
# 'Motor traffic(billion vehicle miles)' rows, which are not counts of
# people - confirm whether those rows need different values.
new_table = new_table.assign(**{'Unit': 'Number', 'Measure Type': 'People'})

In [19]:
# Display the long-format table with the added 'Unit' and 'Measure Type' columns.
new_table

Unnamed: 0,Year,Reported road casualties,Value,Unit,Measure Type
0,2010-2014 average,Killed,1798.6,Number,People
1,2007,Killed,2946.0,Number,People
2,2008,Killed,2538.0,Number,People
3,2009,Killed,2222.0,Number,People
4,2010,Killed,1850.0,Number,People
5,2011,Killed,1901.0,Number,People
6,2012,Killed,1754.0,Number,People
7,2013,Killed,1713.0,Number,People
8,2014,Killed,1775.0,Number,People
9,2015,Killed,1730.0,Number,People


In [20]:
# Write the long-format table to out/RAS45003.csv without the index column,
# creating the output folder (and any missing parents) first.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

new_table.to_csv(destinationFolder / 'RAS45003.csv', index=False)