In [41]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
import datetime as dt


In [42]:
# Location of the DEFRA export, relative to this notebook.
# Forward slashes work on every platform (Windows included) and avoid the
# invalid "\.", "\d" and "\D" escape sequences that the backslash form contained.
path = "../../data/DEFRA.csv"

# skiprows=3 discards the first three lines, so the fourth line of the file
# becomes the header row of this first read.
df = pd.read_csv(path, parse_dates=True, skiprows=3)


In [43]:
# Gets the name/location of each sensor: every header label of the current
# frame that contains "Birmingham" is kept, in file order.
sensorkeys = [str(label) for label in df.columns if "Birmingham" in label]

# Read the file again, skipping one more leading line than before, so that the
# following line is used as the header instead.
df = pd.read_csv(path, skiprows=4, parse_dates=True)

In [44]:
#merging date and time + setting new index
# An hour of 24 is not accepted by pd.to_datetime, so "24:00:00" is rewritten
# as "00:00" before the Date and Time strings are joined and parsed.
df['Time'] = df['Time'].replace('24:00:00', '00:00')
df['DateTime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], dayfirst=True)
df = df.set_index('DateTime').drop(columns=['Date', 'Time'])

#remove NaT rows
# Filter on the index itself rather than slicing off a fixed number of trailing
# rows, so the result stays correct if the number of unparsable rows changes.
df = df[df.index.notna()]

In [45]:
# Time-of-day value used to pick out the rows stamped exactly at midnight.
x = dt.time(0, 0, 0)

#flag the midnight rows
is_midnight = df.index.time == x

#subset midnight rows into new dataframe and increment their day by one
# (shift with freq='D' moves the index labels, not the values)
tempdf = df.loc[is_midnight].shift(1, freq='D')

#join the dataframes together again: every non-midnight row followed by the
# shifted midnight rows. pd.concat is used because DataFrame.append was removed
# in pandas 2.0. Row order is not chronological after this step.
df = pd.concat([df.loc[~is_midnight], tempdf])


In [48]:
# Split the frame column-wise at position 14: the first 14 columns go to
# tempdf, everything after them to tempdf2.
# NOTE(review): 14 is presumably the number of columns per sensor - confirm
# against the CSV layout.
columns = list(df.columns)
first_half, second_half = columns[:14], columns[14:]
tempdf = df.filter(items=first_half)
tempdf2 = df.filter(items=second_half)

# Give the second block the same column labels as the first one.
tempdf2.columns = tempdf.columns

In [49]:
#put each sensor data list into one concatenated dataframe
# keys=sensorkeys adds an outer index level, so each sensor's rows can be
# selected later with df.loc[<sensor key>].
df = pd.concat([tempdf, tempdf2], keys=sensorkeys)

# dropping status columns (these columns just contain the unit of measure)
# The unit is read from the first row of each "Status" column (.iloc[0] is
# explicitly positional), then all of those columns are dropped in one call
# instead of mutating the frame while iterating over its columns.
status_cols = [col for col in df.columns if "Status" in col]
unitsList = [df[col].iloc[0] for col in status_cols]
df = df.drop(columns=status_cols)

#renaming the columns with the units of measure
# The i-th remaining column is paired with the i-th collected unit.
columns = [col + " [" + unit + "]" for col, unit in zip(df.columns, unitsList)]
df.columns = columns

#replace strings for Null values with NaN, then sort the dataframe index
# (sensor key first, datetime second)
df = df.replace("No data", np.nan).sort_index()

# Time Series graphs

In [53]:
import plotly.graph_objects as go

In [54]:
# Create figure
fig = go.Figure()

#look for each sensor data by its key and add one trace per sensor
# (the loop previously stopped after the first sensor because of an
# unconditional break, so only one trace was ever drawn)
for key in sensorkeys:
    df_t = df.loc[str(key)]
    fig.add_trace(
        go.Scatter(
            x=df_t.index,
            y=df_t["Ozone [V ugm-3]"],
            name=str(key),
        )
    )

# Quick-zoom buttons shown above the plot: last day, last week, everything
range_buttons = [
    dict(count=1, label="1d", step="day", stepmode="backward"),
    dict(count=7, label="1w", step="day", stepmode="backward"),
    dict(step="all"),
]

# Set title, add range selector + range slider on the date axis, and leave the
# y axis free to rescale
fig.update_layout(
    title_text="Time series for Ozone [V ugm-3] measurements from DEFRA",
    xaxis=dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    ),
    yaxis=dict(
        autorange=True,
        fixedrange=False,
    ),
)

fig.show()