In [None]:
pip install -U kaleido

In [None]:
#Library Imports
import os
from io import BytesIO
from urllib.parse import quote_plus

import dash
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
from dash import dcc, html, Input, Output, State
from sqlalchemy import create_engine


#Database Connection Setup
# Connection settings are read from the environment (standard libpq variable
# names) so real secrets never have to be saved in the notebook; the fallbacks
# are the original local-development values.
username = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', 'password')
database = os.environ.get('PGDATABASE', 'data_cleaning_chapter')
host = os.environ.get('PGHOST', 'localhost')
# Percent-encode the credentials: characters such as '@', ':' or '/' in a
# user name or password would otherwise corrupt the connection URL.
database_url = (
    f'postgresql://{quote_plus(username)}:{quote_plus(password)}@{host}/{database}'
)
engine = create_engine(database_url)

In [None]:
# Fetch the kinematic AIS fields for the selected vessels, ordered by vessel and time.
query = '''
SELECT MMSI, t AS Timestamp, SOG, COG, Heading
FROM AISInputSample
WHERE MMSI IN  (246541000, 636018799,311001076,304111000, 211269660, 219014579,219019011,259896000)  
ORDER BY MMSI, t;
'''
df = pd.read_sql_query(sql=query, con=engine)
# Parse the timestamp column using the day-first "dd/mm/YYYY HH:MM:SS" layout.
df = df.assign(
    timestamp=pd.to_datetime(df['timestamp'], format='%d/%m/%Y %H:%M:%S')
)
df

In [None]:
# First dashboard: a vessel selector, the time-series figure and a PDF export control.
app = dash.Dash(__name__)
app.layout = html.Div(children=[
    # One dropdown entry per distinct MMSI; the first one is pre-selected.
    dcc.Dropdown(
        id='mmsi-dropdown',
        options=[dict(label=mmsi, value=mmsi) for mmsi in df['mmsi'].unique()],
        value=df['mmsi'].unique()[0],
    ),
    dcc.Graph(id='time-series-plot'),
    html.Button("Download as PDF", id="download-btn"),
    # pdf vector figures for the book
    dcc.Download(id="download-pdf"),
])

In [None]:
@app.callback(
    Output('time-series-plot', 'figure'),
    [Input('mmsi-dropdown', 'value')],
)
def update_graph(selected_mmsi):
    """Build the time-series figure for the vessel chosen in the dropdown.

    Selects the rows of ``df`` whose ``mmsi`` equals ``selected_mmsi`` and draws
    SOG (multiplied by 5), COG and heading as line traces over the timestamp.

    Returns:
        A figure dict with ``data`` (three Scatter traces) and ``layout`` entries.
    """
    vessel_rows = df[df['mmsi'] == selected_mmsi]
    timestamps = vessel_rows['timestamp']

    # (values, legend label) for each trace, in drawing order.
    # SOG is multiplied by 5 — presumably so it stays readable on the axis it
    # shares with COG and heading.
    series = (
        (vessel_rows['sog'] * 5, 'Scaled SOG (x5)'),
        (vessel_rows['cog'], 'COG'),
        (vessel_rows['heading'], 'Heading'),
    )
    traces = [
        go.Scatter(x=timestamps, y=values, mode='lines', name=label)
        for values, label in series
    ]

    # Fixed-size canvas and serif font, as used for the exported figures.
    layout = go.Layout(
        xaxis_title='Timestamp',
        yaxis_title='Value',
        margin=dict(l=80, b=140, t=50, r=10),
        font={'family': "Times New Roman", 'size': 18, 'color': "black"},
        autosize=False,
        width=950,
        height=400,
        hovermode='closest',
        transition=dict(duration=500),
    )
    return {'data': traces, 'layout': layout}


In [None]:
# pdf vector figures for the book
@app.callback(
    Output("download-pdf", "data"),
    Input("download-btn", "n_clicks"),
    State("time-series-plot", "figure"),
    prevent_initial_call=True
)
def download_pdf(n_clicks, fig):
    """Export the figure currently shown in the graph as a single-page PDF.

    Args:
        n_clicks: Click count of the download button; only serves as the trigger.
        fig: Current ``figure`` property of ``time-series-plot``; may be
            ``None`` when no figure has been rendered yet.

    Returns:
        The ``dcc.send_bytes`` payload for ``plot.pdf``, or ``dash.no_update``
        when there is no figure to export.
    """
    # Without a figure there is nothing to render; skip the download instead
    # of letting the image export raise.
    if not fig:
        return dash.no_update

    # Render straight to bytes. The explicit 980x410 export size takes
    # precedence over the 950x400 stored in the figure layout. `engine` is left
    # at its default: plotly has deprecated the explicit argument and uses
    # Kaleido automatically when it is installed.
    pdf_bytes = pio.to_image(fig, format="pdf", width=980, height=410)
    return dcc.send_bytes(pdf_bytes, "plot.pdf")

In [None]:
if __name__ == '__main__':
  # `run_server` was deprecated in favour of `run` and removed in Dash 3.
  # Prefer `run`, falling back to the legacy name only on older Dash releases
  # that do not provide it.
  start_server = getattr(app, 'run', None) or app.run_server
  start_server(debug=True)

In [None]:
# Define the window size for smoothing
window_size = 10  # Higher values result in a smoother signal


def _smooth_per_vessel(frame, column, window, statistic):
  """Return the centered rolling `statistic` ('mean' or 'median') of `column`, computed within each MMSI."""
  return frame.groupby('mmsi')[column].transform(
      lambda values: getattr(values.rolling(window=window, center=True), statistic)()
  )


# df stacks several vessels one after another, so the rolling window is applied
# per MMSI; a single rolling pass over the whole frame would blend the last
# samples of one vessel with the first samples of the next.
# NOTE(review): if COG/heading are angles in degrees, a plain mean/median is
# misleading near the 0/360 wrap-around — confirm whether circular smoothing is needed.
for signal in ('sog', 'cog', 'heading'):
  df[f'{signal}_mean_smoothed'] = _smooth_per_vessel(df, signal, window_size, 'mean')
  df[f'{signal}_median_smoothed'] = _smooth_per_vessel(df, signal, window_size, 'median')


In [None]:
# NOTE(review): the following cell creates `app` and assigns its layout again,
# which replaces the app built here.
app = dash.Dash(__name__)
# App layout
app.layout = html.Div(children=[
    # One dropdown entry per distinct MMSI; the first one is pre-selected.
    dcc.Dropdown(
        id='mmsi-dropdown',
        options=[dict(label=mmsi, value=mmsi) for mmsi in df['mmsi'].unique()],
        value=df['mmsi'].unique()[0],
    ),
    dcc.Graph(id='time-series-plot'),
    html.Button("Download as PDF", id="download-btn"),
    dcc.Download(id="download-pdf"),
])

In [None]:
# Initialize Dash app
app = dash.Dash(__name__)
# App layout
app.layout = html.Div(children=[
    # Selector row: vessel picker on the left, signal picker on the right.
    html.Div(children=[
        dcc.Dropdown(
            id='mmsi-dropdown',
            options=[dict(label=mmsi, value=mmsi) for mmsi in df['mmsi'].unique()],
            value=df['mmsi'].unique()[0],
            style={'width': '48%', 'display': 'inline-block'},
        ),
        dcc.Dropdown(
            id='signal-dropdown',
            options=[
                dict(label='SOG', value='sog'),
                dict(label='COG', value='cog'),
                dict(label='Heading', value='heading'),
            ],
            value='sog',
            style={'width': '48%', 'float': 'right', 'display': 'inline-block'},
        ),
    ]),
    dcc.Graph(id='time-series-plot'),
    html.Button("Download as PDF", id="download-btn"),
    dcc.Download(id="download-pdf"),
])

In [None]:
@app.callback(
    Output('time-series-plot', 'figure'),
    [Input('mmsi-dropdown', 'value'), Input('signal-dropdown', 'value')],
)
def update_graph(selected_mmsi, selected_signal):
    """Plot one signal of one vessel together with its smoothed variants.

    Selects the rows of ``df`` whose ``mmsi`` equals ``selected_mmsi`` and draws
    three line traces over the timestamp: the raw ``selected_signal`` column,
    its ``<signal>_mean_smoothed`` column and its ``<signal>_median_smoothed``
    column.

    Returns:
        A figure dict with ``data`` (three Scatter traces) and ``layout`` entries.
    """
    vessel_rows = df[df['mmsi'] == selected_mmsi]
    timestamps = vessel_rows['timestamp']
    signal_label = selected_signal.upper()

    # (source column, legend label): the raw signal first, then the smoothed columns.
    series = (
        (selected_signal, signal_label),
        (f'{selected_signal}_mean_smoothed', f'{signal_label} Mean Smoothed'),
        (f'{selected_signal}_median_smoothed', f'{signal_label} Median Smoothed'),
    )
    traces = [
        go.Scatter(x=timestamps, y=vessel_rows[column], mode='lines', name=label)
        for column, label in series
    ]

    layout = go.Layout(
        xaxis_title='Timestamp',
        yaxis_title=signal_label,
        margin=dict(l=80, b=140, t=50, r=10),
        font={'family': "Times New Roman", 'size': 18, 'color': "black"},
        autosize=False,
        width=950,
        height=400,
        hovermode='closest',
    )
    return {'data': traces, 'layout': layout}
    

In [None]:
@app.callback(
    Output("download-pdf", "data"),
    Input("download-btn", "n_clicks"),
    State("time-series-plot", "figure"),
    prevent_initial_call=True
)
def download_pdf(n_clicks, fig):
    """Export the figure currently shown in the graph as a single-page PDF.

    Args:
        n_clicks: Click count of the download button; only serves as the trigger.
        fig: Current ``figure`` property of ``time-series-plot``; may be
            ``None`` when no figure has been rendered yet.

    Returns:
        The ``dcc.send_bytes`` payload for ``plot.pdf``, or ``dash.no_update``
        when there is no figure to export.
    """
    # Without a figure there is nothing to render; skip the download instead
    # of letting the image export raise.
    if not fig:
        return dash.no_update

    # Render straight to bytes. The explicit 980x410 export size takes
    # precedence over the 950x400 stored in the figure layout. `engine` is left
    # at its default: plotly has deprecated the explicit argument and uses
    # Kaleido automatically when it is installed.
    pdf_bytes = pio.to_image(fig, format="pdf", width=980, height=410)
    return dcc.send_bytes(pdf_bytes, "plot.pdf")

In [None]:
if __name__ == '__main__':
  # `run_server` was deprecated in favour of `run` and removed in Dash 3.
  # Prefer `run`, falling back to the legacy name only on older Dash releases
  # that do not provide it. Port 8051 is passed explicitly instead of relying
  # on the default port.
  start_server = getattr(app, 'run', None) or app.run_server
  start_server(debug=True, port=8051)

In [None]:
def detect_outliers(data, column, k=1.5):
  """Flag the values of ``data[column]`` that lie outside the IQR fences.

  A value is an outlier when it is below ``Q1 - k * IQR`` or above
  ``Q3 + k * IQR``, where Q1/Q3 are the 25th/75th percentiles of the column
  and ``IQR = Q3 - Q1``.

  Args:
    data: DataFrame holding the column to examine.
    column: Name of the numeric column.
    k: Fence multiplier applied to the IQR. Defaults to 1.5; larger values
      flag fewer points.

  Returns:
    A boolean Series aligned with ``data[column]`` where True marks an
    outlier. Missing values compare as False and are never flagged.
  """
  values = data[column]
  q1, q3 = values.quantile(0.25), values.quantile(0.75)
  iqr = q3 - q1
  lower_bound = q1 - k * iqr
  upper_bound = q3 + k * iqr
  return (values < lower_bound) | (values > upper_bound)
# Add one boolean "<signal>_outliers" column per signal, computed over the whole frame.
for signal_name in ('sog', 'cog', 'heading'):
  df[f'{signal_name}_outliers'] = detect_outliers(df, signal_name)

In [None]:
# Display the frame, which by now also carries the smoothed and outlier-flag columns.
df