# **Prediction of window use case using generic markov model**

## Section 1 : Imports

In [1]:
from generic_markov import generic_markov,evaluate_mk
from data_parsing import create_cluster_map, create_window_dataframe, parse_app_data

import numpy as np

import plotly.graph_objects as go

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import figure

from IPython.display import display
import os

# Data-source switch for the notebook: True runs the app-data parsing cell
# (`if USE_APP_DATA:`), False runs the random data generation cell
# (`if not USE_APP_DATA:`).
USE_APP_DATA = True

## Section 2 : Data Generation (Optional)

This section is only used if no app data is available. In that case, random data is generated by:
- Picking coordinates using a biased distribution of GPS areas
- Adding a time, picked randomly in [9:00 ; 10:10] or [17:00 ; 18:10]
- Drawing a window state, each GPS area having its own probability of opening/closing the window

In [2]:
if not USE_APP_DATA:
    import random
    import csv
    from random import randint
    from data_parsing import format_time,parse_csv

    # Directory that receives the generated CSV files and is read back by
    # parse_csv at the end of this cell (single definition so both stay in sync).
    DUMMY_DATA_DIR = "/home/celadodc-rswl.com/corentin.tatger/PersoPdata/app_data/"

    RANGE = 100  # number of rows written to each generated file
    # Day indices are drawn once and shared by every generated file.
    days = [randint(0, 6) for x in range(RANGE)]
    # One bounding box per GPS area: [lat_max, lon_max, lat_min, lon_min].
    adresses_polygon = [[43.575414, 1.364311, 43.575223, 1.364048],
                        [43.579395, 1.378290, 43.579204, 1.378027],
                        [43.597612, 1.433209, 43.597421, 1.432946],
                        [43.594434, 1.465131, 43.594243, 1.464868],
                        [43.583149, 1.450255, 43.582958, 1.449992]]

    # Candidate times (decimal values handed to format_time); this grid does not
    # depend on the file index, so it is built once.
    Starting_hours = np.linspace(9, 10.15)
    Stopping_hours = np.linspace(17, 18.15)
    possible_times = np.concatenate([Starting_hours, Stopping_hours])

    for k in range(10):
        # Biased pick of one GPS area index per row.
        rand = np.random.choice(list(range(5)), RANGE, p=[0.05, 0.5, 0.1, 0.1, 0.25])
        # Uniform random point inside the bounding box of the picked area.
        adresses = [[random.uniform(adresses_polygon[i][2], adresses_polygon[i][0]),
                     random.uniform(adresses_polygon[i][3], adresses_polygon[i][1])]
                    for i in rand]

        random_times = np.random.choice(possible_times, RANGE)

        # Reset per file: rows are indexed 0..RANGE-1 when written below, so these
        # lists must only hold the values drawn for the current file. (Previously
        # window_state kept growing across files and every file after the first
        # re-used the states drawn for file 0.)
        Time = []
        window_state = []

        for index in range(RANGE):
            Time.append(format_time(random_times[index]))
            # Areas 0 and 1 draw state 1 with probability 0.2, the others with 0.99.
            if rand[index] == 0 or rand[index] == 1:
                state_probabilities = [0.8, 0.2]
            else:
                state_probabilities = [0.01, 0.99]
            window_state.append(np.random.choice([0, 1], 1, p=state_probabilities))

        # newline='' lets the csv module control line endings itself.
        with open(DUMMY_DATA_DIR + 'dummy_data_{}.csv'.format(k),
                  mode='w', newline='') as csv_file:
            fieldnames = ['Coordinates', 'Wd_state', 'Time', 'Day']
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

            writer.writeheader()
            for i in range(RANGE):
                writer.writerow({'Coordinates': adresses[i], 'Wd_state': window_state[i][0],
                                 'Time': Time[i], 'Day': days[i]})

    # Read the generated files back and train the model on them.
    df_csv = parse_csv(DUMMY_DATA_DIR)
    df_window = create_window_dataframe(df_csv)
    mk_window = generic_markov(df_window)
    


## Section 3 : App data parsing

If data from the Android app is available, this section parses it to build and train the model.

In [3]:
if USE_APP_DATA:
    directory = 'json_data'
    mk_window = generic_markov()
    # os.listdir returns entries in arbitrary order. Sorting makes the incremental
    # training order, and the df_window left over from the last file (used by
    # later cells), reproducible from one run to the next.
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        df_app = parse_app_data(file_path)
        df_window = create_window_dataframe(df_app, verbose=False)
        # Train incrementally, feeding the model one single-row DataFrame at a time.
        for row_id in range(len(df_window)):
            mk_window.fit(df_window.iloc[[row_id]])
    display(mk_window.data_frame)

Unnamed: 0,Coordinates,Wd_change,Time,Time_delta,Day,Window_cluster,Coord_cluster,Start_cluster,End_cluster
0,"[43.53937, 1.39531]",Opened,14:24:40,NaT,0,2,2,3,2
1,"[43.54074, 1.39667]",Closed,14:24:51,0:00:10,0,3,2,2,3
2,"[43.54193, 1.39781]",Opened,14:25:01,NaT,0,2,2,3,2
3,"[43.54273, 1.39866]",Closed,14:25:06,0:00:10,0,3,2,2,3
4,"[43.54437, 1.40048]",Opened,14:25:17,NaT,0,2,2,3,2
5,"[43.54455, 1.4006733333333334]",Closed,14:25:19,0:00:11,0,3,2,2,3
6,"[43.55012, 1.40621]",Opened,14:25:59,NaT,0,4,4,5,4
7,"[43.55054, 1.40659]",Closed,14:26:01,0:00:40,0,5,4,4,5
8,"[43.54089166666667, 1.3968266666666667]",Opened,04:44:11,NaT,2,2,2,3,2
9,"[43.541045, 1.396985]",Closed,04:44:13,0:00:37,2,3,2,2,3


## Section 4 : Display clusters on a map

To improve readability and make debugging easier, a function displaying the clusters has been implemented. We use it here to check whether the clusters created from our dataset are logical. We call this function with the option to display data points considered noise by the clustering algorithm, again to check for errors.

In [4]:

    
    # Build the cluster map from the trained model's data frame, with the
    # display_noise option enabled, and show it as the cell output.
    c_map = create_cluster_map(
        mk_window.data_frame,
        display_noise=True,
    )
    c_map

Map(center=[43.53937, 1.39531], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'z…

## Section 5 : Evaluating the model performances

Using functions from the generic_markov library, we evaluate the model's accuracy

In [5]:
# Number of independent shuffle / split / train repetitions.
N_EPOCHS = 10

# epoch_acc[e] is the accuracy curve of epoch e: the accuracy of the initial model
# followed by the accuracy after each training row has been fitted.
epoch_acc = []
for i in range(N_EPOCHS):
    # Fresh random 70/30 train/test split for every epoch. The shuffled copy gets
    # its own name so df_window is not overwritten and the cell can be re-run.
    df_shuffled = df_window.sample(frac=1)
    df_train = df_shuffled.sample(frac=0.7)
    df_test = df_shuffled.drop(df_train.index)

    # Start from a model built on the first rows of the training set and record
    # its accuracy before any incremental fitting.
    Mk_chain = generic_markov(df_train.head())
    current_acc = [evaluate_mk(df_test, Mk_chain)]

    # Fit one single-row DataFrame at a time and re-evaluate after each step.
    for row_id in range(len(df_train)):
        Mk_chain.fit(df_train.iloc[[row_id]])
        current_acc.append(evaluate_mk(df_test, Mk_chain))

    epoch_acc.append(current_acc)
    print("Epoch {} done.".format(i))

# Create figure
gofig = go.Figure()
# Add traces, one for each slider step (one per epoch)
for epoch, accuracies in enumerate(epoch_acc):
    gofig.add_trace(
        go.Scatter(
            visible=False,
            line=dict(color="#00CED1", width=6),
            name="v = " + str(epoch),
            # One x position per recorded accuracy. The curve holds one more
            # point than there are training rows (the initial evaluation), so
            # the axis is sized from the curve rather than from df_train.
            x=np.arange(len(accuracies)),
            y=accuracies))
# The y-axis range is the same for every trace, so it is set once.
gofig.update_yaxes(range=[0, 100])

# Make the first trace visible
gofig.data[0].visible = True

# Create and add slider
steps = []
for i in range(len(gofig.data)):
    slider_step = dict(
        method="update",
        args=[{"visible": [False] * len(gofig.data)},
                {"title": "Evolution of Markov Accuracy's"}],  # layout attribute
    )
    slider_step["args"][0]["visible"][i] = True  # Toggle i'th trace to "visible"
    steps.append(slider_step)

sliders = [dict(
    # Index of the initially selected step; it must match the trace made visible
    # above and lie within 0..len(steps)-1.
    active=0,
    currentvalue={"prefix": "Epoch: "},
    pad={"t": 50},
    steps=steps
)]

gofig.update_layout(
    sliders=sliders
)
gofig.show()

Epoch 0 done.
Epoch 1 done.
Epoch 2 done.
Epoch 3 done.
Epoch 4 done.
Epoch 5 done.
Epoch 6 done.
Epoch 7 done.
Epoch 8 done.
Epoch 9 done.
