## Simulator to run Markov Prediction over some datasets

In [2]:
import os
import numpy as np
import pandas as pd
from main.utils.predictor import TransitionMatrix, MarkovChainPredictor, get_state, min_max_scaler
from predict.settings import DATASET_DIR

In [3]:
def read_data(data, dataset_dir=None):
    """Load a named CSV dataset into a DataFrame.

    Parameters
    ----------
    data : str
        Dataset name without the ``.csv`` extension.
    dataset_dir : str or os.PathLike, optional
        Directory that holds the CSV file. When omitted, the project's
        ``DATASET_DIR`` setting is used, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Contents of ``<dataset_dir>/<data>.csv`` as parsed by ``pd.read_csv``.
    """
    # Resolve the directory lazily so callers can point at another location
    # (e.g. a temporary folder) without touching the project settings.
    base_dir = DATASET_DIR if dataset_dir is None else dataset_dir
    return pd.read_csv(os.path.join(base_dir, f"{data}.csv"))

In [26]:
# Load "data2.csv" from the dataset directory into a DataFrame.
data = read_data("data2")    

In [27]:
# Summary statistics of the loaded columns, as a quick sanity check on value ranges.
data.describe()

Unnamed: 0,id,Rainfall,WaterLevel
count,232.0,232.0,232.0
mean,115.5,713.913793,1.0
std,67.116814,785.589103,0.061865
min,0.0,0.3,0.887839
25%,57.75,95.1,0.947593
50%,115.5,405.65,1.006257
75%,173.25,946.325,1.052894
max,231.0,3229.3,1.099573


In [28]:
# Number of water-level observations available for the train/test split.
data["WaterLevel"].values.shape

(232,)

In [29]:
# Forwarded as `max_height` to TransitionMatrix and min_max_scaler further down.
# NOTE(review): presumably the water level treated as full scale when
# normalising — confirm against main.utils.predictor.
max_height = 1.08

In [30]:
def split_test_train(data, train_fraction=0.85):
    """Split a sequence of water levels, in order, into two frames.

    No shuffling is done: the leading ``train_fraction`` of the observations
    forms the first frame and the remainder the second. This matches how the
    notebook unpacks the result
    (``train_data, test_data = split_test_train(...)``).

    Parameters
    ----------
    data : numpy.ndarray or sequence
        One-dimensional, sliceable collection of water-level readings.
    train_fraction : float, default 0.85
        Share of the observations, counted from the start, placed in the
        first (training) frame. The split position is truncated to an int.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Training frame followed by test frame, each with a single
        ``WaterLevel`` column.

    Raises
    ------
    ValueError
        If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )

    # len() equals shape[0] for arrays and also lets plain lists be split.
    split_at = int(len(data) * train_fraction)
    train_part = data[:split_at]
    test_part = data[split_at:]
    return (
        pd.DataFrame({'WaterLevel': train_part}),
        pd.DataFrame({'WaterLevel': test_part}),
    )

In [31]:
# The first 85% of the WaterLevel readings become the training frame, the rest the test frame.
train_data, test_data = split_test_train(data["WaterLevel"].values)

In [32]:
# Build the transition-matrix helper from the training frame.
# NOTE(review): how TransitionMatrix uses max_height is not visible in this notebook.
transition_matrix = TransitionMatrix(train_data, max_height=max_height)

In [33]:
# Display the value returned by generate(); in the recorded output this is a
# 3x3 array whose rows each sum to 1.
transition_matrix.generate()

array([[0.99186992, 0.00813008, 0.        ],
       [0.02222222, 0.95555556, 0.02222222],
       [0.        , 0.03571429, 0.96428571]])

In [34]:
# Tabulate the `transitions` attribute; the recorded output shows integer
# counts per (row state, column state) pair.
pd.DataFrame(transition_matrix.transitions)

Unnamed: 0,normal,almost_flooded,flooded
normal,122,1,0
almost_flooded,1,43,1
flooded,0,1,27


In [35]:
# Starting state for the forecast: the state of the last training observation.
# NOTE(review): the raw WaterLevel is passed here, whereas the test labels
# further down are computed on min_max_scaler output — confirm get_state
# applies the same scaling and thresholds.
current_state = get_state(train_data["WaterLevel"].values[-1])

In [47]:
# Predictor built from the `values` and `states` attributes of transition_matrix.
# NOTE(review): if the predictor samples randomly, seed the RNG beforehand so
# the run is reproducible — not verifiable from this notebook.
predictor = MarkovChainPredictor(transition_matrix.values, transition_matrix.states)
# How many future states to generate and compare against the test set.
no_predictions = 15

In [48]:
# Generate `no_predictions` states, starting from current_state.
predictions = predictor.generate_states(current_state, no_predictions=no_predictions)

In [49]:
# Scale the held-out water levels with the project's min_max_scaler.
# NOTE(review): this overwrites the WaterLevel column in place, so running the
# cell twice scales already-scaled values; re-run the split cell first.
test_data["WaterLevel"] = min_max_scaler(test_data["WaterLevel"].values, max_height)

# np.select takes the first matching condition, so the shared boundaries
# resolve as:
#   level <= 0.75         -> "normal"
#   0.75 < level <= 0.98  -> "almost_flooded"
#   level > 0.98          -> "flooded"
# NOTE(review): thresholds presumably mirror get_state — confirm.
criteria = [
    test_data['WaterLevel'].le(0.75),
    test_data['WaterLevel'].between(0.75, 0.98),
    test_data['WaterLevel'].ge(0.98),
]

values = ["normal", "almost_flooded", "flooded"]
# The fallback has to be a string like the choices: recent NumPy releases
# cannot promote an integer default and string labels to a common dtype and
# raise TypeError. Only rows matching no condition (e.g. NaN levels) get it.
test_data['state'] = np.select(criteria, values, default="unknown")

In [50]:
# Line up the generated states against the first `no_predictions` observed states.
difference = pd.DataFrame(
    {
        "Prediction": predictions,
        "Actual_state": test_data["state"].iloc[:no_predictions],
    }
)

In [51]:
# Shape of the mismatching rows: the first number is how many predictions
# differ from the observed state.
difference.query('Prediction != Actual_state').shape

(0, 2)

In [54]:
# Side-by-side view of predicted vs. observed states.
# NOTE(review): this cell's execution count (54) is out of sequence and the
# recorded output lists 10 rows while no_predictions is 15 — the output may be
# stale; refresh with Restart Kernel -> Run All.
difference

Unnamed: 0,Prediction,Actual_state
0,normal,normal
1,normal,normal
2,normal,normal
3,normal,normal
4,normal,normal
5,normal,normal
6,normal,normal
7,normal,normal
8,normal,normal
9,normal,normal


In [52]:
# Accuracy = share of rows where the predicted state equals the observed one.
# The mean of the boolean match column gives that share directly, avoids running
# the same query twice, and yields NaN instead of a ZeroDivisionError when
# `difference` has no rows.
accuracy = float((difference["Prediction"] == difference["Actual_state"]).mean())

In [53]:
# Fraction of compared predictions that matched the observed state (1.0 = all matched).
accuracy

1.0