In [83]:
import os
import numpy as np
import pandas as pd
from celery import shared_task
from django.conf import settings
from django.utils import timezone
from main.models import Record
import pprint
from predict.settings import DATASET_DIR

In [84]:
class States:
    """ A Collection of all States """
    NORMAL = 'normal'
    ALMOST_FLOODED = 'almost_flooded'
    FLOODED = 'flooded'


In [85]:
def get_state(value):
    """ get the state for which value is in """
    norm_value = value/settings.DRAIN_HEIGHT
    if norm_value < 0.75:
        return States.NORMAL
    elif 0.75 < norm_value < 0.98:
        return States.ALMOST_FLOODED
    else:
        return States.FLOODED



In [86]:
def min_max_scaler(X, max_height):
    """ Return min-max scaler for the dataframe """
    X_std = (X - X.min(axis=0)) / (max_height - X.min(axis=0))
    return X_std * (1 -0) + 0


In [87]:
max_height = settings.DRAIN_HEIGHT

In [88]:
# Generate a Numpy Array of Records
water_level_entries = np.array(Record.objects.values_list('water_level', flat=True))


In [89]:
# data = pd.DataFrame({'WaterLevel': water_level_entries})
data = "data1"
data = pd.read_csv(os.path.join(DATASET_DIR, f"{data}.csv"))

In [90]:
data

Unnamed: 0,date,Rainfall,Dam cap,WaterLevel,State
0,1957-01-02 00:00:00,17.2,2763.966,32.516000,0
1,1957-04-02 01:00:00,477.1,2763.966,32.616000,0
2,1957-08-02 02:00:00,2107.4,2763.966,32.716000,3513
3,1957-12-02 03:00:00,501.5,2763.966,32.816000,0
4,1958-01-02 04:00:00,23.7,2763.966,32.916000,0
...,...,...,...,...,...
227,2012-12-11 11:00:00,431.8,2171.387,29.035649,0
228,2013-01-11 12:00:00,14.9,2900.232,29.000000,0
229,2013-04-11 13:00:00,364.5,3346.624,29.000000,0
230,2013-08-11 14:00:00,2164.8,3346.624,29.000000,11087


### Initialize the necessary Variables

In [91]:
# Normalize Water Level data using MinMax Algorithm
data["WaterLevel"] = min_max_scaler(data["WaterLevel"].values, 34.5)

In [92]:
criteria = [
        data['WaterLevel'].le(0.55), 
        data['WaterLevel'].between(0.55, 0.80), 
        data['WaterLevel'].between(0.80, 0.98), 
        data['WaterLevel'].ge(0.98)]

In [93]:
# States can be Low, Normal, Almost Flooded and Flooded
states = {"L": "low", "N": "normal", "A": "almost_flooded", "F": "flooded"}
data['state'] = np.select(criteria, list(states.keys()), "N")
data["next_state"] = data["state"].shift()
data["next_next_state"] = data["next_state"].shift()

In [94]:
data

Unnamed: 0,date,Rainfall,Dam cap,WaterLevel,State,state,next_state,next_next_state
0,1957-01-02 00:00:00,17.2,2763.966,0.639273,0,N,,
1,1957-04-02 01:00:00,477.1,2763.966,0.657455,0,N,N,
2,1957-08-02 02:00:00,2107.4,2763.966,0.675636,3513,N,N,N
3,1957-12-02 03:00:00,501.5,2763.966,0.693818,0,N,N,N
4,1958-01-02 04:00:00,23.7,2763.966,0.712000,0,N,N,N
...,...,...,...,...,...,...,...,...
227,2012-12-11 11:00:00,431.8,2171.387,0.006482,0,L,L,L
228,2013-01-11 12:00:00,14.9,2900.232,0.000000,0,L,L,L
229,2013-04-11 13:00:00,364.5,3346.624,0.000000,0,L,L,L
230,2013-08-11 14:00:00,2164.8,3346.624,0.000000,11087,L,L,L


In [95]:
prob_state = np.array([])
for i in states.keys():
    prob_state = np.append(prob_state, [data.loc[data['state'] == i].shape[0],])

In [98]:
prob = prob_state/prob_state.sum()

In [99]:
prob

array([0.37931034, 0.20689655, 0.1637931 , 0.25      ])

In [100]:
next_state_probs = {k: states.copy() for k in states.keys()}

In [115]:
next_state_probs

{'L': {'L': 87, 'N': 1, 'A': 0, 'F': 0},
 'N': {'L': 0, 'N': 46, 'A': 1, 'F': 0},
 'A': {'L': 0, 'N': 1, 'A': 36, 'F': 1},
 'F': {'L': 0, 'N': 0, 'A': 1, 'F': 57}}

In [102]:
# Check for transitions between states and store count
for i in states:
    for j in states:
        next_state_probs[i][j] = data[
                (data["state"] == i) & (data["next_state"] == j)].shape[0]

In [103]:
pd.DataFrame(next_state_probs)

Unnamed: 0,L,N,A,F
L,87,0,0,0
N,1,46,1,0
A,0,1,36,1
F,0,0,1,57


In [104]:
# Calculate the Probability of Transition based on data from transitions
df = pd.DataFrame(next_state_probs)
for i in range(df.shape[0]):
    df.iloc[i] = df.iloc[i]/df.iloc[i].sum()
print(df.values)


[[1.         0.         0.         0.        ]
 [0.02083333 0.95833333 0.02083333 0.        ]
 [0.         0.02631579 0.94736842 0.02631579]
 [0.         0.         0.01724138 0.98275862]]


In [105]:
next_next_state_prob = {x: {k: states.copy() for k in states.keys()} for x in states.keys()}

In [114]:
pprint.PrettyPrinter(indent=2).pprint(next_next_state_prob)

{ 'A': { 'A': {'A': 34, 'F': 1, 'L': 0, 'N': 1},
         'F': {'A': 0, 'F': 1, 'L': 0, 'N': 0},
         'L': {'A': 0, 'F': 0, 'L': 0, 'N': 0},
         'N': {'A': 0, 'F': 0, 'L': 0, 'N': 1}},
  'F': { 'A': {'A': 1, 'F': 0, 'L': 0, 'N': 0},
         'F': {'A': 1, 'F': 56, 'L': 0, 'N': 0},
         'L': {'A': 0, 'F': 0, 'L': 0, 'N': 0},
         'N': {'A': 0, 'F': 0, 'L': 0, 'N': 0}},
  'L': { 'A': {'A': 0, 'F': 0, 'L': 0, 'N': 0},
         'F': {'A': 0, 'F': 0, 'L': 0, 'N': 0},
         'L': {'A': 0, 'F': 0, 'L': 86, 'N': 1},
         'N': {'A': 0, 'F': 0, 'L': 0, 'N': 1}},
  'N': { 'A': {'A': 1, 'F': 0, 'L': 0, 'N': 0},
         'F': {'A': 0, 'F': 0, 'L': 0, 'N': 0},
         'L': {'A': 0, 'F': 0, 'L': 0, 'N': 0},
         'N': {'A': 1, 'F': 0, 'L': 0, 'N': 44}}}


In [111]:
for i in states:
    for j in states:
        for k in states:
            next_next_state_prob[i][j][k] = data[
                (data["state"] == i) & (data["next_state"] == j) & (data["next_next_state"] == k)
            ].shape[0]

In [113]:
pd.DataFrame(next_next_state_prob)

Unnamed: 0,L,N,A,F
L,"{'L': 86, 'N': 1, 'A': 0, 'F': 0}","{'L': 0, 'N': 0, 'A': 0, 'F': 0}","{'L': 0, 'N': 0, 'A': 0, 'F': 0}","{'L': 0, 'N': 0, 'A': 0, 'F': 0}"
N,"{'L': 0, 'N': 1, 'A': 0, 'F': 0}","{'L': 0, 'N': 44, 'A': 1, 'F': 0}","{'L': 0, 'N': 1, 'A': 0, 'F': 0}","{'L': 0, 'N': 0, 'A': 0, 'F': 0}"
A,"{'L': 0, 'N': 0, 'A': 0, 'F': 0}","{'L': 0, 'N': 0, 'A': 1, 'F': 0}","{'L': 0, 'N': 1, 'A': 34, 'F': 1}","{'L': 0, 'N': 0, 'A': 1, 'F': 0}"
F,"{'L': 0, 'N': 0, 'A': 0, 'F': 0}","{'L': 0, 'N': 0, 'A': 0, 'F': 0}","{'L': 0, 'N': 0, 'A': 0, 'F': 1}","{'L': 0, 'N': 0, 'A': 1, 'F': 56}"


In [116]:
data

Unnamed: 0,date,Rainfall,Dam cap,WaterLevel,State,state,next_state,next_next_state
0,1957-01-02 00:00:00,17.2,2763.966,0.639273,0,N,,
1,1957-04-02 01:00:00,477.1,2763.966,0.657455,0,N,N,
2,1957-08-02 02:00:00,2107.4,2763.966,0.675636,3513,N,N,N
3,1957-12-02 03:00:00,501.5,2763.966,0.693818,0,N,N,N
4,1958-01-02 04:00:00,23.7,2763.966,0.712000,0,N,N,N
...,...,...,...,...,...,...,...,...
227,2012-12-11 11:00:00,431.8,2171.387,0.006482,0,L,L,L
228,2013-01-11 12:00:00,14.9,2900.232,0.000000,0,L,L,L
229,2013-04-11 13:00:00,364.5,3346.624,0.000000,0,L,L,L
230,2013-08-11 14:00:00,2164.8,3346.624,0.000000,11087,L,L,L


In [121]:
data.tail(90)

Unnamed: 0,date,Rainfall,Dam cap,WaterLevel,State,state,next_state,next_next_state
142,1992-08-07 22:00:00,2392.6,2842.085,0.557428,18921,N,N,N
143,1992-12-07 23:00:00,582.0,2842.086,0.550947,0,N,N,N
144,1992-01-08 00:00:00,17.8,2842.087,0.544465,0,L,N,N
145,1992-04-08 01:00:00,245.6,2842.088,0.537983,0,L,L,N
146,1993-08-08 02:00:00,1823.1,2842.089,0.531501,18921,L,L,L
...,...,...,...,...,...,...,...,...
227,2012-12-11 11:00:00,431.8,2171.387,0.006482,0,L,L,L
228,2013-01-11 12:00:00,14.9,2900.232,0.000000,0,L,L,L
229,2013-04-11 13:00:00,364.5,3346.624,0.000000,0,L,L,L
230,2013-08-11 14:00:00,2164.8,3346.624,0.000000,11087,L,L,L


In [122]:
df = pd.DataFrame(next_next_state_prob)
for i in range(df.shape[0]):
    print(i)
    df.iloc[i] = df.iloc[i]/df.iloc[i].sum()
print(df.values)


0


TypeError: unsupported operand type(s) for +: 'dict' and 'dict'