In [201]:
import os
import random
from django.conf import settings
import numpy as np
import pandas as pd

In [202]:
# (code, label) pairs for the three classes. The codes match the integer
# encoding of the `state` column (values 0, 1, 2) and of the `states` mapping
# defined further down.
definition = [('0', "Drought"), ('1', "Almost Drought"), ('2', 'Normal')]

In [203]:
# Dataset file name; the leading slash is kept because the loader builds the
# full location from settings.DATASET_DIR plus this value.
datapath = "/clean_flood.csv"

In [204]:
# Build the CSV location with os.path.join rather than string concatenation so
# that DATASET_DIR may be a str or a pathlib.Path, with or without a trailing
# separator. The leading "/" on `datapath` is stripped first; otherwise
# os.path.join would treat it as an absolute path and drop DATASET_DIR.
csv_path = os.path.join(settings.DATASET_DIR, datapath.lstrip("/"))

# 'State' is read as str when a column of that name is present in the file.
data = pd.read_csv(csv_path, dtype={'State': str})

In [205]:
# Rescale Rainfall by its mean, then subtract one scalar offset drawn uniformly
# from [-0.1, 0.9). The offset comes from a locally seeded generator so that the
# rescaled values -- and the threshold classification derived from them -- are
# identical on every Restart & Run All, without touching the global NumPy seed.
# NOTE: this overwrites the Rainfall column in place; re-running the cell
# without reloading `data` rescales the already rescaled values.
RAINFALL_OFFSET_SEED = 42
rainfall_offset = np.random.RandomState(RAINFALL_OFFSET_SEED).rand() - 0.1
data["Rainfall"] = data["Rainfall"] / data["Rainfall"].mean() - rainfall_offset

In [206]:
# Summary statistics of the rescaled Rainfall column.
data["Rainfall"].describe()

count    232.000000
mean       0.166389
std        0.061865
min        0.054228
25%        0.113983
50%        0.172646
75%        0.219283
max        0.265962
Name: WaterLevel, dtype: float64

In [207]:
# Boolean masks for the three Rainfall bands and the integer code that goes
# with each mask, in the same order. Both cut-offs are inclusive on either
# side, so a value sitting exactly on a cut-off satisfies two masks.
rainfall = data['Rainfall']
low_cut, high_cut = 0.25, 0.45
criteria = [
    rainfall <= low_cut,
    (rainfall >= low_cut) & (rainfall <= high_cut),
    rainfall >= high_cut,
]
values = list(range(3))

In [208]:
# The first matching mask wins; rows matching no mask get the default code 0.
data['state'] = np.select(condlist=criteria, choicelist=values, default=0)

In [209]:
# Summary statistics of every numeric column, including the new `state` codes.
data.describe()

Unnamed: 0,id,Rainfall,WaterLevel,state
count,232.0,232.0,232.0,232.0
mean,115.5,713.913793,0.166389,0.0
std,67.116814,785.589103,0.061865,0.0
min,0.0,0.3,0.054228,0.0
25%,57.75,95.1,0.113983,0.0
50%,115.5,405.65,0.172646,0.0
75%,173.25,946.325,0.219283,0.0
max,231.0,3229.3,0.265962,0.0


In [220]:
# WARNING: this replaces every threshold-derived state with a randomly drawn
# one, so everything computed from `state` below is fitted to synthetic labels.
# The draw uses a dedicated seeded generator so the labels (and the transition
# matrix built from them) are reproducible across runs. The repeated entries in
# the pool weight the codes 0 : 1 : 2 as 2 : 3 : 2.
STATE_SEED = 42
state_rng = random.Random(STATE_SEED)
state_pool = [0, 1, 2, 1, 2, 0, 1]
data["state"] = [state_rng.choice(state_pool) for _ in range(len(data))]

In [221]:
# NOTE(review): shift() with its default period moves values DOWN one row, so
# despite its name this column holds the state of the PRECEDING row (row 0 is
# NaN, which also turns the column into floats). The pair counting and the
# pd.DataFrame(transitions) orientation further down are written against this
# layout: together they yield rows = earlier state, columns = following state.
# Switching to shift(-1) here without also transposing that frame would flip
# the resulting matrix.
data["next_state"] =  data["state"].shift()

In [223]:
# Preview the first rows, including the state / next_state columns.
data.head()

Unnamed: 0,id,date,Rainfall,WaterLevel,state,next_state
0,0,1957-01-02 00:00:00,17.2,0.161871,2,
1,1,1957-04-02 01:00:00,477.1,0.164932,0,2.0
2,2,1957-08-02 02:00:00,2107.4,0.167994,0,0.0
3,3,1957-12-02 03:00:00,501.5,0.171055,1,0.0
4,4,1958-01-02 04:00:00,23.7,0.174117,1,1.0


In [226]:
# State label -> integer code as stored in the `state` column.
states = dict(drought=0, almost_drought=1, normal=2)
# One empty inner dict per state label, to be filled with pair counts.
transitions = {label: {} for label in states}

In [227]:
# For every ordered pair of state labels, count the rows whose `state` equals
# the outer label's code and whose `next_state` equals the inner label's code.
for outer_label, outer_code in states.items():
    state_matches = data["state"] == outer_code
    for inner_label, inner_code in states.items():
        shifted_matches = data["next_state"] == inner_code
        transitions[outer_label][inner_label] = int((state_matches & shifted_matches).sum())
    

In [228]:
# Show the nested pair counts.
transitions

{'normal': {'normal': 18, 'almost_flooded': 29, 'flooded': 22},
 'almost_flooded': {'normal': 32, 'almost_flooded': 50, 'flooded': 22},
 'flooded': {'normal': 19, 'almost_flooded': 25, 'flooded': 14}}

In [229]:
# Nested dict -> frame: the outer keys become the columns and the inner keys
# become the row index.
df = pd.DataFrame.from_dict(transitions, orient="columns")

In [230]:
# Number of rows in the counts frame.
df.shape[0]

3

In [231]:
# Turn the counts into row-wise proportions: every row is divided by its own
# total, so each row sums to 1. A single vectorised division builds a fresh
# float frame instead of writing float rows into the integer count columns one
# at a time, which depends on implicit int -> float upcasting during
# assignment. A row whose total is 0 comes out as NaN.
row_totals = df.sum(axis=1)
df = df.div(row_totals, axis=0)

In [232]:
# Plain NumPy array of the row-normalised frame (labels are dropped).
transition_matrix = df.to_numpy()

In [233]:
# Display the matrix values.
transition_matrix

array([[0.26086957, 0.46376812, 0.27536232],
       [0.27884615, 0.48076923, 0.24038462],
       [0.37931034, 0.37931034, 0.24137931]])

In [234]:
# Display only: the result is not assigned, so transition_matrix itself is
# left unchanged by this cell.
np.atleast_2d(transition_matrix)

array([[0.26086957, 0.46376812, 0.27536232],
       [0.27884615, 0.48076923, 0.24038462],
       [0.37931034, 0.37931034, 0.24137931]])

In [237]:
# Keep only the state labels, in insertion order, as a list. list(states)
# yields the keys while `states` is still the label -> code dict and a plain
# copy once it is already a list, so the cell can be re-run safely (calling
# .keys() on the already converted list would raise AttributeError).
states = list(states)

In [238]:
# Label -> position of that label in `states`.
index_dict = {label: position for position, label in enumerate(states)}

In [239]:
# Position -> label, the inverse lookup of index_dict.
state_dict = dict(enumerate(states))

In [240]:
# Show both lookup tables side by side.
state_dict, index_dict

({0: 'normal', 1: 'almost_flooded', 2: 'flooded'},
 {'normal': 0, 'almost_flooded': 1, 'flooded': 2})

In [241]:
from main.utils.predictor import MarkovChain

In [242]:
# Build the project's MarkovChain from the row-normalised matrix and the list
# of state labels. NOTE(review): presumably the labels must be in the same
# order as the matrix rows/columns -- confirm against main.utils.predictor.
predictor = MarkovChain(transition_matrix, states)

In [243]:
# Ask the chain for a sequence starting from 'drought'. NOTE(review): `no`
# looks like the number of states to generate -- confirm in MarkovChain.
predictor.generate_states('drought', no=10)

['normal',
 'almost_flooded',
 'almost_flooded',
 'almost_flooded',
 'flooded',
 'almost_flooded',
 'almost_flooded',
 'flooded',
 'flooded',
 'normal']