## Load Data

In [1]:
import numpy as np
from os.path import dirname, join as pjoin
import scipy.io as sio
import pandas as pd

# Location of the MATLAB file, given as a path relative to the working directory.
DATA_PATH = './data'
DATA_FILE_NAME = 'Brugge_en_d.mat'

# NOTE: despite its name, data_dir holds the full path to the .mat file, not a directory.
data_dir = pjoin(DATA_PATH, DATA_FILE_NAME)
# Read the file into mat_contents; later cells look up the 'en_d' entry by key.
mat_contents = sio.loadmat(data_dir)

In [2]:
# 'en_d' comes back wrapped in a 1x1 ndarray (see the printed shape),
# so the single element at [0, 0] is taken as the working record.
print(f"Type: {type(mat_contents['en_d'])}, Shape {mat_contents['en_d'].shape}")
data = mat_contents['en_d'][0, 0]

Type: <class 'numpy.ndarray'>, Shape (1, 1)


In [3]:
# TIME
# Simulation report times in days: 498 entries between day 0 and day 3648.
# The spacing is irregular and some entries are fractional days.
print("TIME")
print("numpy array shape: {}".format(data['TIME'].shape))
# TIME is stored as a column vector. Also show the 1-D shape obtained by
# flattening it, which is the form used as the DataFrame index further down
# and matches the "(498,)" line in this cell's recorded output.
print(data['TIME'].flatten().shape)

TIME
numpy array shape: (498, 1)
(498,)


In [4]:
# WBHP
# Well Bottom Hole Pressure: one value per report time (498) for each of the 104 models
print("WBHP")
print(f"numpy array shape: {data['WBHP'].shape}")
# The 1x1 wrapper holds one field per well:
# producers P1-P20 and injectors I1-I10, each a 498x104 double array
print(f"Producer P1 numpy array shape: {data['WBHP'][0,0]['P1'].shape}")
print(f"Injectre I1 numpy array shape: {data['WBHP'][0,0]['I1'].shape}")

WBHP
numpy array shape: (1, 1)
Producer P1 numpy array shape: (498, 104)
Injectre I1 numpy array shape: (498, 104)


In [5]:
# WWCT
# Well Water Cut: fraction of water in the total produced liquid
print("WWCT")
print(f"numpy array shape: {data['WWCT'].shape}")
# One field per producer, P1-P20
print(f"Producer P1 numpy array shape: {data['WWCT'][0,0]['P1'].shape}")

WWCT
numpy array shape: (1, 1)
Producer P1 numpy array shape: (498, 104)


In [6]:
# WOPR
# Well Oil Production Rate, in bbl/day
# This is the forecasting target and will serve as the label
print("WOPR")
print(f"numpy array shape: {data['WOPR'].shape}")
# One field per producer, P1-P20 (index 0-19)
print(f"Producer P1 numpy array shape: {data['WOPR'][0,0]['P1'].shape}")

WOPR
numpy array shape: (1, 1)
Producer P1 numpy array shape: (498, 104)


In [7]:
# WWPR
# Well Water Production Rate
print("WWPR")
print(f"numpy array shape: {data['WWPR'].shape}")
# One field per producer, P1-P20
print(f"Producer P1 numpy array shape: {data['WWPR'][0,0]['P1'].shape}")

WWPR
numpy array shape: (1, 1)
Producer P1 numpy array shape: (498, 104)


In [8]:
# WWIR
# Well Water Injection Rate
print("WWIR")
print(f"numpy array shape: {data['WWIR'].shape}")
# One field per injector, I1-I10
print(f"Injector 1 numpy array shape: {data['WWIR'][0,0]['I1'].shape}")

WWIR
numpy array shape: (1, 1)
Injector 1 numpy array shape: (498, 104)


In [9]:
# Field-level quantities are stored directly as arrays (no per-well wrapper).

# FOPT
# Field Oil Production Total: cumulative oil, i.e. sum of WOPR * days
print("FOPT")
print(f"numpy array shape: {data['FOPT'].shape}")

# FWPT
# Field Water Production Total
print("FWPT")
print(f"numpy array shape: {data['FWPT'].shape}")

# FPR
# Described in the original notes as Field Production Rate (FOPR + FWPR).
# NOTE(review): in ECLIPSE summary naming FPR usually denotes the field
# average pressure, not a rate -- confirm against the simulator output.
print("FPR")
print(f"numpy array shape: {data['FPR'].shape}")

FOPT
numpy array shape: (498, 104)
FWPT
numpy array shape: (498, 104)
FPR
numpy array shape: (498, 104)


In [10]:
# WWBT
# Holds a single value per model for each listed well (shape 1x104).
# NOTE(review): presumably the well water breakthrough time -- TODO confirm.
print("WWBT")
print(f"numpy array shape: {data['WWBT'].shape}")
# Only available for producers P5 and P11-P20
print(f"Producer P5 numpy array shape: {data['WWBT'][0,0]['P5'].shape}")

WWBT
numpy array shape: (1, 1)
Producer P5 numpy array shape: (1, 104)


## Data Preparation

각 model 별로 아래와 같은 Dataframe 을 만들자.

|date|WOPR|WBHP|WWCT|WWPR|
|--|--|--|--|--|
|0.0|0.0|0.0|0.0|0.0|
|1.0|0.0|0.0|0.0|0.0|
| . | . | . | . | . |
|3641.0|1998.772|1393.615|0.001552|3.107502|
|3648.0|1998.775|1393.951|0.001551|3.105065|

In [14]:
# Build a nested lookup: dic_wells[well_num][model_num] -> DataFrame.
# Both keys are 1-based numbers stored as strings, e.g. dic_wells['20']['104'].
N_PRODUCERS = 20  # producers P1-P20
# Single source of truth for both the data order and the column labels,
# so the two can never drift apart.
FEATURE_KEYS = ['WOPR', 'WBHP', 'WWCT', 'WWPR']

# The time axis is shared by every well and model; flatten it once
# instead of once per DataFrame.
time_index = data['TIME'].flatten()

dic_wells = {}
for well_index in range(N_PRODUCERS):
    well_key = 'P' + str(well_index+1)
    # One (n_times, n_models) matrix per feature for this producer.
    well_series = [data[key][0,0][well_key] for key in FEATURE_KEYS]
    # Take the model count from the data rather than hard-coding 104.
    n_models = well_series[0].shape[1]

    # 'model_num' => dataframe
    dic_models = {}
    for model_index in range(n_models):
        # Rows follow TIME (day 0 ... day 3648); columns follow FEATURE_KEYS.
        well_data = np.column_stack(
            [series[:, model_index] for series in well_series]
        )
        df = pd.DataFrame(data=well_data,
                          index=time_index,
                          columns=FEATURE_KEYS)
        df.index.name = 'date'
        dic_models[str(model_index+1)] = df

    dic_wells[str(well_index+1)] = dic_models

In [15]:
# Spot-check the first and last rows for model 1 of the first and last producer.
# Lookup order is dic_wells[well_num][model_num]. The dictionary built in the
# previous cell is named dic_wells; the former name dic_well was never defined
# and raised NameError on a fresh kernel.
print(dic_wells['1']['1'].head())
print(dic_wells['1']['1'].tail())
print(dic_wells['20']['1'].head())
print(dic_wells['20']['1'].tail())

      WOPR  WBHP  WWCT  WWPR
date                        
0.0    0.0   0.0   0.0   0.0
1.0    0.0   0.0   0.0   0.0
2.0    0.0   0.0   0.0   0.0
3.0    0.0   0.0   0.0   0.0
4.0    0.0   0.0   0.0   0.0
            WOPR      WBHP      WWCT      WWPR
date                                          
3620.0  1998.766  1392.616  0.001556  3.114186
3637.0  1998.771  1393.426  0.001553  3.108850
3638.0  1998.771  1393.473  0.001553  3.108501
3641.0  1998.772  1393.615  0.001552  3.107502
3648.0  1998.775  1393.951  0.001551  3.105065
      WOPR  WBHP  WWCT  WWPR
date                        
0.0    0.0   0.0   0.0   0.0
1.0    0.0   0.0   0.0   0.0
2.0    0.0   0.0   0.0   0.0
3.0    0.0   0.0   0.0   0.0
4.0    0.0   0.0   0.0   0.0
            WOPR      WBHP      WWCT      WWPR
date                                          
3620.0  218.9907  1941.845  0.890304  1777.349
3637.0  217.7559  1942.740  0.890922  1778.584
3638.0  217.6854  1942.795  0.890958  1778.655
3641.0  217.4646  1942.956  0.