In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import datetime as datetime

In [2]:
# Input files, resolved relative to the notebook's working directory.
PRODUCTION_CSV = 'Production Data - 2.csv'
PRESSURE_CSV = 'Pressure Data - 2.csv'

# Raw tables handed to the model classes defined in the next cell.
prod = pd.read_csv(PRODUCTION_CSV)
pres = pd.read_csv(PRESSURE_CSV)

In [3]:

#========================================================================================================
class Cases(object):
    """Interactive collection of well-selection scenarios ("cases").

    A case is a string of '0'/'1' flags with one character per entry of
    ``well_list``; a '1' at position ``i`` selects ``well_list[i]``.
    """

    def __init__(self, well_list):
        """Store the ordered well names the case strings refer to."""
        self.caseNo = 0               # number of cases accepted so far
        self.well_list = well_list    # ordered well names, one per flag position
        self.case_codes = []          # accepted '0'/'1' strings, in entry order
        self.case_list = []           # decoded well-name lists, filled by decode()

    def _is_valid_code(self, code):
        """Return True if ``code`` is non-empty, has one flag per well and holds only '0'/'1'."""
        return bool(code) and len(code) == len(self.well_list) and set(code) <= {'0', '1'}

    def add_case(self):
        """Prompt on stdin for case strings until the user enters ``end``.

        Every accepted string is appended to ``case_codes`` and ``caseNo`` is
        incremented. Duplicates and malformed strings are reported on stdout
        and the prompt is repeated. Returns None.
        """
        print('Provide a string of 0s and 1s, to include or exclude wells in this sequence.\n'+
        'For example: if the wells list is [P1 P2 I1 I2 I3], you can add 10101 to consider P1, I1 and I3 wells in the Regression.\n')
        print('Add new case for the following well list: {}, or input "end" to finalize.\nString length should be {}'.format(self.well_list, len(self.well_list)))

        # Validation uses plain branches rather than assert/except so that it
        # still runs when Python is started with -O (which strips asserts).
        while True:
            s = input('Case {}:\t'.format(self.caseNo))
            if s == 'end':
                print('Success! All {} cases have been added!\n'.format(self.caseNo))
                break
            if s in self.case_codes:
                print('DuplicateError! This case has already been added.\n')
                continue
            if not self._is_valid_code(s):
                print('InputError! Only 0 and 1s can be added. Length of the string should be {}\n'.format(len(self.well_list)))
                continue
            print('Case {} added.\n'.format(self.caseNo))
            self.caseNo += 1
            self.case_codes.append(s)

    def decode(self):
        """Translate every stored case string into the list of selected well names.

        Rebuilds ``case_list`` from scratch on each call and returns it.
        """
        self.case_list = [
            [self.well_list[i] for i, flag in enumerate(code) if flag == '1']
            for code in self.case_codes
        ]
        return self.case_list
    
    
        
#========================================================================================================
class Cumulatives(Cases):
    """Cumulative reservoir volumes per well, joined to the pressure table.

    Producer columns hold the running sum of reservoir offtake, injector
    columns the running sum of reservoir intake. The combined well list
    (producers followed by injectors) is what the inherited case strings index.
    """

    def __init__(self, prod_df, pressure_df, producers, injectors):
        """Keep the input tables and the producer/injector well names."""
        Cases.__init__(self, producers+injectors)
        self.prod_df = prod_df #input dataframe with wells and allocated fluid data
        self.producers = producers #name of producers in the list
        self.injectors = injectors #name of injectors in the list
        self.offtake_df = None #cumulative volumes, filled by create_cumulatives_df()
        self.pressure_df = pressure_df
        # Raw pressure table and the last merged result, tracked separately so
        # create_pressure_df() can be called more than once.
        self._pressure_source = pressure_df
        self._pressure_merged = None

    def calculate_reservoir_volumes(self, oil, water, gas, Bo=1.35, Bw=1.04, Rs=500, Bg=0.0007):
        """Return ``oil*Bo + water*Bw + (gas - Rs*oil)*Bg``.

        Parameters
        ----------
        oil, water, gas : scalar or pandas.Series
            Surface volumes; Series arguments are combined element-wise.
        Bo : float
            Oil formation volume factor, reservoir bbl/surface bbl.
        Bw : float
            Water formation volume factor, reservoir bbl/surface bbl.
        Rs : float
            Solution gas-oil ratio, gas [scf]/oil [stb].
        Bg : float
            Gas formation volume factor, reservoir cf/surface cf.

        NOTE(review): with the units stated above the oil and water terms are
        in reservoir bbl while the gas term would be in reservoir cf; the gas
        term is also not clipped at zero when ``gas < Rs*oil``. Confirm both
        against the intended material-balance formulation.
        """
        res_vol = oil*Bo+water*Bw+(gas-Rs*oil)*Bg
        return res_vol

    def create_cumulatives_df(self):
        """Build ``offtake_df``: 'Timestamp' followed by one cumulative column per well.

        Reads '<well> Alloc Oil', '<well> Alloc Water' and '<well> Alloc Gas'
        for producers and '<well> Alloc Injection' for injectors from
        ``prod_df``. Returns None; the result is stored on ``self.offtake_df``.
        """
        columns = {'Timestamp': self.prod_df['Timestamp']}

        #Iterate through producers and calculate reservoir offtakes
        for p in self.producers:
            oil = self.prod_df['{} Alloc Oil'.format(p)]
            water = self.prod_df['{} Alloc Water'.format(p)]
            # presumably converts Mscf to scf to match the Rs unit; confirm with the data source
            gas = self.prod_df['{} Alloc Gas'.format(p)]*1000

            offtake = self.calculate_reservoir_volumes(oil, water, gas)
            columns['{}'.format(p)] = offtake.cumsum()

        #Iterate through injectors and calculate reservoir intakes
        for i in self.injectors:
            inj_wat = self.prod_df['{} Alloc Injection'.format(i)]
            # Only the water term contributes for an injector.
            intake = self.calculate_reservoir_volumes(0, inj_wat, 0)
            columns['{}'.format(i)] = intake.cumsum()

        self.offtake_df = pd.DataFrame(columns)

    def create_pressure_df(self):
        """Join the pressure table with the cumulative volumes and return the result.

        The cumulatives are rebuilt, right-merged onto the raw pressure table
        on their shared columns, rows containing any NaN are dropped, and
        'Timestamp' is converted to datetime. The result is also stored on
        ``self.pressure_df``.

        The merge always starts from the raw pressure table, so calling this
        method repeatedly gives the same result instead of merging an
        already-merged frame.
        """
        self.create_cumulatives_df()

        # If pressure_df is not our own previous output, the caller supplied
        # (or replaced) the raw table: adopt it as the new merge source.
        if self.pressure_df is not self._pressure_merged:
            self._pressure_source = self.pressure_df

        merged = self._pressure_source.merge(self.offtake_df, how = 'right').dropna()
        merged = merged.assign(Timestamp=pd.to_datetime(merged['Timestamp']))

        self._pressure_merged = merged
        self.pressure_df = merged
        return self.pressure_df
    

#========================================================================================================    
    
class RegressionModels(Cumulatives):
    """Fits one linear regression of reservoir pressure per well-selection scenario.

    Features are the cumulative per-well volumes produced by ``Cumulatives``;
    the target is the 'Reservoir Pressure' column of the merged pressure table.
    """

    def __init__(self, prod_df, pressure_df, producers, injectors, normalize_coef=True):
        """Prepare the merged pressure/cumulatives table used for fitting.

        ``normalize_coef`` controls whether ``fit_models`` divides the well
        coefficients by the sum of the producer coefficients.
        """
        Cumulatives.__init__(self, prod_df, pressure_df, producers, injectors)
        # create_pressure_df() rebuilds the cumulatives itself, so they are
        # available on self.offtake_df afterwards without a separate call.
        self.pressure_data = self.create_pressure_df()
        self.production_data = self.offtake_df
        self.normalize_coef=normalize_coef

        self.Xcases = None #list of lists with different input assumptions
        self.y = pressure_df['Reservoir Pressure'] #pressure data

        self.stats = None             # raw fit results, set by fit_models()
        self.stats_normalized = None  # fit results after optional normalisation

    def create_scenarios(self):
        """Prompt for scenarios, decode them and return the well lists per scenario."""
        self.add_case()
        self.decode()
        self.Xcases=self.case_list
        print('Following scenarios will be considered:\n')
        return self.Xcases

    def fit_models(self):
        """Fit a LinearRegression for every scenario and tabulate the results.

        Returns a DataFrame with one row per scenario: the scenario code,
        R-square (``model.score`` on the fitting data), the fitted model, its
        intercept and one coefficient column per well (0 for wells left out of
        the scenario). The un-normalised table is kept on ``self.stats``; the
        returned table is also stored on ``self.stats_normalized``.
        """
        if self.Xcases is None:
            # create_scenarios() was skipped: derive the well lists from
            # whatever case codes are already stored.
            self.Xcases = self.decode()

        stats = pd.DataFrame({'Scenario Codes': self.case_codes, 'R-square': None, 'Models': None, 'Intercepts': None })

        # Float zeros: these columns receive float coefficients below, and
        # writing floats into an int column is deprecated in recent pandas.
        for w in self.well_list:
            stats[w]=0.0

        self.y = self.pressure_data['Reservoir Pressure']
        for i, features in enumerate(self.Xcases):
            X = self.pressure_data[features]
            m = LinearRegression().fit(X,self.y)

            filter_i = stats['Scenario Codes']==self.case_codes[i]
            stats.loc[filter_i, ['Models', 'R-square', 'Intercepts', ]] = [m, m.score(X, self.y), m.intercept_]
            # Coefficients are stored under the same feature list that was fitted.
            stats.loc[filter_i, features] = m.coef_

        self.stats=stats.copy()
        if self.normalize_coef:
            wells = self.producers+self.injectors
            # NOTE: a scenario with no producer selected has a zero producer
            # sum, so its normalised coefficients come out as inf/NaN.
            producer_sum = stats[self.producers].sum(axis=1)
            stats[wells]=stats[wells].div(producer_sum, axis=0)
        self.stats_normalized=stats

        print('Models have successfully been matched!')
        return stats

    def visualize_models(self):
        """Plot measured pressure against time with one predicted curve per scenario.

        Raises RuntimeError if ``fit_models`` has not been run yet.
        """
        if self.stats is None:
            raise RuntimeError('fit_models() must be called before visualize_models().')

        timestamps = self.pressure_data['Timestamp']
        fig, ax = plt.subplots(figsize=(16,8))
        ax.scatter(timestamps, self.pressure_data['Reservoir Pressure'])

        models = self.stats['Models']
        codes = self.stats['Scenario Codes']
        # Uses this instance's own scenarios rather than a notebook-level
        # variable, and labels every line explicitly so the legend does not
        # depend on the order in which artists were added to the axes.
        for i, features in enumerate(self.Xcases):
            predicted = models.iloc[i].predict(self.pressure_data[features])
            ax.plot(timestamps, predicted, label=codes.iloc[i])

        if len(self.Xcases) > 0:
            ax.legend()
        ax.set_xlabel('Time')
        ax.set_ylabel('Reservoir Pressure [psi]')
        ax.grid()
        ax.set_title('Time vs. Pressure with all scenarios')
   