In [1]:
# import libraries
import pandas as pd     # library for data analysis and manipulation tools
import numpy as np      # library for scientific computing and data manipulation
import pyodbc           # library for connecting to SQL Server
import lasio           # library for reading and writing Log ASCII Standard (LAS) files
import re              # library for regular expression matching operations
import os              # library for interacting with operating system

In [2]:
# Establish a connection to the Access database
conn = pyodbc.connect(r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:\Users\brand\OneDrive - Universidad Central del Ecuador\Documentos\Maestría EOR\Tesis EOR\Info EPPEC\AC_SACHA_AGO_2023_U_T.mdb;')
# Define your SQL queries with just columns we need
# sql_query_maestra_all = 'SELECT * FROM MAESTRA'
sql_query_maestra = 'SELECT COMPLETION_UWI, WELLBORE_ID, COMPLETION_COORDINATE_X, COMPLETION_COORDINATE_Y, COMPLETION_LEGAL_NAME, RSVR_NAME, FIELD FROM MAESTRA'
# sql_query_sc_all = 'SELECT * FROM SC'
sql_query_sc = 'SELECT COMPLETION_LEGAL_NAME, FECHA_COMPLETACION, RESERVORIO FROM SC'
sql_mensual = 'SELECT * FROM MENSUAL'
sql_diaria = 'SELECT * FROM DIARIA'
sql_iny_diaria = 'SELECT * FROM INY_DIARIA'
sql_production_test = 'SELECT * FROM PRUEBA_POZO'

In [3]:
# Execute the queries and fetch the data into a DataFrames
df_maestra = pd.read_sql(sql_query_maestra, conn)
df_sc = pd.read_sql(sql_query_sc, conn)
df_mensual = pd.read_sql(sql_mensual, conn)
df_diaria = pd.read_sql(sql_diaria, conn)
df_iny_diaria = pd.read_sql(sql_iny_diaria, conn)
df_production_test = pd.read_sql(sql_production_test, conn)

# Close the database connection
conn.close()

  df_maestra = pd.read_sql(sql_query_maestra, conn)
  df_sc = pd.read_sql(sql_query_sc, conn)
  df_mensual = pd.read_sql(sql_mensual, conn)
  df_diaria = pd.read_sql(sql_diaria, conn)
  df_iny_diaria = pd.read_sql(sql_iny_diaria, conn)
  df_production_test = pd.read_sql(sql_production_test, conn)


# POZOS

In [4]:
df_maestra.head()

Unnamed: 0,COMPLETION_UWI,WELLBORE_ID,COMPLETION_COORDINATE_X,COMPLETION_COORDINATE_Y,COMPLETION_LEGAL_NAME,RSVR_NAME,FIELD
0,ECSCH7000500TI,S-3,295091.5183,9968342.0,SCH-003TI,LOWER T,SCHN1
1,ECSCH7000500UI,S-3,295091.5183,9968342.0,SCH-003UI,LOWER U,SCHN1
2,ECSCH70008000T,S-5,295027.1983,9962203.0,SCH-005IT,Arenisca T,SCHN1
3,ECSCH70008000U,S-5,295027.1983,9962203.0,SCH-005IU,MAIN U,SCHN1
4,ECSCH7000800TI,S-5,295027.1983,9962203.0,SCH-005TI,LOWER T,SCHN1


In [5]:
df_sc.head()

Unnamed: 0,COMPLETION_LEGAL_NAME,FECHA_COMPLETACION,RESERVORIO
0,SCH-002BUI,2016-06-24,UI
1,SCH-002TI,2016-06-24,TI
2,SCH-002UI,2016-06-24,UI
3,SCH-003TI,2016-06-24,TI
4,SCH-003UI,2016-06-24,UI


In [6]:
# determine years in the dataset
df_sc['FECHA_COMPLETACION'].dt.year.unique()

array([2016, 2017, 2019, 2018, 2020, 2022, 2023, 2021], dtype=int64)

In [7]:
# Marge the DataFrames
df_wells = df_maestra.merge(df_sc, on='COMPLETION_LEGAL_NAME', how='inner')

# change the name of the columns to NOMBRE_COMPLETO	X	Y   FCOMP   CAMPO
df_wells.rename(columns={'WELLBORE_ID':'NOMBRE_COMPLETO','COMPLETION_COORDINATE_X':'X',
                        'COMPLETION_COORDINATE_Y':'Y', 'FECHA_COMPLETACION':'FCOMP', 'FIELD':'CAMPO'}, inplace=True)

df_wells.drop(['COMPLETION_UWI', 'RESERVORIO', 'RSVR_NAME', 'COMPLETION_LEGAL_NAME'], axis=1, inplace=True)

#TODO: check if there are duplicated wells have to be removed

df_wells.drop_duplicates(subset=['NOMBRE_COMPLETO'], inplace=True)

df_wells.head().round(3)

Unnamed: 0,NOMBRE_COMPLETO,X,Y,CAMPO,FCOMP
0,S-3,295091.518,9968341.653,SCHN1,2016-06-24
2,S-5,295027.198,9962203.243,SCHN1,2016-06-24
6,S-6,293967.658,9968829.463,SCHN1,2016-06-24
8,S-7,293192.848,9966747.213,SCHN1,2016-06-24
11,S-8,292765.148,9965765.453,SCHN1,2016-06-24


# PRODUCCIÓN

## Mensual

In [8]:
# Delete the rows with have 'PLAN' in the COMP_S_NAME column
df_mensual = df_mensual[~df_mensual['COMP_S_NAME'].str.contains('PLAN')]
df_mensual.head()

Unnamed: 0,COMP_S_NAME,PROD_DT,VO_OIL_PROD,VO_GAS_PROD,VO_WAT_PROD,DIAS_ON
0,SCH-014TI,1973-09-30,5969.536,1790.86,0.0,30.0
1,SCH-014TI,1973-10-31,4481.408,1344.42,0.0,31.0
2,SCH-014TI,1973-11-30,6205.568,1861.67,0.0,30.0
3,SCH-014TI,1973-12-31,5958.272,1787.48,0.0,31.0
4,SCH-014TI,1974-01-31,5581.184,1674.36,0.0,31.0


In [9]:
# rename columns
df_mensual.rename(columns={'COMP_S_NAME': 'UNIQUEID', 'PROD_DT': 'Date',
                        'DIAS_ON': 'DAYS', 'VO_OIL_PROD': 'OILP', 'VO_WAT_PROD': 'WATP', 'VO_GAS_PROD': 'GASP'}, inplace=True)

# Compute the cumulative productions OIL_cum	GAS_cum	WAT_cum
df_mensual['OIL_cum'] = df_mensual.groupby(['UNIQUEID'])['OILP'].cumsum()
df_mensual['GAS_cum'] = df_mensual.groupby(['UNIQUEID'])['GASP'].cumsum()
df_mensual['WAT_cum'] = df_mensual.groupby(['UNIQUEID'])['WATP'].cumsum()


df_mensual.head()

Unnamed: 0,UNIQUEID,Date,OILP,GASP,WATP,DAYS,OIL_cum,GAS_cum,WAT_cum
0,SCH-014TI,1973-09-30,5969.536,1790.86,0.0,30.0,5969.536,1790.86,0.0
1,SCH-014TI,1973-10-31,4481.408,1344.42,0.0,31.0,10450.944,3135.28,0.0
2,SCH-014TI,1973-11-30,6205.568,1861.67,0.0,30.0,16656.512,4996.95,0.0
3,SCH-014TI,1973-12-31,5958.272,1787.48,0.0,31.0,22614.784,6784.43,0.0
4,SCH-014TI,1974-01-31,5581.184,1674.36,0.0,31.0,28195.968,8458.79,0.0


In [10]:
df_mensual['Date'].dt.year.unique()

array([1973, 1974, 2012, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023,
       1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985,
       1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996,
       1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
       2008, 2009, 2010, 2011, 2013, 2014, 2015, 1972], dtype=int64)

### MERGE DATA FRAMES
In order to filter the data of ```Lower T Sandstone``` and ```Lower U Sandstone```

In [11]:
merged_df = pd.merge(df_mensual, df_maestra[['COMPLETION_LEGAL_NAME', 'WELLBORE_ID', 'RSVR_NAME']],
                    left_on='UNIQUEID', right_on='COMPLETION_LEGAL_NAME', how='outer')

merged_df.head()

Unnamed: 0,UNIQUEID,Date,OILP,GASP,WATP,DAYS,OIL_cum,GAS_cum,WAT_cum,COMPLETION_LEGAL_NAME,WELLBORE_ID,RSVR_NAME
0,SCH-014TI,1973-09-30,5969.536,1790.86,0.0,30.0,5969.536,1790.86,0.0,SCH-014TI,S-14,LOWER T
1,SCH-014TI,1973-10-31,4481.408,1344.42,0.0,31.0,10450.944,3135.28,0.0,SCH-014TI,S-14,LOWER T
2,SCH-014TI,1973-11-30,6205.568,1861.67,0.0,30.0,16656.512,4996.95,0.0,SCH-014TI,S-14,LOWER T
3,SCH-014TI,1973-12-31,5958.272,1787.48,0.0,31.0,22614.784,6784.43,0.0,SCH-014TI,S-14,LOWER T
4,SCH-014TI,1974-01-31,5581.184,1674.36,0.0,31.0,28195.968,8458.79,0.0,SCH-014TI,S-14,LOWER T


In [12]:
merged_df.head()

Unnamed: 0,UNIQUEID,Date,OILP,GASP,WATP,DAYS,OIL_cum,GAS_cum,WAT_cum,COMPLETION_LEGAL_NAME,WELLBORE_ID,RSVR_NAME
0,SCH-014TI,1973-09-30,5969.536,1790.86,0.0,30.0,5969.536,1790.86,0.0,SCH-014TI,S-14,LOWER T
1,SCH-014TI,1973-10-31,4481.408,1344.42,0.0,31.0,10450.944,3135.28,0.0,SCH-014TI,S-14,LOWER T
2,SCH-014TI,1973-11-30,6205.568,1861.67,0.0,30.0,16656.512,4996.95,0.0,SCH-014TI,S-14,LOWER T
3,SCH-014TI,1973-12-31,5958.272,1787.48,0.0,31.0,22614.784,6784.43,0.0,SCH-014TI,S-14,LOWER T
4,SCH-014TI,1974-01-31,5581.184,1674.36,0.0,31.0,28195.968,8458.79,0.0,SCH-014TI,S-14,LOWER T


In [13]:
# verify the reservoir names
merged_df['RSVR_NAME'].unique()

array(['LOWER T', 'UPPER T', 'LOWER U', 'Arenisca T', 'MAIN U', 'UPPER U',
       'TIYUYACU', nan], dtype=object)

In [14]:
# Verify the number of wells of TIYUYACU reservoir
merged_df[merged_df['RSVR_NAME'] == 'TIYUYACU']['UNIQUEID'].unique()

array(['SCH-023ITY', 'SCH-029ITY', 'SCH-084ITY', 'SCH-086ITY',
       'SCH-117ITY'], dtype=object)

In [15]:
# Drop the rows with has TIYUYACU reservoir
df_mensual_Ui_Ti = merged_df[merged_df['RSVR_NAME'] != 'TIYUYACU']

# Filter the DataFrame to only include the Lower T and Lower U Sandstone reservoirs

# Drop the columns we don't need
df_mensual_Ui_Ti.drop(['COMPLETION_LEGAL_NAME', 'WELLBORE_ID', 'RSVR_NAME'], axis=1, inplace=True)

df_mensual_Ui_Ti.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mensual_Ui_Ti.drop(['COMPLETION_LEGAL_NAME', 'WELLBORE_ID', 'RSVR_NAME'], axis=1, inplace=True)


Unnamed: 0,UNIQUEID,Date,OILP,GASP,WATP,DAYS,OIL_cum,GAS_cum,WAT_cum
0,SCH-014TI,1973-09-30,5969.536,1790.86,0.0,30.0,5969.536,1790.86,0.0
1,SCH-014TI,1973-10-31,4481.408,1344.42,0.0,31.0,10450.944,3135.28,0.0
2,SCH-014TI,1973-11-30,6205.568,1861.67,0.0,30.0,16656.512,4996.95,0.0
3,SCH-014TI,1973-12-31,5958.272,1787.48,0.0,31.0,22614.784,6784.43,0.0
4,SCH-014TI,1974-01-31,5581.184,1674.36,0.0,31.0,28195.968,8458.79,0.0


## Diaria

In [16]:
# Delete the rows with have 'PLAN' in the COMPLETION_NAME column
df_diaria = df_diaria[~df_diaria['COMPLETION_NAME'].str.contains('PLAN')]
df_diaria.head()

Unnamed: 0,COMPLETION_NAME,PROD_DATE,HORES_ON,RATE_OIL,RATE_GAS,RATE_WAT
0,SCH-002BTI,2023-03-21,0.0,0.0,0.0,0.0
1,SCH-002BTI,2023-03-22,0.0,0.0,0.0,0.0
2,SCH-002BTI,2023-03-23,0.0,0.0,0.0,0.0
3,SCH-002BTI,2023-03-24,0.0,0.0,0.0,0.0
4,SCH-002BTI,2023-03-25,0.0,0.0,0.0,0.0


In [17]:
# rename columns
df_diaria.rename(columns={'COMPLETION_NAME': 'UNIQUEID', 'PROD_DATE': 'Date',
                        'RATE_OIL': 'OILP', 'RATE_WAT': 'WATP', 'RATE_GAS': 'GASP'}, inplace=True)
df_diaria.head()

Unnamed: 0,UNIQUEID,Date,HORES_ON,OILP,GASP,WATP
0,SCH-002BTI,2023-03-21,0.0,0.0,0.0,0.0
1,SCH-002BTI,2023-03-22,0.0,0.0,0.0,0.0
2,SCH-002BTI,2023-03-23,0.0,0.0,0.0,0.0
3,SCH-002BTI,2023-03-24,0.0,0.0,0.0,0.0
4,SCH-002BTI,2023-03-25,0.0,0.0,0.0,0.0


In [18]:
df_diaria['Date'].dt.year.unique()

array([2023, 2016, 2017, 2018, 2019, 2020, 2021, 2022], dtype=int64)

# INYECCION

# Diaria

In [19]:
df_iny_diaria.head()

Unnamed: 0,COMPLETION_NAME,PROD_DATE,HORES_ON,RATE_WAT_INJ,WELL_INJ_PRESSURE,PUMP_PRESS,TEMPERATURE,OIL_IN_WATER,SUSPENDED_SOLIDS,OXYGEN,...,SKIMER_TURBIDITY,SKIMER_OXYGEN,BOOSTER_SUSPENDED_SOLIDS,BOOSTER_OIL_IN_WATER,BOOSTER_TURBIDITY,BOOSTER_OXYGEN,BOOSTER_SALINITY,BOOSTER_SLOPE,BOOSTER_PLUGGING_INDEX,COMMENTS
0,SCH-005IT,2009-03-01,24.0,0.0,,,,,,,...,,,,,,,,,,
1,SCH-005IT,2009-03-02,24.0,0.0,,,,,,,...,,,,,,,,,,
2,SCH-005IT,2009-03-03,24.0,0.0,,,,,,,...,,,,,,,,,,
3,SCH-005IT,2009-03-04,24.0,0.0,,,,,,,...,,,,,,,,,,
4,SCH-005IT,2009-03-05,24.0,0.0,,,,,,,...,,,,,,,,,,


In [20]:
df_iny_diaria['PROD_DATE'].dt.year.unique()

array([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
       2020, 2021, 2022, 2023], dtype=int64)

In [21]:
df_iny_diaria.columns

Index(['COMPLETION_NAME', 'PROD_DATE', 'HORES_ON', 'RATE_WAT_INJ',
       'WELL_INJ_PRESSURE', 'PUMP_PRESS', 'TEMPERATURE', 'OIL_IN_WATER',
       'SUSPENDED_SOLIDS', 'OXYGEN', 'SALINITY', 'PLUGGING_INDEX', 'TURBIDITY',
       'SLOPE', 'WTK_SUSPENDED_SOLIDS', 'WTK_OIL_IN_WATER', 'WTK_TURBIDITY',
       'WTK_OXYGEN', 'SKIMER_SUSPENDED_SOLIDS', 'SKIMER_OIL_IN_WATER',
       'SKIMER_TURBIDITY', 'SKIMER_OXYGEN', 'BOOSTER_SUSPENDED_SOLIDS',
       'BOOSTER_OIL_IN_WATER', 'BOOSTER_TURBIDITY', 'BOOSTER_OXYGEN',
       'BOOSTER_SALINITY', 'BOOSTER_SLOPE', 'BOOSTER_PLUGGING_INDEX',
       'COMMENTS'],
      dtype='object')

In [22]:
# this information belong to updated report to 11/2023
# the well 'SCHAG-396IUI' isn't in the report
injector_names = [
    "SCH-048", "SCH-015", "SCH-076", "SCH-105I", "SCH-005", "SCH-090", "SCH-036", "SCH-103",
    "SCHI-200", "SCHI-067B", "SCHAD-361", "SCHAD-363S1", "SCH-045B", "SCHAF-382", "SCHAE-377",
    "SCHAB-315", "SCHS-261", "SCHAK-431", "SCHAA-307", "SCHM-340", "SCHE-232", "SCHAG-396IUI"]

In [23]:
# Wells which are in the df_iny_diaria['COMPLETION_NAME'] column and injector_names

# Empty dictionary to store matching injectors
matching_injectors = {}

# Iterate through each 'COMPLETION_NAME' in df_iny_diaria
for uniqueid in df_iny_diaria['COMPLETION_NAME']:
    # Check if the current 'COMPLETION_NAME' starts with any injector_name
    for injector_name in injector_names:
        if uniqueid.startswith(injector_name):
            # If the injector_name is not in the dictionary, add it with an empty set
            if injector_name not in matching_injectors:
                matching_injectors[injector_name] = set()
            # Add the current 'COMPLETION_NAME' to the set of matching injectors for the current injector_name
            matching_injectors[injector_name].add(uniqueid)

# Convert sets to lists before printing the dictionary
matching_injectors = {key: list(value) for key, value in matching_injectors.items()}

print("Dictionary with matching injectors:")
for key, values in matching_injectors.items():
    print(f"{key}: {values}")


Dictionary with matching injectors:
SCH-005: ['SCH-005IU', 'SCH-005IT']
SCH-015: ['SCH-015IU']
SCH-036: ['SCH-036IUI']
SCH-045B: ['SCH-045BIUI']
SCH-048: ['SCH-048IU', 'SCH-048IT']
SCH-076: ['SCH-076IU', 'SCH-076IT']
SCH-090: ['SCH-090U']
SCH-103: ['SCH-103IUI']
SCH-105I: ['SCH-105IT', 'SCH-105IU']
SCHAB-315: ['SCHAB-315IUI']
SCHAD-361: ['SCHAD-361IUI']
SCHAD-363S1: ['SCHAD-363S1IUI']
SCHAE-377: ['SCHAE-377IUI']
SCHAF-382: ['SCHAF-382IUI']
SCHAG-396IUI: ['SCHAG-396IUI']
SCHAK-431: ['SCHAK-431IUI']
SCHE-232: ['SCHE-232ITI']
SCHI-067B: ['SCHI-067BIUI']
SCHI-200: ['SCHI-200IUI']
SCHM-340: ['SCHM-340ITI']
SCHS-261: ['SCHS-261IUI']


In [24]:
# Wells which are in the df_iny_diaria['COMPLETION_NAME'] column and not in injector_names

# Create a list of 'COMPLETION_NAME' values that did not match any injector_name
not_matching_injectors = [uniqueid for uniqueid in df_iny_diaria['COMPLETION_NAME'] if not any(
                        injector_name in uniqueid for injector_name in matching_injectors)]

# Remove duplicates by converting the list to a set and then back to a list
not_matching_injectors = list(set(not_matching_injectors))

print('Disposal Wells')
not_matching_injectors

Disposal Wells


['SCH-086ITY',
 'SCH-084ITY',
 'SCH-023ITY',
 'SCHI-201IH',
 'SCHB-221HTY',
 'SCH-029ITY',
 'SCH-117ITY']

In [25]:
'''
We may delete the rows with the wells which are not in the injector_names list, but we will lose
the information of the disposal wells
'''

# Delete the rows with the wells which are not in the injector_names list
# df_iny_diaria = df_iny_diaria[~df_iny_diaria['COMPLETION_NAME'].isin(not_matching_injectors)]

# df_iny_diaria.head()

'\nWe may delete the rows with the wells which are not in the injector_names list, but we will lose\nthe information of the disposal wells\n'

In [26]:
# rename columns
df_iny_diaria = df_iny_diaria[['COMPLETION_NAME', 'PROD_DATE', 'HORES_ON', 'RATE_WAT_INJ', 'WELL_INJ_PRESSURE']].rename(
                                columns={'COMPLETION_NAME': 'Pozo', 'PROD_DATE': 'Date', 
                                        'RATE_WAT_INJ': 'Winj', 'WELL_INJ_PRESSURE': 'Pia'})
df_iny_diaria.head()

Unnamed: 0,Pozo,Date,HORES_ON,Winj,Pia
0,SCH-005IT,2009-03-01,24.0,0.0,
1,SCH-005IT,2009-03-02,24.0,0.0,
2,SCH-005IT,2009-03-03,24.0,0.0,
3,SCH-005IT,2009-03-04,24.0,0.0,
4,SCH-005IT,2009-03-05,24.0,0.0,


In [27]:
merged_df_iny = pd.merge(df_iny_diaria, df_sc[['COMPLETION_LEGAL_NAME', 'RESERVORIO']],
                    left_on='Pozo', right_on='COMPLETION_LEGAL_NAME', how = 'left')

# make a copy of the merged_df_iny DataFrame
df_iny_diaria = merged_df_iny.copy()

# delete 'COMPLETION_LEGAL_NAME', 'WELL_LEGAL_NAME' and 'HORES_ON' columns
df_iny_diaria.drop(['COMPLETION_LEGAL_NAME', 'HORES_ON'], axis=1, inplace=True)

# rename columns
df_iny_diaria.rename(columns={'RESERVORIO': 'Capa'}, inplace=True)

# change values in capa column, where T to TI, U to UI, U-I to UI
df_iny_diaria['Capa'] = df_iny_diaria['Capa'].replace({'T': 'TI', 'U': 'UI', 'U-I': 'UI'})

# Calculate the accumulated water injection WATER_INJ_.CUM
df_iny_diaria['WATER_INJ_.CUM'] = df_iny_diaria.groupby(['Pozo'])['Winj'].cumsum()

df_iny_diaria.head()

Unnamed: 0,Pozo,Date,Winj,Pia,Capa,WATER_INJ_.CUM
0,SCH-005IT,2009-03-01,0.0,,TI,0.0
1,SCH-005IT,2009-03-02,0.0,,TI,0.0
2,SCH-005IT,2009-03-03,0.0,,TI,0.0
3,SCH-005IT,2009-03-04,0.0,,TI,0.0
4,SCH-005IT,2009-03-05,0.0,,TI,0.0


In [28]:
# names of the layers for injection and re-injection process
df_iny_diaria['Capa'].unique()

array(['TI', 'UI', 'TY', nan], dtype=object)

In [29]:
# Visualize the wells which don't have a layer
df_iny_diaria[df_iny_diaria['Capa'].isna()]['Pozo'].unique()

array(['SCHAD-361IUI', 'SCHB-221HTY', 'SCHE-232ITI', 'SCHI-067BIUI',
       'SCHI-200IUI', 'SCHI-201IH'], dtype=object)

In [30]:
# Assign the layer to the wells cause the layer is in df_SC
df_iny_diaria.loc[df_iny_diaria['Pozo'] == 'SCHAD-361IUI', 'Capa'] = 'UI'
df_iny_diaria.loc[df_iny_diaria['Pozo'] == 'SCHI-067BIUI', 'Capa'] = 'UI'
df_iny_diaria.loc[df_iny_diaria['Pozo'] == 'SCHI-200IUI', 'Capa'] = 'UI'

df_iny_diaria.loc[df_iny_diaria['Pozo'] == 'SCHE-232ITI', 'Capa'] = 'TI'

df_iny_diaria.loc[df_iny_diaria['Pozo'] == 'SCHB-221HTY', 'Capa'] = 'TY'

df_iny_diaria.loc[df_iny_diaria['Pozo'] == 'SCHI-201IH', 'Capa'] = 'H'

# change name of the wells to determine as injector
df_iny_diaria.loc[df_iny_diaria['Pozo'] == 'SCH-090U', 'Pozo'] = 'SCH-090IU'

df_iny_diaria.head()

Unnamed: 0,Pozo,Date,Winj,Pia,Capa,WATER_INJ_.CUM
0,SCH-005IT,2009-03-01,0.0,,TI,0.0
1,SCH-005IT,2009-03-02,0.0,,TI,0.0
2,SCH-005IT,2009-03-03,0.0,,TI,0.0
3,SCH-005IT,2009-03-04,0.0,,TI,0.0
4,SCH-005IT,2009-03-05,0.0,,TI,0.0


In [31]:
df_iny_diaria[df_iny_diaria['Pozo'] == 'SCH-090IU']

Unnamed: 0,Pozo,Date,Winj,Pia,Capa,WATER_INJ_.CUM
59390,SCH-090IU,2009-03-01,0.0,,UI,0.0
59391,SCH-090IU,2009-03-02,0.0,,UI,0.0
59392,SCH-090IU,2009-03-03,0.0,,UI,0.0
59393,SCH-090IU,2009-03-04,0.0,,UI,0.0
59394,SCH-090IU,2009-03-05,0.0,,UI,0.0
...,...,...,...,...,...,...
64490,SCH-090IU,2023-08-28,3355.0,740.0,UI,16586914.0
64491,SCH-090IU,2023-08-29,3355.0,740.0,UI,16590269.0
64492,SCH-090IU,2023-08-30,3354.0,740.0,UI,16593623.0
64493,SCH-090IU,2023-08-31,3353.0,740.0,UI,16596976.0


## MENSUAL

In [32]:
merged_df_iny.head()

Unnamed: 0,Pozo,Date,HORES_ON,Winj,Pia,COMPLETION_LEGAL_NAME,RESERVORIO
0,SCH-005IT,2009-03-01,24.0,0.0,,SCH-005IT,TI
1,SCH-005IT,2009-03-02,24.0,0.0,,SCH-005IT,TI
2,SCH-005IT,2009-03-03,24.0,0.0,,SCH-005IT,TI
3,SCH-005IT,2009-03-04,24.0,0.0,,SCH-005IT,TI
4,SCH-005IT,2009-03-05,24.0,0.0,,SCH-005IT,TI


In [33]:
# 
df_iny_mensual = merged_df_iny[['Pozo', 'Date', 'HORES_ON', 'Winj', 'Pia']]
df_iny_mensual['DAYS'] = df_iny_mensual['HORES_ON']/24        # convert hours to days

df_iny_mensual.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_iny_mensual['DAYS'] = df_iny_mensual['HORES_ON']/24        # convert hours to days


Unnamed: 0,Pozo,Date,HORES_ON,Winj,Pia,DAYS
0,SCH-005IT,2009-03-01,24.0,0.0,,1.0
1,SCH-005IT,2009-03-02,24.0,0.0,,1.0
2,SCH-005IT,2009-03-03,24.0,0.0,,1.0
3,SCH-005IT,2009-03-04,24.0,0.0,,1.0
4,SCH-005IT,2009-03-05,24.0,0.0,,1.0


In [34]:
df_iny_mensual['Date'].dt.year.unique()

array([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
       2020, 2021, 2022, 2023], dtype=int64)

In [35]:
# Filter pressure values greater than 0 and not null
df_iny_mensual['Pia_Days'] = df_iny_mensual['Pia'].apply(lambda x: 1 if x > 0 and not pd.isnull(x) else 0)

# Calculate the accumulated values by month
df_iny_mensual['Month'] = df_iny_mensual.groupby(['Pozo', df_iny_mensual['Date'].dt.to_period('M')])['Date'].transform('min')
df_iny_month = df_iny_mensual.groupby(['Pozo', 'Month'])[['Winj', 'DAYS', 'Pia', 'Pia_Days']].sum().reset_index()

# Pressure average by month
df_iny_month['Pia_Avg'] = df_iny_month['Pia'] / df_iny_month['Pia_Days']

df_iny_month.head()

Unnamed: 0,Pozo,Month,Winj,DAYS,Pia,Pia_Days,Pia_Avg
0,SCH-005IT,2009-03-01,0.0,31.0,0.0,0,
1,SCH-005IT,2009-07-01,0.0,31.0,0.0,0,
2,SCH-005IT,2009-09-01,0.0,30.0,0.0,0,
3,SCH-005IT,2009-11-01,128971.0,30.0,0.0,0,
4,SCH-005IT,2009-12-01,134078.0,31.0,0.0,0,


In [36]:

# rename column
df_iny_month.rename(columns={'Pozo': 'UNIQUEID', 'Month': 'Date', 'Winj': 'WATER_INJ_VOL', 'Pia_Avg': 'WHP'}, inplace=True)

#create a new column INJ_TYPE
df_iny_month['INJ_TYPE'] = 'water'

df_iny_month = df_iny_month[['UNIQUEID', 'Date', 'INJ_TYPE', 'WATER_INJ_VOL', 'DAYS', 'WHP']]

df_iny_month.head()

Unnamed: 0,UNIQUEID,Date,INJ_TYPE,WATER_INJ_VOL,DAYS,WHP
0,SCH-005IT,2009-03-01,water,0.0,31.0,
1,SCH-005IT,2009-07-01,water,0.0,31.0,
2,SCH-005IT,2009-09-01,water,0.0,30.0,
3,SCH-005IT,2009-11-01,water,128971.0,30.0,
4,SCH-005IT,2009-12-01,water,134078.0,31.0,


# EVENTOS - PERFORADOS

## Notas

In [37]:
# Import the Excel file
excel_file_eventos = r'C:\Users\brand\OneDrive - Universidad Central del Ecuador\Documentos\Maestría EOR\Tesis EOR\Info EPPEC\2018_INFORMACION POZOS SACHA-EVENTOS -INTERVALOS_PAM.xlsx'

# Read the Excel file into a DataFrame
df_eventos = pd.read_excel(excel_file_eventos, sheet_name='POZOS EVENTOS', skiprows=6)

df_eventos.head()

Unnamed: 0,BLOQUE,ACTIVO,POZO,EVENTO,COD EVENTO,TRABAJO,FECHA INICIO,FECHA FIN
0,B60,SA,SCHA-001,DEV DRILLING,DRL,DRL,1969-01-21,1969-02-18 00:00:00
1,B60,SA,SCHA-001,DEV COMPLETION,COM,COM,1969-02-22,1969-02-25 00:00:00
2,B60,SA,SCH-002,DEV DRILLING,DRL,DRL,1969-07-21,1969-08-22 00:00:00
3,B60,SA,SCH-002,DEV COMPLETION,COM,COM,1969-08-29,1969-08-31 00:00:00
4,B60,SA,SCH-003,DEV DRILLING,DRL,DRL,1969-09-04,1969-10-03 00:00:00


In [38]:
# Put in the desired format
df_eventos = df_eventos[['POZO', 'FECHA FIN', 'TRABAJO', 'EVENTO']]
df_eventos.rename(columns={'POZO': 'IDENTIFICADOR', 'FECHA FIN': 'FECHA',
                        'TRABAJO': 'CATEGORÍA', 'EVENTO': 'DATO'}, inplace=True)
df_eventos.head()

Unnamed: 0,IDENTIFICADOR,FECHA,CATEGORÍA,DATO
0,SCHA-001,1969-02-18 00:00:00,DRL,DEV DRILLING
1,SCHA-001,1969-02-25 00:00:00,COM,DEV COMPLETION
2,SCH-002,1969-08-22 00:00:00,DRL,DEV DRILLING
3,SCH-002,1969-08-31 00:00:00,COM,DEV COMPLETION
4,SCH-003,1969-10-03 00:00:00,DRL,DEV DRILLING


## Perforados

In [39]:
# Import the Excel file
excel_file_perf = r'C:\Users\brand\OneDrive - Universidad Central del Ecuador\Documentos\Maestría EOR\Tesis EOR\Info EPPEC\NUEVA INFORMACIÓN 06112023\PRF-SA-RPR-231108-INTERVALOS SACHA.xlsx'

# Read the Excel file into a DataFrame
df_perf = pd.read_excel(excel_file_perf, sheet_name='INTERVALOS CAÑONEADOS', skiprows=13)

df_perf.head()

Unnamed: 0,BLOQUE,CAMPO,ACTIVO,NOMBRE LARGO,POZO,WELLBORE,FECHA CAÑONEO,Top MD (ft),Btm MD (ft),RAZON,ESTATUS,ARENA
0,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9730.0,9737.0,PRODUCTION,OPEN,"ARENA ""T"""
1,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9954.0,9982.0,PRODUCTION,OPEN,HOLLIN INFERIOR
2,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9740.0,9746.0,PRODUCTION,OPEN,"ARENA ""T"""
3,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9935.0,9939.0,PRODUCTION,OPEN,HOLLIN INFERIOR
4,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9940.0,9952.0,PRODUCTION,OPEN,HOLLIN INFERIOR


In [40]:
# Put in the desired format
# TODO: check if the columns are correct and their names
df_perf = df_perf[['POZO', 'FECHA CAÑONEO', 'Top MD (ft)', 'Btm MD (ft)', 'RAZON']].rename(
                columns={'POZO': 'IDENTIFICADOR', 'FECHA CAÑONEO': 'FECHA',
                        'Top MD (ft)': 'TOPE', 'Btm MD (ft)': 'BASE', 'RAZON': 'TIPO_DE_PUNZADO'})
df_perf.head()

Unnamed: 0,IDENTIFICADOR,FECHA,TOPE,BASE,TIPO_DE_PUNZADO
0,SCH-003,1969-10-08,9730.0,9737.0,PRODUCTION
1,SCH-003,1969-10-08,9954.0,9982.0,PRODUCTION
2,SCH-003,1969-10-08,9740.0,9746.0,PRODUCTION
3,SCH-003,1969-10-08,9935.0,9939.0,PRODUCTION
4,SCH-003,1969-10-08,9940.0,9952.0,PRODUCTION


# INSTALACIONES

In [41]:
import pdfplumber
import re
from collections import namedtuple

In [42]:
# pdf_file = pdfplumber.open('2017_SCHP-188 WO# 08_ZS.pdf')

In [43]:
loc_colums = ['No', 'Jts', 'Tope MD', 'Tope TVD', 'Longitud', 'OD Nom', 'Descripción']

In [44]:
# with pdfplumber.open('2017_SCHP-188 WO# 08_ZS.pdf') as pdf:
#     data = pdf.pages[0].extract_tables()
#     # data = data[0][1:]
# data

# PRUEBAS DE PRODUCCIÓN

In [45]:
df_production_test.head()

Unnamed: 0,PTYPE,COMPLETION_LEGAL_NAME,TEST_DATE,T_HRS,TEST_OIL_24,TEST_GAS_24,TEST_WAT_24,GRAV_OIL API,BSW,CASING_PRESS,...,REMARKS,TEST_PURPOSE,INYECTION_PSI,PLANT_PSI,INJECTION_VOL,INJECTION_API,RETURN_VOL,RETURN_BSW,RETURN_API,VOL_REAL
0,COMP,SCH-002BTI,2015-01-02,5.0,110.0,0.0,2.0,27.4,1.786,0.0,...,Realizar prueba de 12 horas.,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
1,COMP,SCH-002BTI,2015-01-05,5.0,145.0,0.0,3.0,27.4,2.027,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
2,COMP,SCH-002BTI,2015-01-15,5.0,145.0,0.0,3.0,27.4,2.027,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
3,COMP,SCH-002BTI,2015-01-20,5.0,122.0,0.0,3.0,27.4,2.4,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
4,COMP,SCH-002BTI,2015-01-25,5.0,120.0,0.0,2.0,27.4,1.639,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0


In [46]:
df_production_test.columns

Index(['PTYPE', 'COMPLETION_LEGAL_NAME', 'TEST_DATE', 'T_HRS', 'TEST_OIL_24',
       'TEST_GAS_24', 'TEST_WAT_24', 'GRAV_OIL API', 'BSW', 'CASING_PRESS',
       'PBHP', 'TUBING_PRESS', 'PIP', 'FLAP', 'SBHP', 'GAS_SP_GRAVITIY',
       'TUBING_TMP', 'PI', 'AMPS_A', 'PUMP_TMP', 'MOTOR_HZ', 'POWER_KW',
       'STAGE_COUNT', 'VOLTAGE', 'INTAKE_DEPTH', 'PUMP_TYPE', 'SALINITY',
       'REMARKS', 'TEST_PURPOSE', 'INYECTION_PSI', 'PLANT_PSI',
       'INJECTION_VOL', 'INJECTION_API', 'RETURN_VOL', 'RETURN_BSW',
       'RETURN_API', 'VOL_REAL'],
      dtype='object')

In [47]:

# TODO: check the columns we need and their names

# select the columns we need
df_production_test = df_production_test[['COMPLETION_LEGAL_NAME', 'TEST_DATE', 'TEST_OIL_24', 'TEST_WAT_24', 'BSW', 'T_HRS',
                    'TEST_GAS_24', 'CASING_PRESS', 'INTAKE_DEPTH', 'MOTOR_HZ', 'PIP', 'AMPS_A', 'PUMP_TYPE', 'GRAV_OIL API']]

# rename columns
df_production_test.rename(columns={'COMPLETION_LEGAL_NAME': 'UNIQUEID', 'TEST_DATE': 'DATE',
                        'TEST_OIL_24': 'OIL_FLOW', 'TEST_WAT_24': 'WATER_FLOW', 'BSW': 'BSW', 'T_HRS': 'DURATION',
                        'TEST_GAS_24': 'GAS_FLOW', 'CASING_PRESS': 'CHP', 'INTAKE_DEPTH': 'PUMP_DEPTH', 'MOTOR_HZ': 'Frecuency',
                        'PIP': 'PIP', 'AMPS_A': 'Amperage', 'ALS': 'PUMP_TYPE', 'GRAV_OIL API': 'OIL_GRAVITY'}, inplace=True)

df_production_test.head()

Unnamed: 0,UNIQUEID,DATE,OIL_FLOW,WATER_FLOW,BSW,DURATION,GAS_FLOW,CHP,PUMP_DEPTH,Frecuency,PIP,Amperage,PUMP_TYPE,OIL_GRAVITY
0,SCH-002BTI,2015-01-02,110.0,2.0,1.786,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
1,SCH-002BTI,2015-01-05,145.0,3.0,2.027,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
2,SCH-002BTI,2015-01-15,145.0,3.0,2.027,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
3,SCH-002BTI,2015-01-20,122.0,3.0,2.4,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
4,SCH-002BTI,2015-01-25,120.0,2.0,1.639,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4


In [48]:
df_production_test['UNIQUEID'].nunique()

258

# SURVEYS

In [49]:
#  Define a function to read the survey data
def read_survey(file):
    # Initialize a variable to store the well name
    well_name = None
    
    # Open the file and extract the well name
    with open(file, 'r') as f:
        for line in f:
            if "WELL NAME:" in line:
                well_name = line.split("WELL NAME:")[1].strip()
                break

    # Initialize a variable to count header rows
    header_rows = 0

    # Open the file and determine how many header rows there are
    with open(file, 'r') as f:
        for line in f:
            if line.startswith("#"):
                header_rows += 1
            else:
                break

    # Read the data from the file, skipping the header rows
    data = pd.read_csv(file, sep='\s+', skiprows=header_rows)

    # Add a 'well_name' column with the extracted well name
    data['well_name'] = well_name

    return data.iloc[1:]  # Return the data without the first 2 rows


In [50]:
# Define the path to the directory containing survey information
path_surveys = r'C:\Users\brand\OneDrive - Universidad Central del Ecuador\Documentos\Maestría EOR\Tesis EOR\Info EPPEC\4. Información Surveys'

# List the files in the directory
files = os.listdir(path_surveys)

print(files)

['S-1.las', 'S-10.las', 'S-100.las', 'S-101.las', 'S-102.las', 'S-103.las', 'S-104.las', 'S-105.las', 'S-106.las', 'S-107.las', 'S-108.las', 'S-109.las', 'S-11.las', 'S-110.las', 'S-111.las', 'S-112.las', 'S-113.las', 'S-114.las', 'S-115.las', 'S-116.las', 'S-117.las', 'S-118.las', 'S-119.las', 'S-12.las', 'S-120.las', 'S-121.las', 'S-122.las', 'S-123.las', 'S-124.las', 'S-125.las', 'S-126.las', 'S-127.las', 'S-128.las', 'S-129.las', 'S-13.las', 'S-130.las', 'S-131.las', 'S-132.las', 'S-133.las', 'S-134.las', 'S-135.las', 'S-136.las', 'S-137.las', 'S-138.las', 'S-139.las', 'S-14.las', 'S-140D.las', 'S-141.las', 'S-142.las', 'S-143.las', 'S-144.las', 'S-145.las', 'S-146.las', 'S-147D.las', 'S-148.las', 'S-148ST.las', 'S-149.las', 'S-15.las', 'S-150D.las', 'S-151D.las', 'S-152D.las', 'S-153.las', 'S-154D.las', 'S-155D.las', 'S-156.las', 'S-157.las', 'S-158.las', 'S-159.las', 'S-16.las', 'S-160D.las', 'S-161.las', 'S-162.las', 'S-163D.las', 'S-164D.las', 'S-165D.las', 'S-166D.las', 'S-167

In [51]:
# Create an empty list to store the DataFrames
survey_data = []

# Loop through the files and read each survey data file
for file in files:
    survey_data.append(read_survey(os.path.join(path_surveys, file)))

# Concatenate the DataFrames into a single DataFrame
df_surveys = pd.concat(survey_data)

df_surveys.head()

Unnamed: 0,MD,X,Y,Z,TVD,DX,DY,AZIM,INCL,DLS,well_name
1,-0.0,290807.1683,9963504.0,883.084,-0.0,0.0,0.0,0.0,0.0,0.0,S-1
2,10158.5,290807.1683,9963504.0,-9275.416,10158.5,0.0,0.0,0.0,0.0,0.0,S-1
1,-0.0,294593.0,9967797.0,897.00006,-0.0,0.0,0.0,0.0,0.0,0.0,S-10
2,9964.0,294593.0,9967797.0,-9066.99994,9964.0,0.0,0.0,0.0,0.0,0.0,S-10
1,-0.0,290696.9883,9959794.0,884.2,-0.0,0.0,0.0,0.0,0.0,0.0,S-100


In [52]:
# change the name of the columns to Wellbore	Md	TVD	XDelt	YDelt
df_surveys.rename(columns={'well_name':'Wellbore', 'MD':'Md', 'TVD':'TVD', 'DX':'XDelt', 'DY':'YDelt'}, inplace=True)
df_surveys = df_surveys[['Wellbore', 'Md', 'TVD', 'XDelt', 'YDelt']]
df_surveys.head()

Unnamed: 0,Wellbore,Md,TVD,XDelt,YDelt
1,S-1,-0.0,-0.0,0.0,0.0
2,S-1,10158.5,10158.5,0.0,0.0
1,S-10,-0.0,-0.0,0.0,0.0
2,S-10,9964.0,9964.0,0.0,0.0
1,S-100,-0.0,-0.0,0.0,0.0


# Datos Pozo-Capa

In [53]:
# Import the Excel files
excel_file_petro_2022 = r'C:\Users\brand\OneDrive - Universidad Central del Ecuador\Documentos\Maestría EOR\Tesis EOR\Info EPPEC\NUEVA INFORMACIÓN 06112023\Parámetros Petrofísicos pozos Sacha 2022.xlsx'
excel_file_petro_2020 = r'C:\Users\brand\OneDrive - Universidad Central del Ecuador\Documentos\Maestría EOR\Tesis EOR\Info EPPEC\NUEVA INFORMACIÓN 06112023\Parámetros Petrofísicos Sacha Dec 2020.xlsx'

# Read the Excel file into a DataFrame
df_petrophysical_2022 = pd.read_excel(excel_file_petro_2022, skiprows=1)
df_petrophysical_2020 = pd.read_excel(excel_file_petro_2020)

In [54]:
# Function to filter information
def petrophysical_processing(df_petrophysical: pd.DataFrame) -> pd.DataFrame:

    # Filter desired data
    df_petrophysical.drop(df_petrophysical[df_petrophysical['Flag Name'] == 'ROCK'].index, inplace=True)
    df_petrophysical.drop(df_petrophysical[(df_petrophysical['Zones'] != 'TLUS') & (df_petrophysical['Zones'] != 'TmTS')].index, inplace=True)
    df_petrophysical.head()

    # rename 'Zones' where injected water
    df_petrophysical['Zones'] = df_petrophysical['Zones'].replace({'TLUS': 'Ui', 'TmTS': 'Ti'})

    # Filter the rows with Flag Name = PAY
    df_pay = df_petrophysical.loc[df_petrophysical['Flag Name'] == 'PAY'].copy()

    # Make a filter in df_petrophysical to select rows that have the same 'Well' and 'Zones' values as df_pay
    df_res = df_petrophysical.loc[(df_petrophysical['Flag Name'] == 'RES') & (
                                    df_petrophysical['Well'].isin(df_pay['Well'])) & (
                                    df_petrophysical['Zones'].isin(df_pay['Zones']))]

    # Add to df_pay the value of Net Reservoir to df_pay
    df_pay['Gross_RES'] = df_res['Net'].values

    # Put in the desired format
    df_pay = df_pay[['Well', 'Zones', 'Gross_RES', 'Net', 'Gross', 'Av_Porosity', 'Top', 'Bottom', 'Av_PA_K']].rename(
                                    columns={'Well':'IDENTIFICADOR', 'Zones': 'CAPA', 'Net': 'ESPESOR_UTIL',
                                            'Gross_RES': 'ESPESOR_PERMEABLE', 'Gross': 'ESPESOR_TOTAL', 'Av_Porosity': 'POROSIDAD',
                                            'Top': 'TOPE_DE_CAPA', 'Bottom': 'BASE_DE_CAPA', 'Av_PA_K': 'PERMEABILIDAD_EN_X'})
    
    return df_pay

In [55]:
# Preprocessing the data in order to have the desired format and data
df_pay_2022 = petrophysical_processing(df_petrophysical_2022)
df_pay_2020 = petrophysical_processing(df_petrophysical_2020)

In [56]:
# Perform an inner merge on df_pay_2022 and df_pay_2020 to find matching rows
df_resultado = pd.merge(df_pay_2022, df_pay_2020[['IDENTIFICADOR', 'CAPA']], how='inner', on=['IDENTIFICADOR', 'CAPA'])

# Perform a left merge on df_pay_2020 and df_resultado and add an indicator column
df_pay_2020 = pd.merge(df_pay_2020, df_resultado[['IDENTIFICADOR', 'CAPA']], how='left', on=['IDENTIFICADOR', 'CAPA'], indicator=True)

# Filter out the rows where the indicator column is 'both'
df_pay_2020 = df_pay_2020[df_pay_2020['_merge'] != 'both']

# Drop the indicator column
df_pay_2020 = df_pay_2020.drop(columns=['_merge'])

# Concatenate df_pay_2022 and df_pay_2020
df_petrophysical = pd.concat([df_pay_2022, df_pay_2020], ignore_index=True, sort=False)

df_petrophysical.head()

Unnamed: 0,IDENTIFICADOR,CAPA,ESPESOR_PERMEABLE,ESPESOR_UTIL,ESPESOR_TOTAL,POROSIDAD,TOPE_DE_CAPA,BASE_DE_CAPA,PERMEABILIDAD_EN_X
0,SCHW-268R1,Ui,23.054,23.054,52.942,0.149,9823.044,9875.986,464.303
1,SCHW-268R1,Ti,30.57,29.568,109.5,0.127,10019.0,10128.5,260.5
2,SCHAI-415,Ui,27.047,26.546,30.421,0.134,9793.579,9824.0,266.673
3,SCHAI-415,Ti,24.042,24.042,60.062,0.146,10014.94,10075.0,440.43
4,SCHAI-413S1,Ui,26.96,26.96,43.888,0.112,9786.162,9830.05,135.45


# SAVE FILTERED DATA

In [58]:
# Save dataframes to Excel file
with pd.ExcelWriter('Data_draft.xlsx') as writer:
    df_wells.to_excel(writer, sheet_name='Pozos', index=False)
    df_surveys.to_excel(writer, sheet_name='Trayectorias', index=False)
    df_perf.to_excel(writer, sheet_name='Perforados', index=False)
    df_eventos.to_excel(writer, sheet_name='Notas', index=False)
    df_mensual_Ui_Ti.to_excel(writer, sheet_name='Producción Mensual', index=False)
    df_iny_month.to_excel(writer, sheet_name='Inyección Mensual OFM', index=False)
    df_production_test.to_excel(writer, sheet_name='Pruebas de Producción', index=False)
    df_iny_diaria.to_excel(writer, sheet_name='Inyección Diaria OFM', index=False)
    df_petrophysical.to_excel(writer, sheet_name='Datos Pozo-Capa', index=False)

# ACUMULADOS

In [59]:
merged_df.head()

Unnamed: 0,UNIQUEID,Date,OILP,GASP,WATP,DAYS,OIL_cum,GAS_cum,WAT_cum,COMPLETION_LEGAL_NAME,WELLBORE_ID,RSVR_NAME
0,SCH-014TI,1973-09-30,5969.536,1790.86,0.0,30.0,5969.536,1790.86,0.0,SCH-014TI,S-14,LOWER T
1,SCH-014TI,1973-10-31,4481.408,1344.42,0.0,31.0,10450.944,3135.28,0.0,SCH-014TI,S-14,LOWER T
2,SCH-014TI,1973-11-30,6205.568,1861.67,0.0,30.0,16656.512,4996.95,0.0,SCH-014TI,S-14,LOWER T
3,SCH-014TI,1973-12-31,5958.272,1787.48,0.0,31.0,22614.784,6784.43,0.0,SCH-014TI,S-14,LOWER T
4,SCH-014TI,1974-01-31,5581.184,1674.36,0.0,31.0,28195.968,8458.79,0.0,SCH-014TI,S-14,LOWER T


In [60]:
# amount of unique wells
unique_wells = list(merged_df['WELLBORE_ID'].unique())
# unique_wells

In [61]:
def Accum_by_sand(sand):
    # Crear una lista para almacenar los resultados
    results = []

    # Iterar a través de los nombres de pozos en unique_wells
    for well_name in unique_wells:
        # Filtrar el DataFrame por 'RSVR_NAME' igual a 'Lower T Sandstone'
        filtered_df = merged_df[(merged_df['WELLBORE_ID'] == well_name) & (merged_df['RSVR_NAME'] == sand)]
        
        # Calcular la suma de 'OILP' para el pozo actual
        accum_oil = filtered_df['OILP'].sum()
        
        # Agregar el resultado a la lista de resultados
        results.append([well_name, accum_oil])

    # Crear un DataFrame a partir de la lista de resultados
    result_df = pd.DataFrame(results, columns=['Well_name', 'Accum_oil'])

    return result_df

In [62]:
# Accum_T_Low = Accum_by_sand('LOWER T')
# Accum_T_Low.head()

Unnamed: 0,Well_name,Accum_oil
0,S-14,89751.149
1,S-28,9011453.8
2,S-32,4055975.774
3,S-37,6446044.132
4,S-88,5712634.25


In [63]:
# Accum_U_Low = Accum_by_sand('LOWER U')
# Accum_U_Low.head()

Unnamed: 0,Well_name,Accum_oil
0,S-14,55518.592
1,S-28,3981943.086
2,S-32,0.0
3,S-37,5052855.387
4,S-88,0.0
