In [85]:
# import libraries
import pandas as pd     # library for data analysis and manipulation tools
import numpy as np      # library for scientific computing and data manipulation
import pyodbc           # library for connecting to SQL Server
import lasio           # library for reading and writing Log ASCII Standard (LAS) files
import re              # library for regular expression matching operations
import os              # library for interacting with operating system

In [86]:
# Establish a connection to the Access database
conn = pyodbc.connect(r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:\Users\brand\OneDrive\Documents\Maestría EOR\Tesis EOR\Info EPPEC\AC_SACHA_AGO_2023_U_T.mdb;')

# Define your SQL queries with just columns we need
# sql_query_maestra_all = 'SELECT * FROM MAESTRA'
sql_query_maestra = 'SELECT COMPLETION_UWI, WELL_LEGAL_NAME, COMPLETION_COORDINATE_X, COMPLETION_COORDINATE_Y, COMPLETION_LEGAL_NAME, OW_WELL_PROD_ZONE, FIELD FROM MAESTRA'
# sql_query_sc_all = 'SELECT * FROM SC'
sql_query_sc = 'SELECT COMPLETION_LEGAL_NAME, FECHA_COMPLETACION FROM SC'
sql_mensual = 'SELECT * FROM MENSUAL'
sql_diaria = 'SELECT * FROM DIARIA'
sql_iny_diaria = 'SELECT * FROM INY_DIARIA'
sql_production_test = 'SELECT * FROM PRUEBA_POZO'

In [87]:
# Execute the queries and fetch the data into a DataFrames
df_maestra = pd.read_sql(sql_query_maestra, conn)
df_sc = pd.read_sql(sql_query_sc, conn)
df_mensual = pd.read_sql(sql_mensual, conn)
df_diaria = pd.read_sql(sql_diaria, conn)
df_iny_diaria = pd.read_sql(sql_iny_diaria, conn)
df_production_test = pd.read_sql(sql_production_test, conn)

# Close the database connection
conn.close()

  df_maestra = pd.read_sql(sql_query_maestra, conn)
  df_sc = pd.read_sql(sql_query_sc, conn)
  df_mensual = pd.read_sql(sql_mensual, conn)


  df_diaria = pd.read_sql(sql_diaria, conn)
  df_iny_diaria = pd.read_sql(sql_iny_diaria, conn)
  df_production_test = pd.read_sql(sql_production_test, conn)


# MAESTRA and SC

In [88]:
df_maestra.head()

Unnamed: 0,COMPLETION_UWI,WELL_LEGAL_NAME,COMPLETION_COORDINATE_X,COMPLETION_COORDINATE_Y,COMPLETION_LEGAL_NAME,OW_WELL_PROD_ZONE,FIELD
0,ECSCH7000400TI,SCH-002B,289935.82,9955795.0,SCH-002BTI,Lower T Sandstone,SCHS
1,ECSCH7000400UI,SCH-002B,289935.82,9955795.0,SCH-002BUI,Lower U Sandstone,SCHS
2,ECSCH7000300TI,SCH-002,289933.07,9955781.0,SCH-002TI,Lower T Sandstone,SCHS
3,ECSCH7000300UI,SCH-002,289933.07,9955781.0,SCH-002UI,Lower U Sandstone,SCHS
4,ECSCH7000500TI,SCH-003,295091.5183,9968342.0,SCH-003TI,Lower T Sandstone,SCHN1


In [89]:
df_sc.head()

Unnamed: 0,COMPLETION_LEGAL_NAME,FECHA_COMPLETACION
0,SCH-002BTI,2016-06-24
1,SCH-002BUI,2016-06-24
2,SCH-002TI,2016-06-24
3,SCH-002UI,2016-06-24
4,SCH-003TI,2016-06-24


In [90]:
# determine years in the dataset
df_sc['FECHA_COMPLETACION'].dt.year.unique()

array([2016, 2017, 2019, 2018, 2020, 2022, 2023, 2021], dtype=int64)

In [91]:
# Marge the DataFrames
df_wells = df_maestra.merge(df_sc, on='COMPLETION_LEGAL_NAME', how='inner')

# change the name of the columns to NOMBRE_COMPLETO	X	Y   FCOMP   CAMPO
df_wells.rename(columns={'WELL_LEGAL_NAME':'NOMBRE_COMPLETO','COMPLETION_COORDINATE_X':'X',
                        'COMPLETION_COORDINATE_Y':'Y', 'FECHA_COMPLETACION':'FCOMP', 'FIELD':'CAMPO'}, inplace=True)

df_wells.drop(['COMPLETION_UWI', 'OW_WELL_PROD_ZONE', 'COMPLETION_LEGAL_NAME'], axis=1, inplace=True)

#TODO: check if there are duplicated wells have to be removed

df_wells.drop_duplicates(subset=['NOMBRE_COMPLETO'], inplace=True)

df_wells.head().round(3)

Unnamed: 0,NOMBRE_COMPLETO,X,Y,CAMPO,FCOMP
0,SCH-002B,289935.82,9955795.32,SCHS,2016-06-24
2,SCH-002,289933.07,9955780.98,SCHS,2016-06-24
4,SCH-003,295091.518,9968341.653,SCHN1,2016-06-24
6,SCH-005,295027.198,9962203.243,SCHN1,2016-06-24
10,SCH-006,293967.658,9968829.463,SCHN1,2016-06-24


# MENSUAL

In [92]:
# Delete the rows with have 'PLAN' in the COMP_S_NAME column
df_mensual = df_mensual[~df_mensual['COMP_S_NAME'].str.contains('PLAN')]
df_mensual.head()

Unnamed: 0,COMP_S_NAME,PROD_DT,VO_OIL_PROD,VO_GAS_PROD,VO_WAT_PROD,DIAS_ON
0,SCH-002BTI,2007-11-30,9387.29,2816.19,1515.49,30.0
1,SCH-002BTI,2007-12-31,10136.88,3041.06,1317.22,31.0
2,SCH-002BTI,2008-01-31,8469.48,2540.84,1046.79,31.0
3,SCH-002BTI,2008-02-29,7967.25,2390.18,1148.6,29.0
4,SCH-002BTI,2008-03-31,8668.35,2600.51,1238.34,31.0


In [93]:
# rename columns
df_mensual.rename(columns={'COMP_S_NAME': 'UNIQUEID', 'PROD_DT': 'Date',
                        'DIAS_ON': 'DAYS', 'VO_OIL_PROD': 'OILP', 'VO_WAT_PROD': 'WATP', 'VO_GAS_PROD': 'GASP'}, inplace=True)
df_mensual.head()

Unnamed: 0,UNIQUEID,Date,OILP,GASP,WATP,DAYS
0,SCH-002BTI,2007-11-30,9387.29,2816.19,1515.49,30.0
1,SCH-002BTI,2007-12-31,10136.88,3041.06,1317.22,31.0
2,SCH-002BTI,2008-01-31,8469.48,2540.84,1046.79,31.0
3,SCH-002BTI,2008-02-29,7967.25,2390.18,1148.6,29.0
4,SCH-002BTI,2008-03-31,8668.35,2600.51,1238.34,31.0


In [94]:
df_mensual['Date'].dt.year.unique()

array([2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
       2018, 2019, 2020, 2021, 2022, 2023, 2003, 2004, 2005, 2006, 2001,
       2002, 1978, 1979, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993,
       1994, 1995, 1996, 1997, 1998, 1999, 2000, 1972, 1973, 1977, 1980,
       1981, 1982, 1983, 1984, 1985, 1974, 1976, 1975], dtype=int64)

## MERGE DATA FRAMES
In order to filter the data of ```Lower T Sandstone``` and ```Lower U Sandstone```

In [95]:
merged_df = pd.merge(df_mensual, df_maestra[['COMPLETION_LEGAL_NAME', 'WELL_LEGAL_NAME', 'OW_WELL_PROD_ZONE']],
                    left_on='UNIQUEID', right_on='COMPLETION_LEGAL_NAME', how='outer')

In [96]:
# Filter the DataFrame to only include the Lower T and Lower U Sandstone
# TODO: check if its necessary filter only both zones
df_mensual_Ui_Ti = merged_df.loc[(merged_df['OW_WELL_PROD_ZONE'] == 'Lower T Sandstone') | (merged_df['OW_WELL_PROD_ZONE'] == 'Lower U Sandstone')]

# Drop the columns we don't need
df_mensual_Ui_Ti.drop(['COMPLETION_LEGAL_NAME', 'WELL_LEGAL_NAME', 'OW_WELL_PROD_ZONE'], axis=1, inplace=True)

df_mensual_Ui_Ti.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mensual_Ui_Ti.drop(['COMPLETION_LEGAL_NAME', 'WELL_LEGAL_NAME', 'OW_WELL_PROD_ZONE'], axis=1, inplace=True)


Unnamed: 0,UNIQUEID,Date,OILP,GASP,WATP,DAYS
0,SCH-002BTI,2007-11-30,9387.29,2816.19,1515.49,30.0
1,SCH-002BTI,2007-12-31,10136.88,3041.06,1317.22,31.0
2,SCH-002BTI,2008-01-31,8469.48,2540.84,1046.79,31.0
3,SCH-002BTI,2008-02-29,7967.25,2390.18,1148.6,29.0
4,SCH-002BTI,2008-03-31,8668.35,2600.51,1238.34,31.0


# DIARIA

In [97]:
# Delete the rows with have 'PLAN' in the COMPLETION_NAME column
df_diaria = df_diaria[~df_diaria['COMPLETION_NAME'].str.contains('PLAN')]
df_diaria.head()

Unnamed: 0,COMPLETION_NAME,PROD_DATE,HORES_ON,RATE_OIL,RATE_GAS,RATE_WAT
0,SCH-002BTI,2023-03-21,0.0,0.0,0.0,0.0
1,SCH-002BTI,2023-03-22,0.0,0.0,0.0,0.0
2,SCH-002BTI,2023-03-23,0.0,0.0,0.0,0.0
3,SCH-002BTI,2023-03-24,0.0,0.0,0.0,0.0
4,SCH-002BTI,2023-03-25,0.0,0.0,0.0,0.0


In [98]:
# rename columns
df_diaria.rename(columns={'COMPLETION_NAME': 'UNIQUEID', 'PROD_DATE': 'Date',
                        'RATE_OIL': 'OILP', 'RATE_WAT': 'WATP', 'RATE_GAS': 'GASP'}, inplace=True)
df_diaria.head()

Unnamed: 0,UNIQUEID,Date,HORES_ON,OILP,GASP,WATP
0,SCH-002BTI,2023-03-21,0.0,0.0,0.0,0.0
1,SCH-002BTI,2023-03-22,0.0,0.0,0.0,0.0
2,SCH-002BTI,2023-03-23,0.0,0.0,0.0,0.0
3,SCH-002BTI,2023-03-24,0.0,0.0,0.0,0.0
4,SCH-002BTI,2023-03-25,0.0,0.0,0.0,0.0


In [99]:
df_diaria['Date'].dt.year.unique()

array([2023, 2016, 2017, 2018, 2019, 2020, 2021, 2022], dtype=int64)

# INYECCION

In [100]:
df_iny_diaria.head()

Unnamed: 0,COMPLETION_NAME,PROD_DATE,HORES_ON,RATE_WAT_INJ,WELL_INJ_PRESSURE,PUMP_PRESS,TEMPERATURE,OIL_IN_WATER,SUSPENDED_SOLIDS,OXYGEN,...,SKIMER_TURBIDITY,SKIMER_OXYGEN,BOOSTER_SUSPENDED_SOLIDS,BOOSTER_OIL_IN_WATER,BOOSTER_TURBIDITY,BOOSTER_OXYGEN,BOOSTER_SALINITY,BOOSTER_SLOPE,BOOSTER_PLUGGING_INDEX,COMMENTS
0,SCH-005IT,2009-03-01,24.0,0.0,,,,,,,...,,,,,,,,,,
1,SCH-005IT,2009-03-02,24.0,0.0,,,,,,,...,,,,,,,,,,
2,SCH-005IT,2009-03-03,24.0,0.0,,,,,,,...,,,,,,,,,,
3,SCH-005IT,2009-03-04,24.0,0.0,,,,,,,...,,,,,,,,,,
4,SCH-005IT,2009-03-05,24.0,0.0,,,,,,,...,,,,,,,,,,


In [101]:
df_iny_diaria['PROD_DATE'].dt.year.unique()

array([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
       2020, 2021, 2022, 2023], dtype=int64)

In [102]:
df_iny_diaria.columns

Index(['COMPLETION_NAME', 'PROD_DATE', 'HORES_ON', 'RATE_WAT_INJ',
       'WELL_INJ_PRESSURE', 'PUMP_PRESS', 'TEMPERATURE', 'OIL_IN_WATER',
       'SUSPENDED_SOLIDS', 'OXYGEN', 'SALINITY', 'PLUGGING_INDEX', 'TURBIDITY',
       'SLOPE', 'WTK_SUSPENDED_SOLIDS', 'WTK_OIL_IN_WATER', 'WTK_TURBIDITY',
       'WTK_OXYGEN', 'SKIMER_SUSPENDED_SOLIDS', 'SKIMER_OIL_IN_WATER',
       'SKIMER_TURBIDITY', 'SKIMER_OXYGEN', 'BOOSTER_SUSPENDED_SOLIDS',
       'BOOSTER_OIL_IN_WATER', 'BOOSTER_TURBIDITY', 'BOOSTER_OXYGEN',
       'BOOSTER_SALINITY', 'BOOSTER_SLOPE', 'BOOSTER_PLUGGING_INDEX',
       'COMMENTS'],
      dtype='object')

In [103]:
# rename columns
df_iny_diaria.rename(columns={'COMPLETION_NAME': 'UNIQUEID', 'PROD_DATE': 'Date',
                            'RATE_WAT_INJ': 'WATER_INJ_VOL', 'PUMP_PRESS': 'WHP'}, inplace=True)
df_iny_diaria = df_iny_diaria[['UNIQUEID', 'Date', 'WATER_INJ_VOL', 'HORES_ON', 'WHP']]
df_iny_diaria['DAYS'] = df_iny_diaria['HORES_ON']/24        # convert hours to days

df_iny_diaria.head()

Unnamed: 0,UNIQUEID,Date,WATER_INJ_VOL,HORES_ON,WHP,DAYS
0,SCH-005IT,2009-03-01,0.0,24.0,,1.0
1,SCH-005IT,2009-03-02,0.0,24.0,,1.0
2,SCH-005IT,2009-03-03,0.0,24.0,,1.0
3,SCH-005IT,2009-03-04,0.0,24.0,,1.0
4,SCH-005IT,2009-03-05,0.0,24.0,,1.0


In [104]:
df_iny_diaria['Date'].dt.year.unique()

array([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
       2020, 2021, 2022, 2023], dtype=int64)

In [105]:
# Group by month and sum the production
df_iny_diaria['Month'] = df_iny_diaria.groupby(['UNIQUEID', df_iny_diaria['Date'].dt.to_period('M')])['Date'].transform('min')
df_iny_month = df_iny_diaria.groupby(['UNIQUEID', 'Month'])[['WATER_INJ_VOL', 'DAYS', 'WHP']].sum().reset_index()

# rename column
df_iny_month.rename(columns={'Month': 'Date'}, inplace=True)

#create a new column INJ_TYPE
df_iny_month['INJ_TYPE'] = 'water'

df_iny_month = df_iny_month[['UNIQUEID', 'Date', 'INJ_TYPE', 'WATER_INJ_VOL', 'DAYS', 'WHP']]

df_iny_month.head()

Unnamed: 0,UNIQUEID,Date,INJ_TYPE,WATER_INJ_VOL,DAYS,WHP
0,SCH-005IT,2009-03-01,water,0.0,31.0,0.0
1,SCH-005IT,2009-07-01,water,0.0,31.0,0.0
2,SCH-005IT,2009-09-01,water,0.0,30.0,0.0
3,SCH-005IT,2009-11-01,water,128971.0,30.0,0.0
4,SCH-005IT,2009-12-01,water,134078.0,31.0,0.0


# EVENTOS - PERFORADOS

## Notas

In [106]:
# Import the Excel file
excel_file_eventos = r'C:\Users\brand\OneDrive\Documents\Maestría EOR\Tesis EOR\Info EPPEC\2018_INFORMACION POZOS SACHA-EVENTOS -INTERVALOS_PAM.xlsx'

# Read the Excel file into a DataFrame
df_eventos = pd.read_excel(excel_file_eventos, sheet_name='POZOS EVENTOS', skiprows=6)

df_eventos.head()

Unnamed: 0,BLOQUE,ACTIVO,POZO,EVENTO,COD EVENTO,TRABAJO,FECHA INICIO,FECHA FIN
0,B60,SA,SCHA-001,DEV DRILLING,DRL,DRL,1969-01-21,1969-02-18 00:00:00
1,B60,SA,SCHA-001,DEV COMPLETION,COM,COM,1969-02-22,1969-02-25 00:00:00
2,B60,SA,SCH-002,DEV DRILLING,DRL,DRL,1969-07-21,1969-08-22 00:00:00
3,B60,SA,SCH-002,DEV COMPLETION,COM,COM,1969-08-29,1969-08-31 00:00:00
4,B60,SA,SCH-003,DEV DRILLING,DRL,DRL,1969-09-04,1969-10-03 00:00:00


In [107]:
# Put in the desired format
df_eventos = df_eventos[['POZO', 'FECHA FIN', 'TRABAJO', 'EVENTO']]
df_eventos.rename(columns={'POZO': 'IDENTIFICADOR', 'FECHA FIN': 'FECHA',
                        'TRABAJO': 'CATEGORÍA', 'EVENTO': 'DATO'}, inplace=True)
df_eventos.head()

Unnamed: 0,IDENTIFICADOR,FECHA,CATEGORÍA,DATO
0,SCHA-001,1969-02-18 00:00:00,DRL,DEV DRILLING
1,SCHA-001,1969-02-25 00:00:00,COM,DEV COMPLETION
2,SCH-002,1969-08-22 00:00:00,DRL,DEV DRILLING
3,SCH-002,1969-08-31 00:00:00,COM,DEV COMPLETION
4,SCH-003,1969-10-03 00:00:00,DRL,DEV DRILLING


## Perforados

In [108]:
# Import the Excel file
excel_file_perf = r'C:\Users\brand\OneDrive\Documents\Maestría EOR\Tesis EOR\Info EPPEC\NUEVA INFORMACIÓN 06112023\PRF-SA-RPR-231108-INTERVALOS SACHA.xlsx'

# Read the Excel file into a DataFrame
df_perf = pd.read_excel(excel_file_perf, sheet_name='INTERVALOS CAÑONEADOS', skiprows=13)

df_perf.head()

Unnamed: 0,BLOQUE,CAMPO,ACTIVO,NOMBRE LARGO,POZO,WELLBORE,FECHA CAÑONEO,Top MD (ft),Btm MD (ft),RAZON,ESTATUS,ARENA
0,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9730.0,9737.0,PRODUCTION,OPEN,"ARENA ""T"""
1,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9954.0,9982.0,PRODUCTION,OPEN,HOLLIN INFERIOR
2,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9740.0,9746.0,PRODUCTION,OPEN,"ARENA ""T"""
3,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9935.0,9939.0,PRODUCTION,OPEN,HOLLIN INFERIOR
4,B60,SACHA,SA,SACHA-003,SCH-003,SCH-003,1969-10-08,9940.0,9952.0,PRODUCTION,OPEN,HOLLIN INFERIOR


In [109]:
# Put in the desired format
# TODO: check if the columns are correct and their names
df_perf = df_perf[['POZO', 'FECHA CAÑONEO', 'Top MD (ft)', 'Btm MD (ft)', 'RAZON']].rename(
                columns={'POZO': 'IDENTIFICADOR', 'FECHA CAÑONEO': 'FECHA',
                        'Top MD (ft)': 'TOPE', 'Btm MD (ft)': 'BASE', 'RAZON': 'OBSERVACIONES'})
df_perf.head()

Unnamed: 0,IDENTIFICADOR,FECHA,TOPE,BASE,OBSERVACIONES
0,SCH-003,1969-10-08,9730.0,9737.0,PRODUCTION
1,SCH-003,1969-10-08,9954.0,9982.0,PRODUCTION
2,SCH-003,1969-10-08,9740.0,9746.0,PRODUCTION
3,SCH-003,1969-10-08,9935.0,9939.0,PRODUCTION
4,SCH-003,1969-10-08,9940.0,9952.0,PRODUCTION


# INSTALACIONES

In [110]:
import pdfplumber
import re
from collections import namedtuple

In [111]:
# pdf_file = pdfplumber.open('2017_SCHP-188 WO# 08_ZS.pdf')

In [112]:
loc_colums = ['No', 'Jts', 'Tope MD', 'Tope TVD', 'Longitud', 'OD Nom', 'Descripción']

In [113]:
# with pdfplumber.open('2017_SCHP-188 WO# 08_ZS.pdf') as pdf:
#     data = pdf.pages[0].extract_tables()
#     # data = data[0][1:]
# data

# PRUEBAS DE PRODUCCIÓN

In [114]:
df_production_test.head()

Unnamed: 0,PTYPE,COMPLETION_LEGAL_NAME,TEST_DATE,T_HRS,TEST_OIL_24,TEST_GAS_24,TEST_WAT_24,GRAV_OIL API,BSW,CASING_PRESS,...,REMARKS,TEST_PURPOSE,INYECTION_PSI,PLANT_PSI,INJECTION_VOL,INJECTION_API,RETURN_VOL,RETURN_BSW,RETURN_API,VOL_REAL
0,COMP,SCH-002BTI,2015-01-02,5.0,110.0,0.0,2.0,27.4,1.786,0.0,...,Realizar prueba de 12 horas.,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
1,COMP,SCH-002BTI,2015-01-05,5.0,145.0,0.0,3.0,27.4,2.027,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
2,COMP,SCH-002BTI,2015-01-15,5.0,145.0,0.0,3.0,27.4,2.027,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
3,COMP,SCH-002BTI,2015-01-20,5.0,122.0,0.0,3.0,27.4,2.4,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
4,COMP,SCH-002BTI,2015-01-25,5.0,120.0,0.0,2.0,27.4,1.639,0.0,...,,A,0.0,0.0,0.0,0.0,0.0,,0.0,0.0


In [115]:
df_production_test.columns

Index(['PTYPE', 'COMPLETION_LEGAL_NAME', 'TEST_DATE', 'T_HRS', 'TEST_OIL_24',
       'TEST_GAS_24', 'TEST_WAT_24', 'GRAV_OIL API', 'BSW', 'CASING_PRESS',
       'PBHP', 'TUBING_PRESS', 'PIP', 'FLAP', 'SBHP', 'GAS_SP_GRAVITIY',
       'TUBING_TMP', 'PI', 'AMPS_A', 'PUMP_TMP', 'MOTOR_HZ', 'POWER_KW',
       'STAGE_COUNT', 'VOLTAGE', 'INTAKE_DEPTH', 'PUMP_TYPE', 'SALINITY',
       'REMARKS', 'TEST_PURPOSE', 'INYECTION_PSI', 'PLANT_PSI',
       'INJECTION_VOL', 'INJECTION_API', 'RETURN_VOL', 'RETURN_BSW',
       'RETURN_API', 'VOL_REAL'],
      dtype='object')

In [116]:

# TODO: check the columns we need and their names

# select the columns we need
df_production_test = df_production_test[['COMPLETION_LEGAL_NAME', 'TEST_DATE', 'TEST_OIL_24', 'TEST_WAT_24', 'BSW', 'T_HRS',
                    'TEST_GAS_24', 'CASING_PRESS', 'INTAKE_DEPTH', 'MOTOR_HZ', 'PIP', 'AMPS_A', 'PUMP_TYPE', 'GRAV_OIL API']]

# rename columns
df_production_test.rename(columns={'COMPLETION_LEGAL_NAME': 'UNIQUEID', 'TEST_DATE': 'DATE',
                        'TEST_OIL_24': 'OIL_FLOW', 'TEST_WAT_24': 'WATER_FLOW', 'BSW': 'BSW', 'T_HRS': 'DURATION',
                        'TEST_GAS_24': 'GAS_FLOW', 'CASING_PRESS': 'CHP', 'INTAKE_DEPTH': 'PUMP_DEPTH', 'MOTOR_HZ': 'Frecuency',
                        'PIP': 'PIP', 'AMPS_A': 'Amperage', 'ALS': 'PUMP_TYPE', 'GRAV_OIL API': 'OIL_GRAVITY'}, inplace=True)

df_production_test.head()

Unnamed: 0,UNIQUEID,DATE,OIL_FLOW,WATER_FLOW,BSW,DURATION,GAS_FLOW,CHP,PUMP_DEPTH,Frecuency,PIP,Amperage,PUMP_TYPE,OIL_GRAVITY
0,SCH-002BTI,2015-01-02,110.0,2.0,1.786,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
1,SCH-002BTI,2015-01-05,145.0,3.0,2.027,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
2,SCH-002BTI,2015-01-15,145.0,3.0,2.027,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
3,SCH-002BTI,2015-01-20,122.0,3.0,2.4,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4
4,SCH-002BTI,2015-01-25,120.0,2.0,1.639,5.0,0.0,0.0,,0.0,0.0,,P23 SSD,27.4


In [117]:
df_production_test['UNIQUEID'].nunique()

258

# SURVEYS

In [118]:
#  Define a function to read the survey data
def read_survey(file):
    # Initialize a variable to store the well name
    well_name = None
    
    # Open the file and extract the well name
    with open(file, 'r') as f:
        for line in f:
            if "WELL NAME:" in line:
                well_name = line.split("WELL NAME:")[1].strip()
                break

    # Initialize a variable to count header rows
    header_rows = 0

    # Open the file and determine how many header rows there are
    with open(file, 'r') as f:
        for line in f:
            if line.startswith("#"):
                header_rows += 1
            else:
                break

    # Read the data from the file, skipping the header rows
    data = pd.read_csv(file, sep='\s+', skiprows=header_rows)

    # Add a 'well_name' column with the extracted well name
    data['well_name'] = well_name

    return data.iloc[1:]  # Return the data without the first 2 rows


In [119]:
# Define the path to the directory containing survey information
path_surveys = r'C:\Users\brand\OneDrive\Documents\Maestría EOR\Tesis EOR\Info EPPEC\\4. Información Surveys'

# List the files in the directory
files = os.listdir(path_surveys)

print(files)

['S-1.las', 'S-10.las', 'S-100.las', 'S-101.las', 'S-102.las', 'S-103.las', 'S-104.las', 'S-105.las', 'S-106.las', 'S-107.las', 'S-108.las', 'S-109.las', 'S-11.las', 'S-110.las', 'S-111.las', 'S-112.las', 'S-113.las', 'S-114.las', 'S-115.las', 'S-116.las', 'S-117.las', 'S-118.las', 'S-119.las', 'S-12.las', 'S-120.las', 'S-121.las', 'S-122.las', 'S-123.las', 'S-124.las', 'S-125.las', 'S-126.las', 'S-127.las', 'S-128.las', 'S-129.las', 'S-13.las', 'S-130.las', 'S-131.las', 'S-132.las', 'S-133.las', 'S-134.las', 'S-135.las', 'S-136.las', 'S-137.las', 'S-138.las', 'S-139.las', 'S-14.las', 'S-140D.las', 'S-141.las', 'S-142.las', 'S-143.las', 'S-144.las', 'S-145.las', 'S-146.las', 'S-147D.las', 'S-148.las', 'S-148ST.las', 'S-149.las', 'S-15.las', 'S-150D.las', 'S-151D.las', 'S-152D.las', 'S-153.las', 'S-154D.las', 'S-155D.las', 'S-156.las', 'S-157.las', 'S-158.las', 'S-159.las', 'S-16.las', 'S-160D.las', 'S-161.las', 'S-162.las', 'S-163D.las', 'S-164D.las', 'S-165D.las', 'S-166D.las', 'S-167

In [120]:
# Create an empty list to store the DataFrames
survey_data = []

# Loop through the files and read each survey data file
for file in files:
    survey_data.append(read_survey(os.path.join(path_surveys, file)))

# Concatenate the DataFrames into a single DataFrame
df_surveys = pd.concat(survey_data)

df_surveys.head()

Unnamed: 0,MD,X,Y,Z,TVD,DX,DY,AZIM,INCL,DLS,well_name
1,-0.0,290807.1683,9963504.0,883.084,-0.0,0.0,0.0,0.0,0.0,0.0,S-1
2,10158.5,290807.1683,9963504.0,-9275.416,10158.5,0.0,0.0,0.0,0.0,0.0,S-1
1,-0.0,294593.0,9967797.0,897.00006,-0.0,0.0,0.0,0.0,0.0,0.0,S-10
2,9964.0,294593.0,9967797.0,-9066.99994,9964.0,0.0,0.0,0.0,0.0,0.0,S-10
1,-0.0,290696.9883,9959794.0,884.2,-0.0,0.0,0.0,0.0,0.0,0.0,S-100


In [121]:
# change the name of the columns to Wellbore	Md	TVD	XDelt	YDelt
df_surveys.rename(columns={'well_name':'Wellbore', 'MD':'Md', 'TVD':'TVD', 'DX':'XDelt', 'DY':'YDelt'}, inplace=True)
df_surveys = df_surveys[['Wellbore', 'Md', 'TVD', 'XDelt', 'YDelt']]
df_surveys.head()

Unnamed: 0,Wellbore,Md,TVD,XDelt,YDelt
1,S-1,-0.0,-0.0,0.0,0.0
2,S-1,10158.5,10158.5,0.0,0.0
1,S-10,-0.0,-0.0,0.0,0.0
2,S-10,9964.0,9964.0,0.0,0.0
1,S-100,-0.0,-0.0,0.0,0.0


In [122]:
# contar cuantas veces se repite cada pozo
surveys = df_surveys['Wellbore'].value_counts()

# SAVE FILTERED DATA

In [123]:
# Save dataframes to Excel file
with pd.ExcelWriter('Data_draft.xlsx') as writer:
    df_wells.to_excel(writer, sheet_name='Pozos', index=False)
    df_surveys.to_excel(writer, sheet_name='Trayectorias', index=False)
    df_mensual_Ui_Ti.to_excel(writer, sheet_name='Producción Mensual', index=False)
    df_iny_month.to_excel(writer, sheet_name='Inyección Mensual OFM', index=False)
    df_perf.to_excel(writer, sheet_name='Perforados', index=False)
    df_eventos.to_excel(writer, sheet_name='Notas', index=False)
    df_production_test.to_excel(writer, sheet_name='Pruebas de Producción', index=False)
    surveys.to_excel(writer, sheet_name='Surveys', index=True)

# ACUMULADOS

In [124]:
# amount of unique wells
unique_wells = list(merged_df['WELL_LEGAL_NAME'].unique())

In [125]:
def Accum_by_sand(sand):
    # Crear una lista para almacenar los resultados
    results = []

    # Iterar a través de los nombres de pozos en unique_wells
    for well_name in unique_wells:
        # Filtrar el DataFrame por 'OW_WELL_PROD_ZONE' igual a 'Lower T Sandstone'
        filtered_df = merged_df[(merged_df['WELL_LEGAL_NAME'] == well_name) & (merged_df['OW_WELL_PROD_ZONE'] == sand)]
        
        # Calcular la suma de 'OILP' para el pozo actual
        accum_oil = filtered_df['OILP'].sum()
        
        # Agregar el resultado a la lista de resultados
        results.append([well_name, accum_oil])

    # Crear un DataFrame a partir de la lista de resultados
    result_df = pd.DataFrame(results, columns=['Well_name', 'Accum_oil'])

    return result_df

In [126]:
Accum_T_Low = Accum_by_sand('Lower T Sandstone')
Accum_T_Low.head()

Unnamed: 0,Well_name,Accum_oil
0,SCH-002B,610218.002
1,SCH-002,0.0
2,SCH-003,2380627.342
3,SCH-005,1682342.913
4,SCH-006,78869.256


In [127]:
Accum_U_Low = Accum_by_sand('Lower U Sandstone')
Accum_U_Low.head()

Unnamed: 0,Well_name,Accum_oil
0,SCH-002B,231683.19
1,SCH-002,237197.0
2,SCH-003,99501.658
3,SCH-005,2626937.087
4,SCH-006,4433836.998
