# EDA Datathon2021 by Softypo

In [5]:
# Importing module libraries
import pandas as pd
import numpy as np
import lasio
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import dataframe_utilities as dfutil
import lasio_utilities as lasutil

## Data loading

In [6]:
target_wells = pd.read_csv('consolidated_well_data_Softypo_ft.csv', index_col=0, usecols=[0, 11])

In [7]:
# las files
lasfiles = lasutil.loader_lasio_multiprocess('Data for Datathon\well_log_files\Clean_LAS', outdf=False)

Loading LAS files:   0%|          | 0/1498 [00:00<?, ?it/s]

## performing EDA on las files

In [8]:
# target wells
print ('target wells:', len(target_wells))
# total wells
print ('total wells:', len(lasfiles))
# number of Canadian wells
print ('Canadian wells:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.ctry.value in ('CANADA', 'Canada', 'CAN')]))
# number of US wells
print ('US wells:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.ctry.value in ('US', 'USA', 'UNITED STATES OF AMERICA')]), '\n')
# number of wells in ft
print ('wells in ft:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.step.unit in ('F', 'f', 'Ft', 'FT', 'ft')]))
# number of wells in m
print ('wells in m:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.step.unit in ('M', 'm', 'meters', 'Meters', 'METERS')]), '\n')
# number of Canadian wells in ft
print ('Canadian wells in ft:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.step.unit in ('F', 'f', 'Ft', 'FT', 'ft') and las.well.ctry.value in ('CANADA', 'Canada', 'CAN')]))
# number of US wells in ft
print ('US wells in ft:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.step.unit in ('F', 'f', 'Ft', 'FT', 'ft') and las.well.ctry.value in ('US', 'USA', 'UNITED STATES OF AMERICA')]), '\n')
# number of Canadian wells in m
print ('Canadian wells in m:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.step.unit in ('M', 'm', 'meters', 'Meters', 'METERS') and las.well.ctry.value in ('CANADA', 'Canada', 'CAN')]))
# number of US wells in m
print ('US wells in m:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if las.well.step.unit in ('M', 'm', 'meters', 'Meters', 'METERS') and las.well.ctry.value in ('US', 'USA', 'UNITED STATES OF AMERICA')]), '\n')
# raw files
print ('raw wells:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if n.endswith(('_w', '_W'))==False]))
# cleaned files
print ('cleaned wells:', len([[n, las.well.stop.unit, las.well.ctry.value] for n, las in lasfiles.items() if n.endswith(('_w', '_W'))]))

target wells: 821
total wells: 1498
Canadian wells: 1023
US wells: 475 

wells in ft: 1141
wells in m: 357 

Canadian wells in ft: 666
US wells in ft: 475 

Canadian wells in m: 357
US wells in m: 0 

raw wells: 835
cleaned wells: 663


## selecting las files

In [9]:
# creating a list of not to be included files

# raw files
drop_raw = [n for n, las in lasfiles.items() if n.endswith(('_w', '_W'))==False]
# cleaned files
drop_cleaned = [n for n, las in lasfiles.items() if n.endswith(('_w', '_W'))]

In [10]:
# cleaned only las files in target
las_wt = {n: las for n, las in (lasutil.loader_lasio_multiprocess('Data for Datathon\well_log_files\Clean_LAS', todrop=drop_raw, outlas=False, outdf=True)).items() if n in target_wells.index}

Loading LAS files:   0%|          | 0/1498 [00:00<?, ?it/s]

LAS to Pandas Dataframe:   0%|          | 0/663 [00:00<?, ?it/s]

In [11]:
# raw only las files in target
las_rt = {n: las for n, las in (lasutil.loader_lasio_multiprocess('Data for Datathon\well_log_files\Clean_LAS', todrop=drop_cleaned, outlas=False, outdf=True)).items() if n in target_wells.index}

Loading LAS files:   0%|          | 0/1498 [00:00<?, ?it/s]

LAS to Pandas Dataframe:   0%|          | 0/835 [00:00<?, ?it/s]

## m to ft conversion

In [12]:
# changing wells from m to ft
def m_to_ft(lasdfdic):
    for uwi, well in lasdfdic.items():
        if well[0] in ('M', 'm'):
            well[1].index = well[1].index*3.28084
            well[0] = 'FT'
            well[1].index.names = ['Depth_ft']
m_to_ft(las_wt)
m_to_ft(las_rt)

## consolidating las cleaned and raw las files

In [13]:
# adding wells raw and cleaned
lasdf = {uwi: well[1] for uwi, well in las_rt.items()}
lasdf.update({uwi: well[1] for uwi, well in las_wt.items()})

In [14]:
# final number of lass files
len(lasdf)

821

## filtering logs and cumputing its mean

In [30]:
# filtering and calculating logs mean
lasfiltered = lasutil.filter(lasdf, ('GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'), mean=True)

Searching:   0%|          | 0/821 [00:00<?, ?it/s]

Calculating mean:   0%|          | 0/819 [00:00<?, ?it/s]

In [31]:
# filered wells
len([print(n, df.columns) for n, df in lasfiltered.items()])

051309240000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051309360000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051309370000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051309430000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051309640000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051309880000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051309940000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051310100000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051310320000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051310330000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051310430000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051310480000 Index(['GR', 'ILD', 'RHO', 'NPHI', 'DT', 'PEF'], dtype='object')
42051310530000 Index(['GR', 'ILD', 'RHO', 

819

In [19]:
# sample well
lasfiltered['42041304690000']

Unnamed: 0_level_0,GR,ILD,RHO,NPHI,DT,PEF
DEPT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3500.0,61.1774,9.4672,2.2969,0.2977,95.6049,
3550.0,74.7407,6.1281,2.2646,0.3555,99.9970,
3600.0,72.4716,4.5043,2.1911,0.3709,102.3559,
3650.0,82.3895,4.2962,2.2835,0.3190,101.0276,
3700.0,52.8176,3.5596,2.1695,0.3748,103.8290,
...,...,...,...,...,...,...
8050.0,83.6835,3.2762,2.4479,0.3786,96.7125,
8100.0,92.1415,3.8812,2.2545,0.3818,98.3479,
8150.0,53.4992,,,,,
8200.0,52.6284,7.1496,,,,


In [41]:
def to_target(las, target):
    for uwi, welllogs in las.items():
        targetdepth = float(target['BHT_md_ft'].loc[target.index==uwi])
        if 'df' not in locals(): df = pd.DataFrame(target, columns=welllogs.columns)
        for logname in welllogs:
            targetvalue = welllogs[logname].iloc[(abs(welllogs[logname].index.values-targetdepth)).argsort()[:2]].mean()
            df[logname].loc[df.index==uwi] = targetvalue
    return df
well_logs = to_target(lasfiltered, target_wells)

In [42]:
well_logs

Unnamed: 0,GR,ILD,RHO,NPHI,DT,PEF
42021301990000,73.422600,9.77235,2.57615,0.185000,,
100102606420W500,48.028750,2.31870,,,122.82240,
100141705519W500,71.541450,6.64715,,,92.89870,
100141503621W400,46.398625,6.77445,,,85.88015,
100043406718W500,19.313100,22.53665,,,53.18885,
...,...,...,...,...,...,...
100101606423W500,126.556100,6.70570,2.04565,0.387500,80.21380,
42177309850000,10.848950,28.64850,,0.014200,,
100100805919W500,78.612650,13.41250,2.52680,0.243900,72.97880,
100110905226W400,60.096150,7.09175,,,65.25850,


## standarizing log units and computing VSHALE, SW, and PHIT

In [43]:
# standarazind logs units
well_logs.loc[(well_logs.RHO > 100), 'RHO'] = well_logs.loc[(well_logs.RHO > 100), 'RHO']/1000
well_logs.loc[(well_logs.DT > 150), 'DT'] = well_logs.loc[(well_logs.DT > 150), 'DT']*0.3048

In [44]:
# vshale
well_logs['VSHALE'] = (well_logs['GR']-10)/(150-10)
# density porosity
well_logs['PHIT_D'] = (2.71-well_logs['RHO'])/(2.71-1)
# sonic willie porosity
well_logs['PHIT_S'] = (well_logs['DT']-47.6)/(200-47.6)
#PHIT mean
well_logs['PHIT'] = well_logs.loc[:, ['PHIT_D', 'PHIT_S', 'NPHI']].mean(axis=1)
# water saturation archie
well_logs['SW'] = np.minimum(1, (((0.05)/(well_logs['PHIT']**2*well_logs['ILD']))**2))
# volumetric PEF
well_logs['U'] = well_logs['RHO']*well_logs['PEF']

In [45]:
well_logs.drop(columns=['PHIT_D', 'PHIT_S', 'PEF'], inplace=True)

In [46]:
well_logs

Unnamed: 0,GR,ILD,RHO,NPHI,DT,VSHALE,PHIT,SW,U
42021301990000,73.422600,9.77235,2.576150,0.185000,,0.453019,0.131637,0.087182,
100102606420W500,48.028750,2.31870,,,122.822400,0.271634,0.493585,0.007834,
100141705519W500,71.541450,6.64715,,,92.898700,0.439582,0.297236,0.007249,
100141503621W400,46.398625,6.77445,,,85.880150,0.259990,0.251182,0.013685,
100043406718W500,19.313100,22.53665,,,53.188850,0.066522,0.036672,1.000000,
...,...,...,...,...,...,...,...,...,...
100101606423W500,126.556100,6.70570,2.045650,0.387500,80.213800,0.832544,0.330003,0.004688,
42177309850000,10.848950,28.64850,,0.014200,,0.006064,0.014200,1.000000,
100100805919W500,78.612650,13.41250,2.526800,0.243900,72.978800,0.490090,0.172521,0.015688,
100110905226W400,60.096150,7.09175,,,65.258500,0.357830,0.115869,0.275777,


In [47]:
well_logs.to_csv('well_logs_Softypo.csv')