# Midland Basin Well Data

## Import dependencies and load data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# CSV path, relative to the notebook's working directory.
# A forward slash is used on purpose: "\D" is not a valid escape sequence in a
# regular string literal, and a backslash separator only resolves on Windows.
path = "Resources/Dist_Test_Data.csv"

In [28]:
# Load the well data CSV into a dataframe and preview the first rows
df = pd.read_csv(path)
df.head()

Unnamed: 0,well id,Lease Name,Well Number,Operator compnay,County,Landing Zone,Surf Lat,Surf Long,BH Lat,BH Long,...,Date Completed,Total Fluid (bbl),Total Proppant (lb),"Well Spacing, ft",Well Name,Fluid (bbl/ft),Prop (lb/ft),Avg PPG,"Oil EUR, Mbbl","Gas EUR, MMcf"
0,42317400000000.0,SOUTH CURTIS RANCH 3409DP,9H,OCCIDENTAL PERMIAN LTD,MARTIN,SPBYL SH,32.098414,-102.117808,32.068754,-102.10791,...,,,,,SOUTH CURTIS RANCH 3409DP 9H,,,,,
1,42317400000000.0,SOUTH CURTIS RANCH 3401LP,1H,OCCIDENTAL PERMIAN LTD,MARTIN,SPBYL SH,32.094933,-102.129108,32.065761,-102.120891,...,,,,,SOUTH CURTIS RANCH 3401LP 1H,,,,,
2,42317400000000.0,SOUTH CURTIS RANCH 3409LP,9H,OCCIDENTAL PERMIAN LTD,MARTIN,SPBYL SH,32.096303,-102.123281,32.068087,-102.110897,...,,,,,SOUTH CURTIS RANCH 3409LP 9H,,,,,
3,42317400000000.0,SOUTH CURTIS RANCH 3401AP,1H,OCCIDENTAL PERMIAN LTD,MARTIN,WCMP A,32.095358,-102.127289,32.065879,-102.120283,...,,15292692.0,18797000.0,,SOUTH CURTIS RANCH 3401AP 1H,,,,,
4,42317400000000.0,SOUTH CURTIS RANCH 3403SP,3H,OCCIDENTAL PERMIAN LTD,MARTIN,SPBYL SD,32.095847,-102.125217,32.066857,-102.116449,...,,,,,SOUTH CURTIS RANCH 3403SP 3H,,,,,


In [4]:
# Summarize the initial dataframe: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 22 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   well id              20 non-null     float64
 1   Lease Name           20 non-null     object 
 2   Well Number          20 non-null     object 
 3   Operator compnay     20 non-null     object 
 4   County               20 non-null     object 
 5   Landing Zone         20 non-null     object 
 6   Surf Lat             20 non-null     float64
 7   Surf Long            20 non-null     float64
 8   BH Lat               20 non-null     float64
 9   BH Long              20 non-null     float64
 10  Date Spud            20 non-null     object 
 11  Lateral Len (ft)     0 non-null      float64
 12  Date Completed       0 non-null      float64
 13  Total Fluid (bbl)    4 non-null      float64
 14  Total Proppant (lb)  4 non-null      float64
 15  Well Spacing, ft     0 non-null      float

## Data Cleaning

In [5]:
# Keep only the columns needed for the spacing calculation: the well name,
# its landing zone, and the surface / bottom-hole coordinates.
coord_columns = [
    "Well Name",
    "Landing Zone",
    "Surf Lat",
    "Surf Long",
    "BH Lat",
    "BH Long",
]
sel_columns = df[coord_columns]

# Work on an independent copy so new columns never write back into df
dist_df = sel_columns.copy()

## Main Code: Well Spacing by Landing Zone

In [6]:
# Mid point of each well: plain average of the surface and bottom-hole coordinates
dist_df["Mid Point Lat"] = (dist_df["Surf Lat"] + dist_df["BH Lat"])/2
dist_df["Mid Point Long"] = (dist_df["Surf Long"] + dist_df["BH Long"])/2

# Placeholder columns that the spacing loop fills in per well
dist_df["Left Well"] = ""
dist_df["Right Well"] = ""

# Distances are floats, so the placeholders must be floats as well: writing a
# float into an int64 column through .loc is an incompatible-dtype assignment
# that recent pandas versions warn about.
dist_df["Left Distance"] = 0.0
dist_df["Right Distance"] = 0.0
dist_df["Final Distance"] = 0.0
dist_df.head()

Unnamed: 0,Well Name,Landing Zone,Surf Lat,Surf Long,BH Lat,BH Long,Mid Point Lat,Mid Point Long
0,SOUTH CURTIS RANCH 3409DP 9H,SPBYL SH,32.098414,-102.117808,32.068754,-102.10791,32.083584,-102.112859
1,SOUTH CURTIS RANCH 3401LP 1H,SPBYL SH,32.094933,-102.129108,32.065761,-102.120891,32.080347,-102.124999
2,SOUTH CURTIS RANCH 3409LP 9H,SPBYL SH,32.096303,-102.123281,32.068087,-102.110897,32.082195,-102.117089
3,SOUTH CURTIS RANCH 3401AP 1H,WCMP A,32.095358,-102.127289,32.065879,-102.120283,32.080619,-102.123786
4,SOUTH CURTIS RANCH 3403SP 3H,SPBYL SD,32.095847,-102.125217,32.066857,-102.116449,32.081352,-102.120833


In [33]:
# For every well, find the two nearest wells in the same landing zone (measured
# between bottom-hole locations) and average the two distances.
#
# NOTE: calc_dist is defined in a later cell of this notebook. That cell has to
# be executed before this one (ideally it should be moved above this cell),
# otherwise a fresh "Restart & Run All" fails here with a NameError.

MIN_NEIGHBOR_FT = 200    # closer hits are treated as the well itself / a duplicate
MAX_SPACING_FT = 2000    # cap used for missing or very distant neighbours

spacing_columns = ["Left Well", "Right Well",
                   "Left Distance", "Right Distance", "Final Distance"]
spacing_records = []

for index in dist_df.index:
    # Current well's bottom-hole location and landing zone
    lat1 = dist_df.at[index, 'BH Lat']
    lon1 = dist_df.at[index, 'BH Long']
    lz = dist_df.at[index, 'Landing Zone']

    # Only wells in the same landing zone are candidate neighbours
    filt_df = dist_df[dist_df['Landing Zone'] == lz].reset_index(drop=True)

    # Build a fresh distance table on every pass. Reusing one dataframe across
    # iterations makes pandas align the new columns to the previous iteration's
    # index, which silently truncates larger groups and leaves stale NaN rows
    # for smaller ones.
    well_dist_df = pd.DataFrame({
        'Distance': calc_dist(lon1, lat1, filt_df['BH Long'], filt_df['BH Lat']),
        'Well Name': filt_df['Well Name'],
    }).sort_values(by='Distance', ignore_index=True)

    # The table is sorted ascending, so the first two rows beyond the minimum
    # distance are the two closest real neighbours. Rows with a NaN distance
    # never satisfy the comparison and are therefore skipped.
    neighbors = well_dist_df[well_dist_df['Distance'] > MIN_NEIGHBOR_FT].head(2)
    neighbor_wells = neighbors['Well Name'].tolist()
    neighbor_dists = [min(d, MAX_SPACING_FT) for d in neighbors['Distance'].tolist()]

    # Fewer than two neighbours: pad with an empty name and the capped distance
    # instead of raising KeyError or reusing values from the previous well.
    missing = 2 - len(neighbor_wells)
    neighbor_wells += [""] * missing
    neighbor_dists += [float(MAX_SPACING_FT)] * missing

    left_well, right_well = neighbor_wells
    left_dist, right_dist = neighbor_dists

    spacing_records.append({
        "Left Well": left_well,
        "Right Well": right_well,
        "Left Distance": float(left_dist),
        "Right Distance": float(right_dist),
        "Final Distance": (left_dist + right_dist) / 2,
    })

# Write the results back one whole column at a time. This keeps the distance
# columns float-typed and avoids per-cell .loc assignment inside the loop.
spacing_df = pd.DataFrame(spacing_records, columns=spacing_columns, index=dist_df.index)
for col in spacing_columns:
    dist_df[col] = spacing_df[col]

dist_df.head(15)

Unnamed: 0,Well Name,Landing Zone,Surf Lat,Surf Long,BH Lat,BH Long,Mid Point Lat,Mid Point Long,Left Well,Right Well,Left Distance,Right Distance,Final Distance
0,SOUTH CURTIS RANCH 3409DP 9H,SPBYL SH,32.098414,-102.117808,32.068754,-102.10791,32.083584,-102.112859,SOUTH CURTIS RANCH 3413LP 13H,SOUTH CURTIS RANCH 3409LP 9H,650.277268,954.949554,802.613411
1,SOUTH CURTIS RANCH 3401LP 1H,SPBYL SH,32.094933,-102.129108,32.065761,-102.120891,32.080347,-102.124999,SOUTH CURTIS RANCH 3403DP 3H,SOUTH CURTIS RANCH 3405LP 5H,647.744874,1654.504533,1151.124703
2,SOUTH CURTIS RANCH 3409LP 9H,SPBYL SH,32.096303,-102.123281,32.068087,-102.110897,32.082195,-102.117089,SOUTH CURTIS RANCH 3407LP 7H,SOUTH CURTIS RANCH 3405DP 5H,694.699589,931.62879,813.16419
3,SOUTH CURTIS RANCH 3401AP 1H,WCMP A,32.095358,-102.127289,32.065879,-102.120283,32.080619,-102.123786,SOUTH CURTIS RANCH 3403AP 3H,SOUTH CURTIS RANCH 3407AP 7H,1164.976351,2000.0,1582.488175
4,SOUTH CURTIS RANCH 3403SP 3H,SPBYL SD,32.095847,-102.125217,32.066857,-102.116449,32.081352,-102.120833,SOUTH CURTIS RANCH 3403LP 3H,SOUTH CURTIS RANCH 3401SP 1H,637.609983,1148.013482,892.811733
5,SOUTH CURTIS RANCH 3407LP 7H,SPBYL SH,32.097974,-102.119695,32.067581,-102.113064,32.082777,-102.11638,SOUTH CURTIS RANCH 3409LP 9H,SOUTH CURTIS RANCH 3405LP 5H,694.699589,854.523996,774.611793
6,SOUTH CURTIS RANCH 3405SP 5H,SPBYU SD,32.098003,-102.119569,32.067539,-102.113569,32.082771,-102.116569,SOUTH CURTIS RANCH 3401MP 1H,,1348.210271,2000.0,1674.105135
7,SOUTH CURTIS RANCH 3407AP 7H,WCMP A,32.098386,-102.117933,32.068212,-102.110354,32.083299,-102.114144,SOUTH CURTIS RANCH 3409AP 9H,SOUTH CURTIS RANCH 3403AP 3H,1219.890066,2000.0,1609.945033
8,SOUTH CURTIS RANCH 3409AP 9H,WCMP A,32.098444,-102.117683,32.069134,-102.10656,32.083789,-102.112122,SOUTH CURTIS RANCH 3407AP 7H,SOUTH CURTIS RANCH 3403AP 3H,1219.890066,2000.0,1609.945033
9,SOUTH CURTIS RANCH 3409SP 9H,SPBYL SD,32.098297,-102.114792,32.069036,-102.106779,32.083667,-102.110785,SOUTH CURTIS RANCH 3403SP 3H,SOUTH CURTIS RANCH 3403LP 3H,2000.0,2000.0,2000.0


In [8]:
def calc_dist(lon1, lat1, lon2, lat2, radius=3958 * 5280):
    """Great-circle distance between points using the haversine formula.

    Parameters
    ----------
    lon1, lat1 : float or array-like
        Longitude and latitude of the first point(s), in decimal degrees.
    lon2, lat2 : float or array-like
        Longitude and latitude of the second point(s), in decimal degrees.
        Scalars and numpy arrays / pandas Series can be mixed; normal
        broadcasting (and Series index alignment) applies.
    radius : float, optional
        Sphere radius expressed in the unit wanted for the result. The default
        is 3958 miles converted to feet (3958 * 5280), so the result is in
        feet. Pass e.g. 6367 for kilometres or 3958 for miles.

    Returns
    -------
    float or array-like
        Distance along the sphere's surface, in the unit of ``radius``. NaN
        inputs propagate to NaN outputs.
    """
    lon1, lat1, lon2, lat2 = (np.radians(v) for v in (lon1, lat1, lon2, lat2))

    newlon = lon2 - lon1
    newlat = lat2 - lat1

    haver_formula = (
        np.sin(newlat / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin(newlon / 2.0) ** 2
    )

    # Floating-point rounding can push the term marginally above 1 for nearly
    # antipodal points, which would make arcsin return NaN; clamp it first.
    haver_formula = np.minimum(haver_formula, 1.0)

    central_angle = 2 * np.arcsin(np.sqrt(haver_formula))
    return radius * central_angle