# Midland Basin Well Data

## Import dependencies and load data

In [2]:
import pandas as pd
import numpy as np

In [3]:
# CSV Path
path = "Resources\Dist_Test_Data.csv"

In [4]:
# Load the data as a dataframe
df = pd.read_csv(path)
df.head()

Unnamed: 0,well id,Lease Name,Well Number,Operator compnay,County,Landing Zone,Surf Lat,Surf Long,BH Lat,BH Long,...,Date Completed,Total Fluid (bbl),Total Proppant (lb),"Well Spacing, ft",Well Name,Fluid (bbl/ft),Prop (lb/ft),Avg PPG,"Oil EUR, Mbbl","Gas EUR, MMcf"
0,42329400000000.0,FR UNOBTANIUM 1411,2BB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.87056,-101.825582,31.904397,-101.837609,...,9/12/2018,24699748,24188000,,FR UNOBTANIUM 1411 2BB,48,1988,0.98,742.0,1464.0
1,42329400000000.0,FR UNOBTANIUM 1411 CC,6HB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.870806,-101.824485,31.90517,-101.834592,...,10/1/2018,25690596,24352000,,FR UNOBTANIUM 1411 CC 6HB,50,1993,0.95,605.0,2326.0
2,42329400000000.0,FR UNOBTANIUM 1411 F,12HB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.871761,-101.820501,31.90584,-101.830409,...,12/17/2019,25478032,24250000,,FR UNOBTANIUM 1411 F 12HB,51,2027,0.95,310.0,3027.0
3,42329400000000.0,FR UNOBTANIUM 1411 C,5BB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.870785,-101.824579,31.904779,-101.835542,...,9/25/2018,24923052,23490000,,FR UNOBTANIUM 1411 C 5BB,49,1935,0.94,599.0,1738.0
4,42329400000000.0,FR UNOBTANIUM 1411 D,9HB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.871256,-101.822479,31.905549,-101.83283,...,12/29/2019,23893552,24702000,,FR UNOBTANIUM 1411 D 9HB,47,2052,1.03,,


In [5]:
# Summarize initial dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 22 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   well id              12 non-null     float64
 1   Lease Name           12 non-null     object 
 2   Well Number          12 non-null     object 
 3   Operator compnay     12 non-null     object 
 4   County               12 non-null     object 
 5   Landing Zone         12 non-null     object 
 6   Surf Lat             12 non-null     float64
 7   Surf Long            12 non-null     float64
 8   BH Lat               12 non-null     float64
 9   BH Long              12 non-null     float64
 10  Date Spud            12 non-null     object 
 11  Lateral Len (ft)     12 non-null     int64  
 12  Date Completed       12 non-null     object 
 13  Total Fluid (bbl)    12 non-null     int64  
 14  Total Proppant (lb)  12 non-null     int64  
 15  Well Spacing, ft     0 non-null      float

## Data Cleaning

In [6]:
# Drop the columns where all values are null
df = df[["well id","Surf Lat",'Surf Long',"BH Lat","BH Long"]]
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   well id    12 non-null     float64
 1   Surf Lat   12 non-null     float64
 2   Surf Long  12 non-null     float64
 3   BH Lat     12 non-null     float64
 4   BH Long    12 non-null     float64
dtypes: float64(5)
memory usage: 608.0 bytes


In [7]:
df["Mid Point Lat"] = (df["Surf Lat"] + df["BH Lat"])/2
df["Mid Point Long"] = (df["Surf Long"] + df["BH Long"])/2
df.head()

Unnamed: 0,well id,Surf Lat,Surf Long,BH Lat,BH Long,Mid Point Lat,Mid Point Long
0,42329400000000.0,31.87056,-101.825582,31.904397,-101.837609,31.887478,-101.831595
1,42329400000000.0,31.870806,-101.824485,31.90517,-101.834592,31.887988,-101.829538
2,42329400000000.0,31.871761,-101.820501,31.90584,-101.830409,31.8888,-101.825455
3,42329400000000.0,31.870785,-101.824579,31.904779,-101.835542,31.887782,-101.83006
4,42329400000000.0,31.871256,-101.822479,31.905549,-101.83283,31.888402,-101.827655


In [44]:
well_dist_df = pd.DataFrame(columns=['Distance'])
well_dist_df.astype(float)
#for index,rows in df.iterrows():
lat1 = df.at[0,'Mid Point Lat']
lon1 = df.at[0,'Mid Point Long']
well_dist_df['Distance'] = calc_dist(lat1,lon1,df['Mid Point Lat'],df['Mid Point Long'])
well_dist_df.sort_values(by='Distance',inplace=True,ignore_index=True)
left_dist = well_dist_df['Distance'][1]
right_dist = well_dist_df['Distance'][2]
final_dist = (left_dist + right_dist)/2
print(final_dist)

91.01872086540143


In [14]:
def calc_dist(lon1, lat1,lon2,lat2):
    
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    newlon = lon2 - lon1
    newlat = lat2 - lat1

    haver_formula = np.sin(newlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(newlon/2.0)**2

    dist = 2 * np.arcsin(np.sqrt(haver_formula ))
    ft = 3958 * 5280 * dist # 6367 for distance in KM for miles use 3958
    return ft