# Midland Basin Well Data

## Import dependencies and load data

In [68]:
import pandas as pd
import numpy as np

In [69]:
# CSV Path
path = "Resources\Dist_Test_Data.csv"

In [92]:
# Load the data as a dataframe
df = pd.read_csv(path)
df.head()


Unnamed: 0,well id,Lease Name,Well Number,Well Name,Operator compnay,County,Landing Zone,Surf Lat,Surf Long,BH Lat,...,Date Completed,Total Fluid (bbl),Total Proppant (lb),"Well Spacing, ft",Well Name.1,Fluid (bbl/ft),Prop (lb/ft),Avg PPG,"Oil EUR, Mbbl","Gas EUR, MMcf"
0,42300000000000.0,FR UNOBTANIUM 1411,2BB,FR UNOBTANIUM 1411 2BB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.87056,-101.825582,31.904397,...,9/12/2018,24699748,24188000,,FR UNOBTANIUM 1411 2BB,48,1988,0.98,742.0,1464.0
1,42300000000000.0,FR UNOBTANIUM 1411 CC,6HB,FR UNOBTANIUM 1411 CC 6HB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.870806,-101.824485,31.90517,...,10/1/2018,25690596,24352000,,FR UNOBTANIUM 1411 CC 6HB,50,1993,0.95,605.0,2326.0
2,42300000000000.0,FR UNOBTANIUM 1411 F,12HB,FR UNOBTANIUM 1411 F 12HB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.871761,-101.820501,31.90584,...,12/17/2019,25478032,24250000,,FR UNOBTANIUM 1411 F 12HB,51,2027,0.95,310.0,3027.0
3,42300000000000.0,FR UNOBTANIUM 1411 C,5BB,FR UNOBTANIUM 1411 C 5BB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.870785,-101.824579,31.904779,...,9/25/2018,24923052,23490000,,FR UNOBTANIUM 1411 C 5BB,49,1935,0.94,599.0,1738.0
4,42300000000000.0,FR UNOBTANIUM 1411 D,9HB,FR UNOBTANIUM 1411 D 9HB,CROWNQUEST OPERATING LLC,MIDLAND,WCMP B,31.871256,-101.822479,31.905549,...,12/29/2019,23893552,24702000,,FR UNOBTANIUM 1411 D 9HB,47,2052,1.03,,


In [94]:
# Summarize initial dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 23 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   well id              12 non-null     float64
 1   Lease Name           12 non-null     object 
 2   Well Number          12 non-null     object 
 3   Well Name            12 non-null     object 
 4   Operator compnay     12 non-null     object 
 5   County               12 non-null     object 
 6   Landing Zone         12 non-null     object 
 7   Surf Lat             12 non-null     float64
 8   Surf Long            12 non-null     float64
 9   BH Lat               12 non-null     float64
 10  BH Long              12 non-null     float64
 11  Date Spud            12 non-null     object 
 12  Lateral Len (ft)     12 non-null     int64  
 13  Date Completed       12 non-null     object 
 14  Total Fluid (bbl)    12 non-null     int64  
 15  Total Proppant (lb)  12 non-null     int64

## Data Cleaning

In [96]:
# Drop the columns where all values are null
sel_columns = df[["Well Name","Landing Zone","Surf Lat",'Surf Long',"BH Lat","BH Long"]]
dist_df = sel_columns.copy()

In [102]:
dist_df["Mid Point Lat"] = (dist_df["Surf Lat"] + dist_df["BH Lat"])/2
dist_df["Mid Point Long"] = (dist_df["Surf Long"] + dist_df["BH Long"])/2
dist_df.head()

Unnamed: 0,Well Name,Landing Zone,Surf Lat,Surf Long,BH Lat,BH Long,Mid Point Lat,Mid Point Long
0,FR UNOBTANIUM 1411 2BB,WCMP B,31.87056,-101.825582,31.904397,-101.837609,31.887478,-101.831595
1,FR UNOBTANIUM 1411 CC 6HB,WCMP B,31.870806,-101.824485,31.90517,-101.834592,31.887988,-101.829538
2,FR UNOBTANIUM 1411 F 12HB,WCMP B,31.871761,-101.820501,31.90584,-101.830409,31.8888,-101.825455
3,FR UNOBTANIUM 1411 C 5BB,WCMP B,31.870785,-101.824579,31.904779,-101.835542,31.887782,-101.83006
4,FR UNOBTANIUM 1411 D 9HB,WCMP B,31.871256,-101.822479,31.905549,-101.83283,31.888402,-101.827655


In [117]:
well_dist_df = pd.DataFrame(columns=['Distance','Well Name'])
well_dist_df['Distance'].astype(float)
for index,rows in df.iterrows():
    lat1 = dist_df.at[index,'Mid Point Lat']
    lon1 = dist_df.at[index,'Mid Point Long']
    lz = dist_df.at[index,'Landing Zone']
    print(index,lz)
    filt_df = dist_df[dist_df['Landing Zone'] == lz]
    filt_df.reset_index(inplace=True)
    print(filt_df)
    
    well_dist_df['Well Name'] = filt_df['Well Name'].copy()
    well_dist_df['Distance'] = calc_dist(lat1,lon1,filt_df['Mid Point Lat'],filt_df['Mid Point Long'])
    print(well_dist_df)
    
    well_dist_df.sort_values(by='Distance',inplace=True,ignore_index=True)
    for i,dist in well_dist_df.iterrows():
        temp_dist = well_dist_df.at[i,'Distance']
        if temp_dist>250:
             left_dist = temp_dist
             right_dist = well_dist_df.at[i+1,'Distance']
             break
    print(well_dist_df)
    print(left_dist)
    print(right_dist)
    final_dist = (left_dist + right_dist)/2
    print(final_dist)

0 WCMP B
   index                  Well Name Landing Zone   Surf Lat   Surf Long  \
0      0     FR UNOBTANIUM 1411 2BB       WCMP B  31.870560 -101.825582   
1      1  FR UNOBTANIUM 1411 CC 6HB       WCMP B  31.870806 -101.824485   
2      2  FR UNOBTANIUM 1411 F 12HB       WCMP B  31.871761 -101.820501   
3      3   FR UNOBTANIUM 1411 C 5BB       WCMP B  31.870785 -101.824579   
4      4   FR UNOBTANIUM 1411 D 9HB       WCMP B  31.871256 -101.822479   
5      6   FR UNOBTANIUM 1411 D 8BB       WCMP B  31.871235 -101.822573   
6      7    FR UNOBTANIUM 1411 31BR       WCMP B  31.870518 -101.825769   
7      8  FR UNOBTANIUM 1411 E 11BB       WCMP B  31.871738 -101.820593   

      BH Lat     BH Long  Mid Point Lat  Mid Point Long  
0  31.904397 -101.837609      31.887478     -101.831595  
1  31.905170 -101.834592      31.887988     -101.829538  
2  31.905840 -101.830409      31.888800     -101.825455  
3  31.904779 -101.835542      31.887782     -101.830060  
4  31.905549 -101.832830 

In [66]:
def calc_dist(lon1, lat1,lon2,lat2):
    
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    newlon = lon2 - lon1
    newlat = lat2 - lat1

    haver_formula = np.sin(newlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(newlon/2.0)**2

    dist = 2 * np.arcsin(np.sqrt(haver_formula ))
    ft = 3958 * 5280 * dist # 6367 for distance in KM for miles use 3958
    return ft