# Optimizing Oil Well Development: Predicting Reservoir Volumes and Maximizing Profitability

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.utils import resample

In [2]:
geo_data_0 = pd.read_csv('geo_data_0.csv')
geo_data_1 = pd.read_csv('geo_data_1.csv')
geo_data_2 = pd.read_csv('geo_data_2.csv')

In [47]:
geo_data_0.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 5 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   id       100000 non-null  object 
 1   f0       100000 non-null  float64
 2   f1       100000 non-null  float64
 3   f2       100000 non-null  float64
 4   product  100000 non-null  float64
dtypes: float64(4), object(1)
memory usage: 3.8+ MB


In [49]:
geo_data_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 5 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   id       100000 non-null  object 
 1   f0       100000 non-null  float64
 2   f1       100000 non-null  float64
 3   f2       100000 non-null  float64
 4   product  100000 non-null  float64
dtypes: float64(4), object(1)
memory usage: 3.8+ MB


In [8]:
geo_data_0.head(10)

Unnamed: 0,id,f0,f1,f2,product
0,txEyH,0.705745,-0.497823,1.22117,105.280062
1,2acmU,1.334711,-0.340164,4.36508,73.03775
2,409Wp,1.022732,0.15199,1.419926,85.265647
3,iJLyR,-0.032172,0.139033,2.978566,168.620776
4,Xdl7t,1.988431,0.155413,4.751769,154.036647
5,wX4Hy,0.96957,0.489775,-0.735383,64.741541
6,tL6pL,0.645075,0.530656,1.780266,49.055285
7,BYPU6,-0.400648,0.808337,-5.62467,72.943292
8,j9Oui,0.643105,-0.551583,2.372141,113.35616
9,OLuZU,2.173381,0.563698,9.441852,127.910945


In [9]:
geo_data_1.head(10)

Unnamed: 0,id,f0,f1,f2,product
0,kBEdx,-15.001348,-8.276,-0.005876,3.179103
1,62mP7,14.272088,-3.475083,0.999183,26.953261
2,vyE1P,6.263187,-5.948386,5.00116,134.766305
3,KcrkZ,-13.081196,-11.506057,4.999415,137.945408
4,AHL4O,12.702195,-8.147433,5.004363,134.766305
5,HHckp,-3.32759,-2.205276,3.003647,84.038886
6,h5Ujo,-11.142655,-10.133399,4.002382,110.992147
7,muH9x,4.234715,-0.001354,2.004588,53.906522
8,YiRkx,13.355129,-0.332068,4.998647,134.766305
9,jG6Gi,1.069227,-11.025667,4.997844,137.945408


In [10]:
geo_data_2.head(10)

Unnamed: 0,id,f0,f1,f2,product
0,fwXo0,-1.146987,0.963328,-0.828965,27.758673
1,WJtFt,0.262778,0.269839,-2.530187,56.069697
2,ovLUW,0.194587,0.289035,-5.586433,62.87191
3,q6cA6,2.23606,-0.55376,0.930038,114.572842
4,WPMUX,-0.515993,1.716266,5.899011,149.600746
5,LzZXx,-0.758092,0.710691,2.585887,90.222465
6,WBHRv,-0.574891,0.317727,1.773745,45.641478
7,XO8fn,-1.906649,-2.45835,-0.177097,72.48064
8,ybmQ5,1.776292,-0.279356,3.004156,106.616832
9,OilcN,-1.214452,-0.439314,5.922514,52.954532


## Model Training and Testing

In [12]:
def train_test_model(data, region):
    x = data[['f0', 'f1', 'f2']]
    y = data['product']
    
    x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.25, random_state=42)

    model = LinearRegression()
    model.fit(x_train, y_train)
    
    predictions = model.predict(x_valid)
    
    mse = mean_squared_error(y_valid, predictions)
    
    print(f"Region {region} - Mean Squared Error: {mse}")
    
    return predictions, y_valid


In [13]:
predictions_0, true_values_0 = train_test_model(geo_data_0, 0)
predictions_1, true_values_1 = train_test_model(geo_data_1, 1)
predictions_2, true_values_2 = train_test_model(geo_data_2, 2)

Region 0 - Mean Squared Error: 1425.5608700093812
Region 1 - Mean Squared Error: 0.7925986566392037
Region 2 - Mean Squared Error: 1611.6910636385903


## Profit Calculation Preparation

In [34]:
investment_per_well = 100_000_000
target_barrels_per_well = 111.1
number_of_wells = 200

In [37]:
revenue_per_barrel = investment_per_well / (number_of_wells * target_barrels_per_well)

print("Revenue per barrel:", revenue_per_barrel)


Revenue per barrel: 4500.450045004501


Volume breakeven calculation 

In [36]:
required_volume = investment_per_well / revenue_per_barrel

calculate average volume

In [38]:
average_volume_0 = predictions_0.mean()
average_volume_1 = predictions_1.mean()
average_volume_2 = predictions_2.mean()

print(f"\nRequired volume for breakeven: {required_volume}\n")
print(f"Average volume for Region 0: {average_volume_0}")
print(f"Average volume for Region 1: {average_volume_1}")
print(f"Average volume for Region 2: {average_volume_2}")


Required volume for breakeven: 22220.0

Average volume for Region 0: 92.39879990657768
Average volume for Region 1: 68.71287803913762
Average volume for Region 2: 94.77102387765939


## Profit Calculation

In [45]:
def calculate_profit_for_selected_wells(predictions_0, predictions_1, predictions_2):
    # Convert predictions to Pandas Series
    predictions_0_series = pd.Series(predictions_0)
    predictions_1_series = pd.Series(predictions_1)
    predictions_2_series = pd.Series(predictions_2)

    # Select top 200 wells for each region based on predictions
    selected_wells_0 = predictions_0_series.sort_values(ascending=False).head(200)
    selected_wells_1 = predictions_1_series.sort_values(ascending=False).head(200)
    selected_wells_2 = predictions_2_series.sort_values(ascending=False).head(200)

    # Combine selected wells from all regions
    selected_wells_combined = pd.concat([selected_wells_0, selected_wells_1, selected_wells_2])

    # Calculate total volume for the selected wells
    total_volume = selected_wells_combined.sum()

    # Calculate revenue and profit
    revenue = total_volume * revenue_per_barrel
    profit = revenue - investment_per_well * number_of_wells

    # Print summary
    print(f"\nTotal Revenue from Selected Wells: {revenue:.2f}")
    print(f"Total Profit from Selected Wells: {profit:.2f}")

    return profit

# Call the function with predictions from each region
profit_from_selected_wells = calculate_profit_for_selected_wells(predictions_0, predictions_1, predictions_2)



Total Revenue from Selected Wells: 397655546.64
Total Profit from Selected Wells: -19602344453.36


In [51]:
def calculate_profit_for_selected_wells(predictions_0, predictions_1, predictions_2):
    # Convert predictions to Pandas Series
    predictions_0_series = pd.Series(predictions_0)
    predictions_1_series = pd.Series(predictions_1)
    predictions_2_series = pd.Series(predictions_2)

    # Select top 200 wells for each region based on predictions
    selected_wells_0 = predictions_0_series.sort_values(ascending=False).head(200)
    selected_wells_1 = predictions_1_series.sort_values(ascending=False).head(200)
    selected_wells_2 = predictions_2_series.sort_values(ascending=False).head(200)

    # Combine selected wells from all regions
    selected_wells_combined = pd.concat([selected_wells_0, selected_wells_1, selected_wells_2])

    # Calculate total volume for the selected wells
    total_volume = selected_wells_combined.sum()

    # Calculate revenue and profit
    revenue = total_volume * revenue_per_barrel
    profit = revenue - investment_per_well * number_of_wells
    
    # Print summary
    print(f"\nTotal Number of Wells Used in Region 0: {selected_wells_0.shape[0]}")
    print(f"Total Number of Wells Used in Region 1: {selected_wells_1.shape[0]}")
    print(f"Total Number of Wells Used in Region 2: {selected_wells_2.shape[0]}")
    print(f"Total Number of Wells Used in All Regions: {selected_wells_combined.shape[0]}")
    print(f"\nTotal Revenue from Selected Wells: {revenue:.2f}")
    print(f"Total Profit from Selected Wells: {profit:.2f}")

    return profit

# Call the function with predictions from each region
profit_from_selected_wells = calculate_profit_for_selected_wells(predictions_0, predictions_1, predictions_2)



Total Number of Wells Used in Region 0: 200
Total Number of Wells Used in Region 1: 200
Total Number of Wells Used in Region 2: 200
Total Number of Wells Used in All Regions: 600

Total Revenue from Selected Wells: 397655546.64
Total Profit from Selected Wells: -19602344453.36


In [54]:
import pandas as pd

def calculate_profit_for_selected_wells(predictions_0, predictions_1, predictions_2):
    # Convert predictions to Pandas Series
    predictions_0_series = pd.Series(predictions_0)
    predictions_1_series = pd.Series(predictions_1)
    predictions_2_series = pd.Series(predictions_2)

    # Select top 200 wells for each region based on predictions
    selected_wells_0 = predictions_0_series.sort_values(ascending=False).head(200)
    selected_wells_1 = predictions_1_series.sort_values(ascending=False).head(200)
    selected_wells_2 = predictions_2_series.sort_values(ascending=False).head(200)

    # Calculate total volume for the selected wells in each region
    total_volume_0 = selected_wells_0.sum()
    total_volume_1 = selected_wells_1.sum()
    total_volume_2 = selected_wells_2.sum()

    # Calculate revenue and profit for each region
    revenue_0 = total_volume_0 * revenue_per_barrel
    profit_0 = revenue_0 - (investment_per_well * number_of_wells)

    revenue_1 = total_volume_1 * revenue_per_barrel
    profit_1 = revenue_1 - (investment_per_well * number_of_wells)

    revenue_2 = total_volume_2 * revenue_per_barrel
    profit_2 = revenue_2 - (investment_per_well * number_of_wells)
    
    # Print summaries for each region
    print("\nRegion 0:")
    print(f"Total Volume: {total_volume_0:.2f}")
    print(f"Total Revenue from Selected Wells: {revenue_0:.2f}")
    print(f"Total Profit from Selected Wells: {profit_0:.2f}")

    print("\nRegion 1:")
    print(f"Total Volume: {total_volume_1:.2f}")
    print(f"Total Revenue from Selected Wells: {revenue_1:.2f}")
    print(f"Total Profit from Selected Wells: {profit_1:.2f}")

    print("\nRegion 2:")
    print(f"Total Volume: {total_volume_2:.2f}")
    print(f"Total Revenue from Selected Wells: {revenue_2:.2f}")
    print(f"Total Profit from Selected Wells: {profit_2:.2f}")

    return profit_0, profit_1, profit_2

# Call the function with predictions from each region
profit_from_selected_wells_0, profit_from_selected_wells_1, profit_from_selected_wells_2 = calculate_profit_for_selected_wells(predictions_0, predictions_1, predictions_2)



Region 0:
Total Volume: 30881.46
Total Revenue from Selected Wells: 138980482.84
Total Profit from Selected Wells: -19861019517.16

Region 1:
Total Volume: 27748.75
Total Revenue from Selected Wells: 124881869.34
Total Profit from Selected Wells: -19875118130.66

Region 2:
Total Volume: 29728.85
Total Revenue from Selected Wells: 133793194.46
Total Profit from Selected Wells: -19866206805.54


In [39]:
# Step 4: Profit Calculation Function
def calculate_profit(predictions, region):
    # Convert predictions to a Pandas DataFrame
    predictions_df = pd.DataFrame(predictions, columns=['product'])
    
    # Sort values and select top wells
    top_wells = predictions_df.sort_values(by='product', ascending=False).head(number_of_wells)
    selected_wells = top_wells.index

    total_volume = top_wells.sum()['product']
    revenue = total_volume * revenue_per_barrel
    profit = revenue - investment_per_well * len(selected_wells)

    print(f"\nRegion {region} - Total Revenue: {revenue:.2f}")
    print(f"Region {region} - Total Profit: {profit:.2f}")

calculate_profit(predictions_0, 0)
calculate_profit(predictions_1, 1)
calculate_profit(predictions_2, 2)



Region 0 - Total Revenue: 138980482.84
Region 0 - Total Profit: -19861019517.16

Region 1 - Total Revenue: 124881869.34
Region 1 - Total Profit: -19875118130.66

Region 2 - Total Revenue: 133793194.46
Region 2 - Total Profit: -19866206805.54
