In [1]:
import pandas as pd
import numpy as np

# STEP 1: Load the cleaned CSV data file (confirm it's in the same folder)
wells_df = pd.read_csv("spe_africa_dseats_datathon_2025_wells_dataset.csv")
reservoir_df = pd.read_csv("reservoir_info.csv")


def clean_numeric_columns(frame, columns):
    """Convert text-formatted numeric columns of ``frame`` to float, in place.

    Each value is rendered as text, commas and double quotes are removed,
    surrounding whitespace is stripped, the placeholder ``"NA"`` becomes NaN,
    and the result is cast to float.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose columns are overwritten.
    columns : iterable of str
        Names of the columns to convert.

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, returned for convenience.

    Raises
    ------
    KeyError
        If a requested column is not present in ``frame``.
    ValueError
        If a value is still not parseable as a float after cleaning.
    """
    for column in columns:
        text = frame[column].astype(str)
        # regex=False: these are literal characters, and the default for
        # `regex` differs between pandas versions.
        for unwanted in (",", '"'):
            text = text.str.replace(unwanted, "", regex=False)
        # Strip so that a padded placeholder such as " NA " is still recognised.
        text = text.str.strip()
        frame[column] = text.replace("NA", np.nan).astype(float)
    return frame


# Clean numeric reservoir data for consistency and computations
reservoir_numeric_cols = [
    'Initial Reservoir Pressure (PSI)',
    'Bubble Point Pressure (PSI)',
    'Current Average Reservoir Pressure (PSI)',
    'Solution Gas-Oil-Ratio (SCF/BBL)',
    'Formation Volume Factor (RB/STB)'
]
clean_numeric_columns(reservoir_df, reservoir_numeric_cols)

# Clean and convert well-level numeric data
wells_numeric_cols = [
    'BOTTOMHOLE_FLOWING_PRESSURE (PSI)',
    'DOWNHOLE_TEMPERATURE (deg F)',
    'ANNULUS_PRESS (PSI)',
    'CHOKE_SIZE (%)',
    'WELL_HEAD_PRESSURE (PSI)',
    'WELL_HEAD_TEMPERATURE (deg F)',
    'CUMULATIVE_OIL_PROD (STB)',
    'CUMULATIVE_FORMATION_GAS_PROD (MSCF)',
    'CUMULATIVE_TOTAL_GAS_PROD (MSCF)',
    'CUMULATIVE_WATER_PROD (BBL)',
    'ON_STREAM_HRS'
]
clean_numeric_columns(wells_df, wells_numeric_cols)

# Parse production dates written as dd-mmm-yy; values that do not match
# the format become NaT instead of raising.
raw_prod_dates = wells_df["PROD_DATE"]
wells_df["PROD_DATE"] = pd.to_datetime(
    raw_prod_dates,
    errors="coerce",
    format="%d-%b-%y",
)

# Keep the first 20 distinct well names (in order of first appearance)
# and take an independent copy of their rows for focused analysis.
distinct_well_names = wells_df["WELL_NAME"].dropna().unique()
well_names_20 = distinct_well_names[:20]
is_selected_well = wells_df["WELL_NAME"].isin(well_names_20)
wells_df_top20 = wells_df.loc[is_selected_well].copy()

# Status report. Note the count printed is the size of the 20-well selection.
print("Step 1 completed successfully. Data cleaned and ready for analysis.")
print(f"Total unique wells loaded: {len(well_names_20)}")
print("Here’s a preview of the parsed and cleaned data:")
print(wells_df_top20.head(3))

Step 1 completed successfully. Data cleaned and ready for analysis.
Total unique wells loaded: 20
Here’s a preview of the parsed and cleaned data:
   PROD_DATE WELL_NAME  ON_STREAM_HRS  BOTTOMHOLE_FLOWING_PRESSURE (PSI)  \
0 2014-02-15   Well_#1            0.0                             4050.0   
1 2014-02-16   Well_#1            0.0                             3961.0   
2 2014-02-17   Well_#1            0.0                             3961.0   

   DOWNHOLE_TEMPERATURE (deg F)  ANNULUS_PRESS (PSI)  CHOKE_SIZE (%)  \
0                       189.866                  0.0         1.17951   
1                       189.945                  0.0         2.99440   
2                       190.004                  0.0         1.90349   

   WELL_HEAD_PRESSURE (PSI)  WELL_HEAD_TEMPERATURE (deg F)  \
0                   482.460                         50.864   
1                   328.601                         47.668   
2                   387.218                         48.962   

   CUMULAT

In [4]:
# STEP 2: Reservoir Type Assignment
# NOTE(review): the reservoir names and pressures below are literals typed into
# this cell, not values read from reservoir_df — confirm they are the intended inputs.
reservoir_info_df = pd.DataFrame(
    {
        "Reservoir Name": ["Delta", "Keta", "Tano"],
        "Current Average Reservoir Pressure (PSI)": [4000, 4500, 4200],
    }
)

# Sample logic: wells are dealt out round-robin by their position in
# well_names_20 (position 0 -> Delta, 1 -> Keta, 2 -> Tano, then repeat).
reservoir_cycle = ("Delta", "Keta", "Tano")
res_assignment = [
    {"Well": well_name, "Reservoir Name": reservoir_cycle[position % 3]}
    for position, well_name in enumerate(well_names_20)
]

res_type_df = pd.DataFrame(res_assignment)

# Output (Humanized)
print("Step 2: Reservoir Type Assignment Completed")
print(res_type_df)

Step 2: Reservoir Type Assignment Completed
        Well Reservoir Name
0    Well_#1          Delta
1    Well_#2           Keta
2    Well_#3           Tano
3    Well_#4          Delta
4    Well_#5           Keta
5    Well_#6           Tano
6    Well_#7          Delta
7    Well_#8           Keta
8    Well_#9           Tano
9   Well_#10          Delta
10  Well_#11           Keta
11  Well_#12           Tano
12  Well_#13          Delta
13  Well_#14           Keta
14  Well_#15           Tano
15  Well_#16          Delta
16  Well_#17           Keta
17  Well_#18           Tano
18  Well_#19          Delta
19  Well_#20           Keta


In [5]:
import numpy as np

# Step 6: Productivity Index Calculation
#
# For every well in well_names_20:
#   PI = average daily oil rate / (reservoir pressure - average bottomhole flowing pressure)
# The daily oil rate is the row-to-row difference of cumulative oil after sorting
# by PROD_DATE; only strictly positive differences enter the average.
# Bottomhole pressure readings equal to 0 are treated as missing.
# A well gets "Insufficient Data" when any input is missing or the reservoir
# pressure does not exceed the average flowing pressure.
#
# NOTE(review): the rate average uses only rows with a positive rate, while the
# pressure average uses every non-zero reading for the well — confirm that
# mixing these two bases is the intended drawdown definition.

# Build the lookups once instead of re-filtering the DataFrames for every well.
# drop_duplicates keeps the first row per key, matching a first-match lookup.
well_to_reservoir = (
    res_type_df.drop_duplicates("Well")
    .set_index("Well")["Reservoir Name"]
    .to_dict()
)
reservoir_to_pressure = (
    reservoir_info_df.drop_duplicates("Reservoir Name")
    .set_index("Reservoir Name")["Current Average Reservoir Pressure (PSI)"]
    .to_dict()
)

pi_results = []

for well in well_names_20:
    well_data = wells_df[wells_df["WELL_NAME"] == well].copy()
    well_data = well_data.sort_values("PROD_DATE")

    # Daily oil rate from successive cumulative readings
    well_data["Daily_Oil_Rate"] = well_data["CUMULATIVE_OIL_PROD (STB)"].diff()
    valid_data = well_data[well_data["Daily_Oil_Rate"] > 0]

    # Average daily oil rate over rows with a positive rate (None if there are none)
    avg_oil_rate = valid_data["Daily_Oil_Rate"].mean() if not valid_data.empty else None

    # Average bottomhole flowing pressure, ignoring zero readings.
    # mean() yields NaN when nothing is left; NaN is truthy in Python, so it is
    # normalised to None here and tested explicitly below.
    avg_pwf = well_data["BOTTOMHOLE_FLOWING_PRESSURE (PSI)"].replace(0, np.nan).mean()
    if pd.isna(avg_pwf):
        avg_pwf = None

    # Reservoir assigned to this well, and that reservoir's pressure
    reservoir = well_to_reservoir.get(well)
    pr = reservoir_to_pressure.get(reservoir) if reservoir else None
    if pr is not None and pd.isna(pr):
        pr = None

    # Productivity index needs all three inputs and a positive drawdown
    has_all_inputs = avg_oil_rate is not None and avg_pwf is not None and pr is not None
    if has_all_inputs and pr > avg_pwf:
        pi = round(avg_oil_rate / (pr - avg_pwf), 4)
    else:
        pi = "Insufficient Data"

    pi_results.append({
        "WELL_NAME": well,
        "Reservoir": reservoir,
        "Avg_Oil_Rate (STB/day)": round(avg_oil_rate, 2) if avg_oil_rate is not None else None,
        "Avg_Pwf (PSI)": round(avg_pwf, 2) if avg_pwf is not None else None,
        "Reservoir Pressure (PSI)": pr,
        "Productivity Index (STB/day/psi)": pi
    })

# Create results DataFrame
pi_df = pd.DataFrame(pi_results)

# Display
print("Step 6: Productivity Index Calculation Results")
print(pi_df)


Step 6: Productivity Index Calculation Results
   WELL_NAME Reservoir  Avg_Oil_Rate (STB/day)  Avg_Pwf (PSI)  \
0    Well_#1     Delta                  526.73        2943.25   
1    Well_#2      Keta                  326.45        3164.12   
2    Well_#3      Tano                  975.36        2504.06   
3    Well_#4     Delta                 1350.85        2132.81   
4    Well_#5      Keta                  731.05        2592.76   
5    Well_#6      Tano                 3677.58        3126.05   
6    Well_#7     Delta                 1123.77        3331.19   
7    Well_#8      Keta                  874.37        2516.35   
8    Well_#9      Tano                  425.50        1887.63   
9   Well_#10     Delta                  289.66        3995.65   
10  Well_#11      Keta                  523.62        2427.37   
11  Well_#12      Tano                  793.93        2086.07   
12  Well_#13     Delta                  817.87        1611.37   
13  Well_#14      Keta                  777