In [17]:
import pandas as pd
import numpy as np

# STEP 1: Read the two CSV inputs (relative paths, so both files must sit in
# the notebook's working folder)
WELLS_CSV = "spe_africa_dseats_datathon_2025_wells_dataset.csv"
RESERVOIR_CSV = "reservoir_info.csv"

wells_df = pd.read_csv(WELLS_CSV)
reservoir_df = pd.read_csv(RESERVOIR_CSV)

# Clean numeric reservoir and well data for consistency and computations.

# Tokens that mean "no value" once separators, quotes and padding are removed.
MISSING_TOKENS = ["NA", ""]


def clean_numeric_column(series):
    """Convert a column of number-like text to floats.

    Thousands separators (","), double quotes and surrounding whitespace are
    removed, entries equal to one of ``MISSING_TOKENS`` become NaN, and the
    rest is cast to float. Any other token that is not a valid number still
    raises ``ValueError`` so unexpected values are not silently discarded.

    Parameters
    ----------
    series : pd.Series
        Raw column; may already be numeric or may hold strings.

    Returns
    -------
    pd.Series
        Float series with the same index as ``series``.
    """
    text = (
        series
        .astype(str)
        .str.replace(",", "", regex=False)
        .str.replace('"', "", regex=False)
        .str.strip()
    )
    # mask() blanks the missing tokens to NaN; the cast then parses the rest.
    return text.mask(text.isin(MISSING_TOKENS)).astype(float)


reservoir_numeric_cols = [
    'Initial Reservoir Pressure (PSI)',
    'Bubble Point Pressure (PSI)',
    'Current Average Reservoir Pressure (PSI)',
    'Solution Gas-Oil-Ratio (SCF/BBL)',
    'Formation Volume Factor (RB/STB)'
]

for col in reservoir_numeric_cols:
    reservoir_df[col] = clean_numeric_column(reservoir_df[col])

# Clean and convert well-level numeric data
wells_numeric_cols = [
    'BOTTOMHOLE_FLOWING_PRESSURE (PSI)',
    'DOWNHOLE_TEMPERATURE (deg F)',
    'ANNULUS_PRESS (PSI)',
    'CHOKE_SIZE (%)',
    'WELL_HEAD_PRESSURE (PSI)',
    'WELL_HEAD_TEMPERATURE (deg F)',
    'CUMULATIVE_OIL_PROD (STB)',
    'CUMULATIVE_FORMATION_GAS_PROD (MSCF)',
    'CUMULATIVE_TOTAL_GAS_PROD (MSCF)',
    'CUMULATIVE_WATER_PROD (BBL)',
    'ON_STREAM_HRS'
]

for col in wells_numeric_cols:
    wells_df[col] = clean_numeric_column(wells_df[col])

# Production dates are text in dd-mmm-yy form; entries that do not fit the
# pattern are coerced to NaT instead of raising.
wells_df["PROD_DATE"] = pd.to_datetime(
    wells_df["PROD_DATE"],
    format="%d-%b-%y",
    errors="coerce",
)

# Focus the analysis on the first 20 distinct, non-null well names, then keep an
# independent copy of just their rows.
well_names_20 = wells_df["WELL_NAME"].dropna().unique()[:20]
wells_df_top20 = wells_df.loc[wells_df["WELL_NAME"].isin(well_names_20)].copy()

# Status report followed by a three-row preview of the subset
print(
    "Step 1 completed successfully. Data cleaned and ready for analysis.",
    f"Total unique wells loaded: {len(well_names_20)}",
    "Here’s a preview of the parsed and cleaned data:",
    sep="\n",
)
print(wells_df_top20.head(3))

Step 1 completed successfully. Data cleaned and ready for analysis.
Total unique wells loaded: 20
Here’s a preview of the parsed and cleaned data:
   PROD_DATE WELL_NAME  ON_STREAM_HRS  BOTTOMHOLE_FLOWING_PRESSURE (PSI)  \
0 2014-02-15   Well_#1            0.0                             4050.0   
1 2014-02-16   Well_#1            0.0                             3961.0   
2 2014-02-17   Well_#1            0.0                             3961.0   

   DOWNHOLE_TEMPERATURE (deg F)  ANNULUS_PRESS (PSI)  CHOKE_SIZE (%)  \
0                       189.866                  0.0         1.17951   
1                       189.945                  0.0         2.99440   
2                       190.004                  0.0         1.90349   

   WELL_HEAD_PRESSURE (PSI)  WELL_HEAD_TEMPERATURE (deg F)  \
0                   482.460                         50.864   
1                   328.601                         47.668   
2                   387.218                         48.962   

   CUMULAT

In [18]:
# STEP 2: Reservoir Type Assignment for each of the first 20 wells
#
# Objective: match each well to the most likely reservoir and determine whether
# that reservoir is saturated or undersaturated.

# Largest amount (psi) by which a reservoir's current average pressure may
# exceed a well's peak flowing BHP for the two to be treated as communicating.
# This is a modelling assumption, not a measured value.
MAX_PRESSURE_GAP_PSI = 200


def assign_reservoir(max_bhp, reservoirs, max_gap_psi=MAX_PRESSURE_GAP_PSI):
    """Pick the reservoir whose current pressure sits closest above ``max_bhp``.

    A reservoir is a candidate when ``0 <= P_res - max_bhp <= max_gap_psi``;
    among candidates the one with the smallest gap wins, and ties go to the
    row that appears first in ``reservoirs``. The chosen reservoir is labelled
    "Saturated" when its initial pressure is at or below its bubble point,
    otherwise "Undersaturated".

    Parameters
    ----------
    max_bhp : float
        Highest bottomhole flowing pressure recorded for the well (may be NaN).
    reservoirs : pd.DataFrame
        Must provide 'Reservoir Name', 'Current Average Reservoir Pressure (PSI)',
        'Initial Reservoir Pressure (PSI)' and 'Bubble Point Pressure (PSI)'.
    max_gap_psi : float, optional
        Upper bound on the allowed pressure gap.

    Returns
    -------
    tuple
        ``(reservoir_name, reservoir_type)``; ``(None, "Unknown")`` when no
        reservoir qualifies, and ``(name, "Unknown")`` when the chosen
        reservoir lacks an initial or bubble-point pressure.
    """
    if pd.isnull(max_bhp):
        return None, "Unknown"

    gap = reservoirs['Current Average Reservoir Pressure (PSI)'] - max_bhp
    # between() is inclusive on both ends and False for NaN gaps.
    eligible = gap.between(0, max_gap_psi)
    if not eligible.any():
        return None, "Unknown"

    # Ineligible rows are pushed to +inf so argmin lands on the first smallest
    # eligible gap; positional lookup avoids relying on the frame's index labels.
    closest = reservoirs.iloc[int(gap.where(eligible, np.inf).to_numpy().argmin())]

    p_initial = closest['Initial Reservoir Pressure (PSI)']
    p_bubble = closest['Bubble Point Pressure (PSI)']

    # Reservoir type logic (based on fluid phase conditions)
    if pd.isnull(p_initial) or pd.isnull(p_bubble):
        res_type = "Unknown"
    elif p_initial <= p_bubble:
        res_type = "Saturated"
    else:
        res_type = "Undersaturated"
    return closest['Reservoir Name'], res_type


# Peak flowing BHP per well, computed once instead of re-filtering per well.
max_bhp_by_well = (
    wells_df_top20
    .groupby("WELL_NAME", sort=False)['BOTTOMHOLE_FLOWING_PRESSURE (PSI)']
    .max()
)

reservoir_type_results = []
for well in well_names_20:
    assigned_reservoir, res_type = assign_reservoir(
        max_bhp_by_well.get(well, np.nan), reservoir_df
    )
    reservoir_type_results.append({
        "WELL_NAME": well,
        "Reservoir Name": assigned_reservoir,
        "Reservoir Type": res_type
    })

# Combine all results into a single dataframe
res_type_df = pd.DataFrame(reservoir_type_results)

# Output for review
print("Step 2 complete — each well has been linked to its most likely reservoir.")
print("We also classified the fluid system as Saturated or Undersaturated based on P_i and P_bubble.")
print(res_type_df)

Step 2 complete — each well has been linked to its most likely reservoir.
We also classified the fluid system as Saturated or Undersaturated based on P_i and P_bubble.
   WELL_NAME Reservoir Name  Reservoir Type
0    Well_#1           JANI  Undersaturated
1    Well_#2           KEMA  Undersaturated
2    Well_#3           MAKO       Saturated
3    Well_#4           DEPU       Saturated
4    Well_#5           MAKO       Saturated
5    Well_#6           KEMA  Undersaturated
6    Well_#7           KEMA  Undersaturated
7    Well_#8           ACHI  Undersaturated
8    Well_#9           DEPU       Saturated
9   Well_#10           JANI  Undersaturated
10  Well_#11           ACHI  Undersaturated
11  Well_#12           ACHI  Undersaturated
12  Well_#13           DEPU       Saturated
13  Well_#14           MAKO       Saturated
14  Well_#15           JANI  Undersaturated
15  Well_#16           KEMA  Undersaturated
16  Well_#17           DEPU       Saturated
17  Well_#18           ACHI  Undersatura