In [None]:
# Code 2: Compute the reservoir type and show the combined results for wells 1 to 20

In [None]:
# Corrected Code 2: Reservoir Type Classification

In [1]:
import pandas as pd
import numpy as np

# === Configuration ===
WELLS_CSV = "spe_africa_dseats_datathon_2025_wells_dataset.csv"
RESERVOIR_CSV = "reservoir_info.csv"

# Number of wells (in order of first appearance in the wells file) to classify.
N_WELLS = 20

# A reservoir is a candidate for a well only when
# 0 <= (current average reservoir pressure - well's max BHP) <= this value.
MAX_PRESSURE_GAP_PSI = 200

BHP_COL = 'BOTTOMHOLE_FLOWING_PRESSURE (PSI)'
RES_PRESSURE_COL = 'Current Average Reservoir Pressure (PSI)'

# Columns that arrive as text and must be converted to float.
num_cols_reservoir = [
    'Initial Reservoir Pressure (PSI)',
    'Bubble Point Pressure (PSI)',
    'Current Average Reservoir Pressure (PSI)',
    'Solution Gas-Oil-Ratio (SCF/BBL)',
    'Formation Volume Factor (RB/STB)'
]

num_cols_wells = [
    'BOTTOMHOLE_FLOWING_PRESSURE (PSI)',
    'DOWNHOLE_TEMPERATURE (deg F)',
    'ANNULUS_PRESS (PSI)',
    'CHOKE_SIZE (%)',
    'WELL_HEAD_PRESSURE (PSI)',
    'WELL_HEAD_TEMPERATURE (deg F)',
    'CUMULATIVE_OIL_PROD (STB)',
    'CUMULATIVE_FORMATION_GAS_PROD (MSCF)',
    'CUMULATIVE_TOTAL_GAS_PROD (MSCF)',
    'CUMULATIVE_WATER_PROD (BBL)',
    'ON_STREAM_HRS'
]


def clean_numeric_columns(frame, columns):
    """Convert the given columns of ``frame`` to float64, in place.

    Each value is turned into text, commas and double quotes are removed,
    surrounding whitespace is stripped, and the result is parsed as a number.
    Anything that still cannot be parsed (for example "NA") becomes NaN
    instead of raising.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose columns are overwritten.
    columns : iterable of str
        Names of the columns to convert.

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, for convenience.
    """
    for col in columns:
        as_text = (
            frame[col]
            .astype(str)
            .str.replace(",", "", regex=False)
            .str.replace('"', "", regex=False)
            .str.strip()
        )
        # to_numeric may return an integer dtype when every value is whole;
        # the trailing astype keeps the column float64 in every case.
        frame[col] = pd.to_numeric(as_text, errors="coerce").astype(float)
    return frame


def find_matching_reservoir(max_bhp, reservoirs, max_gap_psi=MAX_PRESSURE_GAP_PSI):
    """Pick the reservoir whose current pressure sits closest above ``max_bhp``.

    Parameters
    ----------
    max_bhp : float
        Maximum bottomhole flowing pressure of the well; NaN yields no match.
    reservoirs : pandas.DataFrame
        Reservoir table with a numeric ``RES_PRESSURE_COL`` column. Its index
        is assumed to be unique (true for the default index ``read_csv``
        creates here).
    max_gap_psi : float, optional
        Largest accepted value of ``reservoir pressure - max_bhp``.

    Returns
    -------
    pandas.Series or None
        The selected reservoir row, or None when no reservoir falls inside
        the ``[0, max_gap_psi]`` window. On ties the first row wins.
    """
    gap = reservoirs[RES_PRESSURE_COL] - max_bhp
    # between() is inclusive on both ends and evaluates to False for NaN.
    in_window = gap.between(0, max_gap_psi)
    if not in_window.any():
        return None
    return reservoirs.loc[gap[in_window].idxmin()]


def classify_reservoir_type(p_initial, p_bubble):
    """Return "Saturated", "Undersaturated" or "Unknown" for one reservoir.

    "Saturated" when the initial pressure is at or below the bubble point
    pressure, "Undersaturated" when it is above, and "Unknown" when either
    pressure is missing.
    """
    if pd.isnull(p_initial) or pd.isnull(p_bubble):
        return "Unknown"
    return "Saturated" if p_initial <= p_bubble else "Undersaturated"


# === Load datasets ===
wells_df = pd.read_csv(WELLS_CSV)
reservoir_df = pd.read_csv(RESERVOIR_CSV)

# === Clean numeric columns ===
clean_numeric_columns(reservoir_df, num_cols_reservoir)
clean_numeric_columns(wells_df, num_cols_wells)

# === Convert PROD_DATE to datetime (unparseable dates become NaT) ===
# NOTE(review): the recorded run shows a warning raised from this line. If it
# is pandas' date-format inference warning, pass an explicit ``format=`` once
# the date layout of the file has been confirmed.
wells_df["PROD_DATE"] = pd.to_datetime(wells_df["PROD_DATE"], errors='coerce')

# === Select the first N_WELLS unique wells (order of first appearance) ===
well_names_20 = wells_df["WELL_NAME"].unique()[:N_WELLS]

# === Step 2: Determine Reservoir Type for each well ===

# One pass over the wells table instead of one boolean scan per well.
max_bhp_by_well = wells_df.groupby("WELL_NAME")[BHP_COL].max()

reservoir_type_results = []

for well in well_names_20:
    # .get() falls back to NaN for a well name that has no group
    # (for example a missing name), which then matches no reservoir.
    max_bhp = max_bhp_by_well.get(well, np.nan)
    selected_res = find_matching_reservoir(max_bhp, reservoir_df)

    if selected_res is None:
        assigned_reservoir = None
        res_type = None
    else:
        assigned_reservoir = selected_res['Reservoir Name']
        res_type = classify_reservoir_type(
            selected_res['Initial Reservoir Pressure (PSI)'],
            selected_res['Bubble Point Pressure (PSI)'],
        )

    reservoir_type_results.append({
        "WELL_NAME": well,
        "Reservoir Name": assigned_reservoir,
        "Reservoir Type": res_type
    })

res_type_df = pd.DataFrame(reservoir_type_results)

print("Step 2: Reservoir Type for Wells 1 to 20:")
print(res_type_df)

  wells_df["PROD_DATE"] = pd.to_datetime(wells_df["PROD_DATE"], errors='coerce')


Step 2: Reservoir Type for Wells 1 to 20:
   WELL_NAME Reservoir Name  Reservoir Type
0    Well_#1           JANI  Undersaturated
1    Well_#2           KEMA  Undersaturated
2    Well_#3           MAKO       Saturated
3    Well_#4           DEPU       Saturated
4    Well_#5           MAKO       Saturated
5    Well_#6           KEMA  Undersaturated
6    Well_#7           KEMA  Undersaturated
7    Well_#8           ACHI  Undersaturated
8    Well_#9           DEPU       Saturated
9   Well_#10           JANI  Undersaturated
10  Well_#11           ACHI  Undersaturated
11  Well_#12           ACHI  Undersaturated
12  Well_#13           DEPU       Saturated
13  Well_#14           MAKO       Saturated
14  Well_#15           JANI  Undersaturated
15  Well_#16           KEMA  Undersaturated
16  Well_#17           DEPU       Saturated
17  Well_#18           ACHI  Undersaturated
18  Well_#19           JANI  Undersaturated
19  Well_#20           MAKO       Saturated


In [9]:
import pandas as pd
import numpy as np

# === Tunable settings ===
# Largest accepted difference (reservoir pressure - well max BHP), in psi,
# for a reservoir to be considered a match for a well.
RESERVOIR_MATCH_WINDOW_PSI = 200
# How many wells to classify, taken in order of first appearance.
WELL_COUNT = 20

# === Load datasets ===
wells_df = pd.read_csv("spe_africa_dseats_datathon_2025_wells_dataset.csv")
reservoir_df = pd.read_csv("reservoir_info.csv")

# === Numeric columns that need cleaning ===
num_cols_reservoir = [
    'Initial Reservoir Pressure (PSI)',
    'Bubble Point Pressure (PSI)',
    'Current Average Reservoir Pressure (PSI)',
    'Solution Gas-Oil-Ratio (SCF/BBL)',
    'Formation Volume Factor (RB/STB)'
]

num_cols_wells = [
    'BOTTOMHOLE_FLOWING_PRESSURE (PSI)',
    'DOWNHOLE_TEMPERATURE (deg F)',
    'ANNULUS_PRESS (PSI)',
    'CHOKE_SIZE (%)',
    'WELL_HEAD_PRESSURE (PSI)',
    'WELL_HEAD_TEMPERATURE (deg F)',
    'CUMULATIVE_OIL_PROD (STB)',
    'CUMULATIVE_FORMATION_GAS_PROD (MSCF)',
    'CUMULATIVE_TOTAL_GAS_PROD (MSCF)',
    'CUMULATIVE_WATER_PROD (BBL)',
    'ON_STREAM_HRS'
]


def parse_numeric_text(raw):
    """Return ``raw`` as a float64 Series.

    Values are converted to text, commas and double quotes are removed,
    surrounding whitespace is stripped, and the remainder is parsed as a
    number. Entries that cannot be parsed (such as "NA") become NaN rather
    than raising.
    """
    stripped = raw.astype(str).str.replace(r'[,"]', "", regex=True).str.strip()
    # astype(float) keeps the dtype float64 even if every value is a whole number.
    return pd.to_numeric(stripped, errors="coerce").astype(float)


def reservoir_type_for_well(max_bhp, reservoirs, window_psi=RESERVOIR_MATCH_WINDOW_PSI):
    """Match one well to a reservoir and classify that reservoir.

    A reservoir qualifies when its current average pressure exceeds
    ``max_bhp`` by between 0 and ``window_psi`` psi (both inclusive); the
    qualifying reservoir with the smallest excess is chosen, the first one
    winning on ties.

    Parameters
    ----------
    max_bhp : float
        Maximum bottomhole flowing pressure of the well (NaN gives no match).
    reservoirs : pandas.DataFrame
        Cleaned reservoir table; assumed to have a unique index, which holds
        for the default index produced by ``read_csv`` above.
    window_psi : float, optional
        Upper bound of the accepted pressure excess.

    Returns
    -------
    tuple
        ``(reservoir_name, reservoir_type)``; ``(None, None)`` when nothing
        qualifies. ``reservoir_type`` is "Saturated" when the initial
        pressure is at or below the bubble point, "Undersaturated" when it is
        above, and "Unknown" when either pressure is missing.
    """
    excess = reservoirs['Current Average Reservoir Pressure (PSI)'] - max_bhp
    # between() is inclusive and False for NaN, so missing pressures drop out.
    eligible = excess[excess.between(0, window_psi)]
    if eligible.empty:
        return None, None

    chosen = reservoirs.loc[eligible.idxmin()]
    p_initial = chosen['Initial Reservoir Pressure (PSI)']
    p_bubble = chosen['Bubble Point Pressure (PSI)']

    if pd.isnull(p_initial) or pd.isnull(p_bubble):
        kind = "Unknown"
    elif p_initial <= p_bubble:
        kind = "Saturated"
    else:
        kind = "Undersaturated"
    return chosen['Reservoir Name'], kind


# === Clean and convert numerical columns in both tables ===
for frame, numeric_cols in ((reservoir_df, num_cols_reservoir), (wells_df, num_cols_wells)):
    for col in numeric_cols:
        frame[col] = parse_numeric_text(frame[col])

# === Convert PROD_DATE to datetime (unparseable dates become NaT) ===
# NOTE(review): the recorded run shows a warning raised from this line. If it
# is pandas' date-format inference warning, supply an explicit ``format=``
# after confirming how dates are written in the file.
wells_df["PROD_DATE"] = pd.to_datetime(wells_df["PROD_DATE"], errors='coerce')

# === Select first WELL_COUNT unique wells ===
well_names_20 = wells_df["WELL_NAME"].unique()[:WELL_COUNT]

# === Step 2: Determine Reservoir Type for each well ===

# Maximum BHP per selected well, computed in a single grouped pass.
selected_wells = wells_df[wells_df["WELL_NAME"].isin(well_names_20)]
peak_bhp = selected_wells.groupby("WELL_NAME", sort=False)[
    'BOTTOMHOLE_FLOWING_PRESSURE (PSI)'
].max()

reservoir_type_results = []

for well in well_names_20:
    # A well name with no group (e.g. a missing name) falls back to NaN,
    # which produces a (None, None) result.
    assigned_reservoir, res_type = reservoir_type_for_well(
        peak_bhp.get(well, np.nan), reservoir_df
    )
    reservoir_type_results.append({
        "WELL_NAME": well,
        "Reservoir Name": assigned_reservoir,
        "Reservoir Type": res_type
    })

# Create DataFrame with results
res_type_df = pd.DataFrame(reservoir_type_results)

# Display the results for the first 20 wells
print("Step 2: Reservoir Type for Wells 1 to 20:")
print(res_type_df)

  wells_df["PROD_DATE"] = pd.to_datetime(wells_df["PROD_DATE"], errors='coerce')


Step 2: Reservoir Type for Wells 1 to 20:
   WELL_NAME Reservoir Name  Reservoir Type
0    Well_#1           JANI  Undersaturated
1    Well_#2           KEMA  Undersaturated
2    Well_#3           MAKO       Saturated
3    Well_#4           DEPU       Saturated
4    Well_#5           MAKO       Saturated
5    Well_#6           KEMA  Undersaturated
6    Well_#7           KEMA  Undersaturated
7    Well_#8           ACHI  Undersaturated
8    Well_#9           DEPU       Saturated
9   Well_#10           JANI  Undersaturated
10  Well_#11           ACHI  Undersaturated
11  Well_#12           ACHI  Undersaturated
12  Well_#13           DEPU       Saturated
13  Well_#14           MAKO       Saturated
14  Well_#15           JANI  Undersaturated
15  Well_#16           KEMA  Undersaturated
16  Well_#17           DEPU       Saturated
17  Well_#18           ACHI  Undersaturated
18  Well_#19           JANI  Undersaturated
19  Well_#20           MAKO       Saturated
