In [7]:
import pandas as pd
import numpy as np

# STEP 1: Load the cleaned CSV data files.
# Both paths are relative, so the files must sit in the notebook's working directory.
wells_csv_path = "spe_africa_dseats_datathon_2025_wells_dataset.csv"
reservoir_csv_path = "reservoir_info.csv"

wells_df = pd.read_csv(wells_csv_path)
reservoir_df = pd.read_csv(reservoir_csv_path)

# Clean numeric reservoir and well-level data for consistency and computations
def clean_numeric_columns(df, columns, na_tokens=("NA", "N/A", "")):
    """Return a copy of ``df`` with ``columns`` converted from text to float.

    Each listed column is rendered as text, stripped of thousands separators
    (``,``), stray double quotes and surrounding whitespace, and then cast to
    float. Cells whose cleaned text equals one of ``na_tokens`` become NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns to clean. It is not modified.
    columns : iterable of str
        Names of the columns to convert.
    na_tokens : iterable of str, optional
        Cleaned text values to treat as missing.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which every listed column has float dtype.

    Raises
    ------
    KeyError
        If a listed column is not present in ``df``.
    ValueError
        If a cell still is not a valid number after cleaning. This is left
        loud on purpose so malformed data is not silently turned into NaN.
    """
    missing_markers = list(na_tokens)
    cleaned = df.copy()
    for col in columns:
        text = (
            cleaned[col]
            .astype(str)
            # regex=False: these are literal characters, and the default for
            # ``regex`` differs between pandas versions.
            .str.replace(",", "", regex=False)
            .str.replace('"', "", regex=False)
            # Strip padding so values such as "NA " still match a missing marker.
            .str.strip()
        )
        # mask() blanks the matching cells to NaN before the float cast.
        cleaned[col] = text.mask(text.isin(missing_markers)).astype(float)
    return cleaned


reservoir_numeric_cols = [
    'Initial Reservoir Pressure (PSI)',
    'Bubble Point Pressure (PSI)',
    'Current Average Reservoir Pressure (PSI)',
    'Solution Gas-Oil-Ratio (SCF/BBL)',
    'Formation Volume Factor (RB/STB)'
]

wells_numeric_cols = [
    'BOTTOMHOLE_FLOWING_PRESSURE (PSI)',
    'DOWNHOLE_TEMPERATURE (deg F)',
    'ANNULUS_PRESS (PSI)',
    'CHOKE_SIZE (%)',
    'WELL_HEAD_PRESSURE (PSI)',
    'WELL_HEAD_TEMPERATURE (deg F)',
    'CUMULATIVE_OIL_PROD (STB)',
    'CUMULATIVE_FORMATION_GAS_PROD (MSCF)',
    'CUMULATIVE_TOTAL_GAS_PROD (MSCF)',
    'CUMULATIVE_WATER_PROD (BBL)',
    'ON_STREAM_HRS'
]

# Rebinding the names (rather than mutating in place) keeps the cell safe to re-run.
reservoir_df = clean_numeric_columns(reservoir_df, reservoir_numeric_cols)
wells_df = clean_numeric_columns(wells_df, wells_numeric_cols)

# Parse production dates from their dd-mmm-yy text form; values that do not
# match the format become NaT instead of raising.
wells_df["PROD_DATE"] = pd.to_datetime(
    wells_df["PROD_DATE"],
    errors="coerce",
    format="%d-%b-%y",
)

# Restrict the analysis to the first 20 distinct, non-null well names and
# take an independent copy of their rows.
well_names_20 = wells_df["WELL_NAME"].dropna().unique()[:20]
in_first_20 = wells_df["WELL_NAME"].isin(well_names_20)
wells_df_top20 = wells_df.loc[in_first_20].copy()

# Status report followed by a three-row preview of the selected data
for status_line in (
    "Step 1 completed successfully. Data cleaned and ready for analysis.",
    f"Total unique wells loaded: {len(well_names_20)}",
    "Here’s a preview of the parsed and cleaned data:",
    wells_df_top20.head(3),
):
    print(status_line)

Step 1 completed successfully. Data cleaned and ready for analysis.
Total unique wells loaded: 20
Here’s a preview of the parsed and cleaned data:
   PROD_DATE WELL_NAME  ON_STREAM_HRS  BOTTOMHOLE_FLOWING_PRESSURE (PSI)  \
0 2014-02-15   Well_#1            0.0                             4050.0   
1 2014-02-16   Well_#1            0.0                             3961.0   
2 2014-02-17   Well_#1            0.0                             3961.0   

   DOWNHOLE_TEMPERATURE (deg F)  ANNULUS_PRESS (PSI)  CHOKE_SIZE (%)  \
0                       189.866                  0.0         1.17951   
1                       189.945                  0.0         2.99440   
2                       190.004                  0.0         1.90349   

   WELL_HEAD_PRESSURE (PSI)  WELL_HEAD_TEMPERATURE (deg F)  \
0                   482.460                         50.864   
1                   328.601                         47.668   
2                   387.218                         48.962   

   CUMULAT

In [8]:
# === Step 3: Classify well type using annular pressure logic ===
ANNULUS_PRESSURE_COL = "ANNULUS_PRESS (PSI)"


def classify_well_type(mean_annulus_pressure):
    """Map a well's mean annulus pressure to a well-type label.

    Parameters
    ----------
    mean_annulus_pressure : float
        Mean of the well's annulus pressure readings. NaN means the well has
        no usable readings.

    Returns
    -------
    str
        ``"GL"`` (gas lift) when the mean is greater than zero, ``"NF"``
        (natural flow) when it is zero or negative, and ``"Unknown"`` when
        the mean is missing.
    """
    # NOTE(review): any positive readings pull the mean above zero, so a well
    # whose annulus reads 0 on most days can still be labelled "GL".
    # Confirm that this threshold is the intended rule.
    if pd.isnull(mean_annulus_pressure):
        return "Unknown"
    return "GL" if mean_annulus_pressure > 0 else "NF"


# One grouped pass over the frame replaces a full boolean-mask scan per well.
# reindex() restores the order of well_names_20 and yields NaN for any
# selected well that has no rows.
mean_annulus_by_well = (
    wells_df_top20
    .groupby("WELL_NAME", sort=False)[ANNULUS_PRESSURE_COL]
    .mean()
    .reindex(well_names_20)
)

well_type_classification = [
    {"WELL_NAME": well_name, "Well Type": classify_well_type(mean_pressure)}
    for well_name, mean_pressure in mean_annulus_by_well.items()
]

# Explicit columns keep the frame's schema stable even when no wells are selected.
well_type_df = pd.DataFrame(
    well_type_classification,
    columns=["WELL_NAME", "Well Type"],
)

# Display Results
print("Step 3 complete — well type classified for top 20 wells based on annular pressure behavior.")
print(well_type_df)

Step 3 complete — well type classified for top 20 wells based on annular pressure behavior.
   WELL_NAME Well Type
0    Well_#1        GL
1    Well_#2        NF
2    Well_#3        GL
3    Well_#4        GL
4    Well_#5        GL
5    Well_#6        GL
6    Well_#7        GL
7    Well_#8        GL
8    Well_#9        GL
9   Well_#10        GL
10  Well_#11        GL
11  Well_#12        GL
12  Well_#13        GL
13  Well_#14        GL
14  Well_#15        GL
15  Well_#16        GL
16  Well_#17        GL
17  Well_#18        NF
18  Well_#19        NF
19  Well_#20        GL
