In [10]:
import pandas as pd
import numpy as np

# --- Load datasets ---
wells_df = pd.read_csv('spe_africa_dseats_datathon_2025_wells_dataset.csv')
reservoir_df = pd.read_csv('reservoir_info.csv')

# --- Clean pressure columns ---
wells_df['BOTTOMHOLE_FLOWING_PRESSURE (PSI)'] = (
    wells_df['BOTTOMHOLE_FLOWING_PRESSURE (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

reservoir_df['Current Average Reservoir Pressure (PSI)'] = (
    reservoir_df['Current Average Reservoir Pressure (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

# --- Code 1: Assign Reservoir to each well based on max BHP (±500 PSI match) ---
max_bhp = wells_df.groupby('WELL_NAME')['BOTTOMHOLE_FLOWING_PRESSURE (PSI)'].max().reset_index()

assigned_reservoirs = []
for _, row in max_bhp.iterrows():
    well = row['WELL_NAME']
    bhp = row['BOTTOMHOLE_FLOWING_PRESSURE (PSI)']
    match = reservoir_df[np.abs(reservoir_df['Current Average Reservoir Pressure (PSI)'] - bhp) <= 500]
    if not match.empty:
        closest = match.loc[np.abs(match['Current Average Reservoir Pressure (PSI)'] - bhp).idxmin()]
        assigned_reservoirs.append((well, closest['Reservoir Name']))
    else:
        assigned_reservoirs.append((well, 'Unassigned'))

# Create reservoir assignment DataFrame
reservoir_assignment_df = pd.DataFrame(assigned_reservoirs, columns=['WELL_NAME', 'Reservoir Name'])

# --- Code 2: Determine Reservoir Type (Saturated / Undersaturated) ---
reservoir_df['Initial Reservoir Pressure (PSI)'] = (
    reservoir_df['Initial Reservoir Pressure (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

reservoir_df['Bubble Point Pressure (PSI)'] = (
    reservoir_df['Bubble Point Pressure (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

reservoir_df['Reservoir Type'] = reservoir_df.apply(
    lambda x: 'Saturated' if x['Initial Reservoir Pressure (PSI)'] <= x['Bubble Point Pressure (PSI)'] else 'Undersat',
    axis=1
)

# Merge reservoir type into assignments
reservoir_assignment_df = reservoir_assignment_df.merge(
    reservoir_df[['Reservoir Name', 'Reservoir Type']],
    on='Reservoir Name',
    how='left'
)

# --- Code 3: Classify Well Type (Naturally Flowing or Gas Lifted) ---
# Find the actual wellhead pressure column
pressure_column = None
for col in wells_df.columns:
    if 'WELL' in col.upper() and 'HEAD' in col.upper() and 'PRESSURE' in col.upper():
        pressure_column = col
        break

if not pressure_column:
    raise ValueError("Could not find a column for Wellhead Pressure!")

# Clean pressure column
wells_df[pressure_column] = wells_df[pressure_column].astype(str).str.replace(',', '', regex=False)
wells_df[pressure_column] = pd.to_numeric(wells_df[pressure_column], errors='coerce')

# Compute average WHT per well
avg_whp = wells_df.groupby('WELL_NAME')[pressure_column].mean().reset_index()

# Apply threshold logic
threshold = 500  # psi
avg_whp['Well Type'] = avg_whp[pressure_column].apply(lambda x: 'GL' if x > threshold else 'NF')

# --- Final Merge of All Results ---
final_classification_df = reservoir_assignment_df.merge(avg_whp[['WELL_NAME', 'Well Type']], on='WELL_NAME', how='left')

# Sort wells numerically by well number
final_classification_df['WELL_NUMBER'] = final_classification_df['WELL_NAME'].str.extract(r'(\d+)').astype(int)
final_classification_df = final_classification_df.sort_values('WELL_NUMBER').drop(columns='WELL_NUMBER').reset_index(drop=True)

# --- Display Final Output ---
print("\n✅ Final Classification: Reservoir Assignment + Reservoir Type + Well Type\n")
print(final_classification_df)


✅ Final Classification: Reservoir Assignment + Reservoir Type + Well Type

   WELL_NAME Reservoir Name Reservoir Type Well Type
0    Well_#1           JANI       Undersat        GL
1    Well_#2           KEMA       Undersat        GL
2    Well_#3           MAKO      Saturated        GL
3    Well_#4           DEPU      Saturated        GL
4    Well_#5           MAKO      Saturated        NF
5    Well_#6           KEMA       Undersat        NF
6    Well_#7           KEMA       Undersat        NF
7    Well_#8           ACHI       Undersat        NF
8    Well_#9           DEPU      Saturated        NF
9   Well_#10           JANI       Undersat        NF
10  Well_#11           DEPU      Saturated        NF
11  Well_#12           ACHI       Undersat        NF
12  Well_#13           DEPU      Saturated        NF
13  Well_#14           MAKO      Saturated        NF
14  Well_#15           KEMA       Undersat        NF
15  Well_#16           KEMA       Undersat        NF
16  Well_#17           

In [11]:
import pandas as pd
import numpy as np

# --- Load datasets ---
wells_df = pd.read_csv('spe_africa_dseats_datathon_2025_wells_dataset.csv')
reservoir_df = pd.read_csv('reservoir_info.csv')

# --- Clean pressure columns ---
wells_df['BOTTOMHOLE_FLOWING_PRESSURE (PSI)'] = (
    wells_df['BOTTOMHOLE_FLOWING_PRESSURE (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

reservoir_df['Current Average Reservoir Pressure (PSI)'] = (
    reservoir_df['Current Average Reservoir Pressure (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

# --- Code 1: Assign Reservoir to each well based on max BHP (±500 PSI match) ---
max_bhp = wells_df.groupby('WELL_NAME')['BOTTOMHOLE_FLOWING_PRESSURE (PSI)'].max().reset_index()

assigned_reservoirs = []
for _, row in max_bhp.iterrows():
    well = row['WELL_NAME']
    bhp = row['BOTTOMHOLE_FLOWING_PRESSURE (PSI)']
    match = reservoir_df[np.abs(reservoir_df['Current Average Reservoir Pressure (PSI)'] - bhp) <= 500]
    if not match.empty:
        closest = match.loc[np.abs(match['Current Average Reservoir Pressure (PSI)'] - bhp).idxmin()]
        assigned_reservoirs.append((well, closest['Reservoir Name']))
    else:
        assigned_reservoirs.append((well, 'Unassigned'))

# Create reservoir assignment DataFrame
reservoir_assignment_df = pd.DataFrame(assigned_reservoirs, columns=['WELL_NAME', 'Reservoir Name'])

# --- Code 2: Determine Reservoir Type (Saturated / Undersaturated) ---
reservoir_df['Initial Reservoir Pressure (PSI)'] = (
    reservoir_df['Initial Reservoir Pressure (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

reservoir_df['Bubble Point Pressure (PSI)'] = (
    reservoir_df['Bubble Point Pressure (PSI)']
    .astype(str).str.replace(',', '', regex=False)
    .astype(float)
)

reservoir_df['Reservoir Type'] = reservoir_df.apply(
    lambda x: 'Saturated' if x['Initial Reservoir Pressure (PSI)'] <= x['Bubble Point Pressure (PSI)'] else 'Undersat',
    axis=1
)

# Merge reservoir type into assignments
reservoir_assignment_df = reservoir_assignment_df.merge(
    reservoir_df[['Reservoir Name', 'Reservoir Type']],
    on='Reservoir Name',
    how='left'
)

# --- Code 3: Classify Well Type (Naturally Flowing or Gas Lifted) ---
# Find the actual wellhead pressure column
pressure_column = None
for col in wells_df.columns:
    if 'WELL' in col.upper() and 'HEAD' in col.upper() and 'PRESSURE' in col.upper():
        pressure_column = col
        break

if not pressure_column:
    raise ValueError("Could not find a column for Wellhead Pressure!")

# Clean pressure column
wells_df[pressure_column] = wells_df[pressure_column].astype(str).str.replace(',', '', regex=False)
wells_df[pressure_column] = pd.to_numeric(wells_df[pressure_column], errors='coerce')

# Compute average WHT per well
avg_whp = wells_df.groupby('WELL_NAME')[pressure_column].mean().reset_index()

# Apply threshold logic
threshold = 500  # psi
avg_whp['Well Type'] = avg_whp[pressure_column].apply(lambda x: 'GL' if x > threshold else 'NF')

# --- Final Merge of All Results ---
final_classification_df = reservoir_assignment_df.merge(avg_whp[['WELL_NAME', 'Well Type']], on='WELL_NAME', how='left')

# Sort wells numerically by well number
final_classification_df['WELL_NUMBER'] = final_classification_df['WELL_NAME'].str.extract(r'(\d+)').astype(int)
final_classification_df = final_classification_df.sort_values('WELL_NUMBER').drop(columns='WELL_NUMBER').reset_index(drop=True)

# --- Display Final Output ---
print("\n✅ Final Classification: Reservoir Assignment + Reservoir Type + Well Type\n")
print(final_classification_df)


✅ Final Classification: Reservoir Assignment + Reservoir Type + Well Type

   WELL_NAME Reservoir Name Reservoir Type Well Type
0    Well_#1           JANI       Undersat        GL
1    Well_#2           KEMA       Undersat        GL
2    Well_#3           MAKO      Saturated        GL
3    Well_#4           DEPU      Saturated        GL
4    Well_#5           MAKO      Saturated        NF
5    Well_#6           KEMA       Undersat        NF
6    Well_#7           KEMA       Undersat        NF
7    Well_#8           ACHI       Undersat        NF
8    Well_#9           DEPU      Saturated        NF
9   Well_#10           JANI       Undersat        NF
10  Well_#11           DEPU      Saturated        NF
11  Well_#12           ACHI       Undersat        NF
12  Well_#13           DEPU      Saturated        NF
13  Well_#14           MAKO      Saturated        NF
14  Well_#15           KEMA       Undersat        NF
15  Well_#16           KEMA       Undersat        NF
16  Well_#17           

In [12]:
import pandas as pd
import numpy as np

# --- Load dataset ---
wells_df = pd.read_csv('spe_africa_dseats_datathon_2025_wells_dataset.csv')

# --- Identify and clean the Wellhead Pressure column ---
# Automatically detect the correct column (case-insensitive)
pressure_col = [col for col in wells_df.columns if 'WELL' in col.upper() and 'HEAD' in col.upper() and 'PRESSURE' in col.upper()]
if not pressure_col:
    raise KeyError("❌ Could not find a wellhead pressure column in the dataset.")
pressure_col = pressure_col[0]

# Clean and convert pressure values
wells_df[pressure_col] = wells_df[pressure_col].astype(str).str.replace(',', '', regex=False)
wells_df[pressure_col] = pd.to_numeric(wells_df[pressure_col], errors='coerce')

# --- Compute average wellhead pressure per well ---
avg_whp = wells_df.groupby('WELL_NAME')[pressure_col].mean().reset_index()

# --- Classify each well as Gas Lifted (GL) or Naturally Flowing (NF) ---
threshold = 500  # psi
avg_whp['Well Type'] = avg_whp[pressure_col].apply(lambda x: 'GL' if x > threshold else 'NF')

# --- Display result ---
print("\n✅ Well Type Classification based on Average Wellhead Pressure:\n")
print(avg_whp[['WELL_NAME', pressure_col, 'Well Type']])


✅ Well Type Classification based on Average Wellhead Pressure:

   WELL_NAME  WELL_HEAD_PRESSURE (PSI) Well Type
0    Well_#1                711.214985        GL
1   Well_#10                 33.184883        NF
2   Well_#11                135.461129        NF
3   Well_#12                102.540756        NF
4   Well_#13                 43.503364        NF
5   Well_#14                103.510549        NF
6   Well_#15                 37.950869        NF
7   Well_#16                129.180378        NF
8   Well_#17                 84.915323        NF
9   Well_#18                141.425189        NF
10  Well_#19                102.660887        NF
11   Well_#2                861.152174        GL
12  Well_#20                117.928844        NF
13   Well_#3                789.801290        GL
14   Well_#4                647.913186        GL
15   Well_#5                425.250344        NF
16   Well_#6                 76.459775        NF
17   Well_#7                316.043412        NF
18  