In [1]:
import pandas as pd
import numpy as np


In [2]:
# Target depth grid per well as (first depth, last depth); both ends are
# included and the grid is sampled every _DEPTH_STEP.
_WELL_DEPTH_RANGES = {
    "lb-07A": (335, 545),
    "lb-08A": (225, 450),
}
_DEPTH_STEP = 0.5


def resample(df: pd.DataFrame, well: str) -> pd.DataFrame:
    """Resample one well-log property onto the well's regular 0.5 depth grid.

    Steps:
      1. round every depth to the nearest 0.5,
      2. average the rows that share a rounded depth,
      3. add the grid depths that have no measurement,
      4. fill the gaps with a second-order spline (in both directions).

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a "depth" column plus the property column(s). It is not
        modified; the function works on a copy.
    well : str
        Well identifier, either "lb-07A" or "lb-08A".

    Returns
    -------
    pd.DataFrame
        Frame indexed by depth. The index is the union of the rounded input
        depths and the well's grid, so rounded input depths that fall outside
        the grid are kept as well.

    Raises
    ------
    ValueError
        If ``well`` is not one of the supported identifiers.
    """
    # Fail fast, before doing any work, and say what went wrong.
    if well not in _WELL_DEPTH_RANGES:
        raise ValueError(
            f"unknown well {well!r}; expected one of {sorted(_WELL_DEPTH_RANGES)}")

    first_depth, last_depth = _WELL_DEPTH_RANGES[well]
    # np.arange excludes the stop value, hence the extra step.
    depth_resampled = np.arange(
        first_depth, last_depth + _DEPTH_STEP, _DEPTH_STEP)

    temp_df = df.copy(deep=True)

    # depth data is rounded to the nearest 0.5 (vectorised; ties go to the
    # even neighbour, exactly like Python's built-in round)
    temp_df["depth"] = (temp_df["depth"] * 2).round() / 2

    # for each depth value we need only one property value
    temp_df = temp_df.groupby("depth").mean()

    # grid depths without a measurement become NaN rows ...
    temp_df = temp_df.reindex(temp_df.index.union(depth_resampled))

    # ... which are then filled by interpolation
    return temp_df.interpolate(
        method='spline', order=2, limit_direction="both")


In [3]:
# Lithology class labels: short alias -> full name written to the
# "lithology" column of the labelled datasets (listed alphabetically).
mgrw = "metagreywacke"
mlbr = "monomict lithic breccia"
plbr = "polymict lithic breccia"
ssps = "shale slate phyllite schist"
suvt = "suevite"


### LB-07A


In [5]:
# Read the five LB-07A logs. The CSV files carry no header row, so every
# frame gets the column names "depth" and the name of the logged property.
lb07A_raw_data = [
    pd.read_csv(csv_path, names=["depth", property_name])
    for csv_path, property_name in (
        ("raw/LB-07A/lb07_density.csv", "density"),
        ("raw/LB-07A/lb07_caliper.csv", "caliper"),
        ("raw/LB-07A/lb07_mag_suscep.csv", "mag_suscep"),
        ("raw/LB-07A/lb07_scaler_tmi.csv", "scaler_tmi"),
        ("raw/LB-07A/lb07_total_gamma.csv", "total_gamma"),
    )
]

# Keep the individual frames reachable under their own names.
(
    density_lb07_df,
    caliper_lb07_df,
    mag_suscep_lb07_df,
    scaler_tmi_lb07_df,
    total_gamma_lb07_df,
) = lb07A_raw_data


In [6]:
# Resample every LB-07A log onto the well's depth grid and join the results
# column-wise on the shared depth index. All frames are concatenated in a
# single call instead of growing the result inside a loop, and reset_index()
# then turns the depth index back into an ordinary "depth" column.
lb07A_resampled_data: pd.DataFrame = pd.concat(
    [resample(raw_df, "lb-07A") for raw_df in lb07A_raw_data],
    axis=1,
).reset_index()


In [7]:
def label_lb_07A_lithology(depth):
    """Return the lithology label of well LB-07A at the given depth.

    The well is described as a stack of intervals, each identified by its
    deepest depth (inclusive). The first interval whose limit is not
    exceeded by ``depth`` wins, so everything down to 359 is labelled
    polymict lithic breccia. Depths below the last interval (deeper than
    545), or values that cannot be compared such as NaN, give ``None``.
    """
    # (deepest depth of the interval, lithology), ordered shallow -> deep
    intervals = (
        (359, plbr),
        (369, suvt),
        (378, plbr),
        (389, suvt),
        (394, plbr),
        (414, suvt),
        (470, mlbr),
        (481, ssps),
        (488, mgrw),
        (509, ssps),
        (512, mgrw),
        (545, ssps),
    )
    for deepest, lithology in intervals:
        if depth <= deepest:
            return lithology
    return None


In [8]:
# Attach the lithology label of every depth as a new "lithology" column.
# assign() returns a new frame, so the resampled data stays untouched.
lb07A_labelled_data = lb07A_resampled_data.assign(
    lithology=lb07A_resampled_data["depth"].apply(label_lb_07A_lithology)
)

# Persist the labelled dataset without the positional row index.
lb07A_labelled_data.to_csv("datasets/LB-07A", index=False)


### LB-08A


In [10]:
# Directory holding the raw LB-08A logs (trailing slash included so file
# names can simply be appended).
LB_08A_BASE_PATH = "raw/LB-08A/"

# The CSV files carry no header row, so every frame gets the column names
# "depth" and the name of the logged property.
density_df = pd.read_csv(
    LB_08A_BASE_PATH + "lb08_density.csv", names=["depth", "density"])
caliper_df = pd.read_csv(
    LB_08A_BASE_PATH + "lb08_caliper.csv", names=["depth", "caliper"])
mag_suscep_df = pd.read_csv(
    LB_08A_BASE_PATH + "lb08_mag_suscep.csv", names=["depth", "mag_suscep"])
scaler_tmi_df = pd.read_csv(
    LB_08A_BASE_PATH + "lb08_scaler_tmi.csv", names=["depth", "scaler_tmi"])
total_gamma_df = pd.read_csv(
    LB_08A_BASE_PATH + "lb08_total_gamma.csv", names=["depth", "total_gamma"])

lb08A_raw_data = [density_df, caliper_df,
                  mag_suscep_df, scaler_tmi_df, total_gamma_df]


In [12]:
# Resample every LB-08A log onto the well's depth grid and join the results
# column-wise on the shared depth index. All frames are concatenated in a
# single call instead of growing the result inside a loop, and reset_index()
# then turns the depth index back into an ordinary "depth" column.
lb08A_resampled_data: pd.DataFrame = pd.concat(
    [resample(raw_df, "lb-08A") for raw_df in lb08A_raw_data],
    axis=1,
).reset_index()


In [13]:
def label_lb_08A_lithology(depth):
    """Return the lithology label of well LB-08A at the given depth.

    The well is described as a stack of intervals, each identified by its
    deepest depth (inclusive). The first interval whose limit is not
    exceeded by ``depth`` wins, so everything down to 258 is labelled
    polymict lithic breccia. Depths below the last interval (deeper than
    450), or values that cannot be compared such as NaN, give ``None``.

    The limits must be strictly increasing: an out-of-order limit makes
    every later interval with a smaller-or-equal limit unreachable.
    """
    # (deepest depth of the interval, lithology), ordered shallow -> deep.
    # Neighbouring intervals with the same lithology are kept as separate
    # entries on purpose; they do not change the result.
    intervals = (
        (258, plbr),
        (263, suvt),
        (268, ssps),
        (280, mgrw),
        (295, ssps),  # double check
        (315, mgrw),
        (318, mgrw),
        (323, ssps),
        (325, mgrw),
        (332, mgrw),
        (338, ssps),
        (344, mgrw),
        (347, ssps),
        (349, mgrw),
        (358, ssps),
        # NOTE(review): this limit used to be 389, which shadowed the three
        # intervals that follow (364, 382, 389) so they could never match.
        # It has to lie between 358 and 364; 359 is assumed to be the
        # intended value (389 looks like a typo) - TODO confirm against the
        # core log.
        (359, mgrw),
        (364, ssps),
        (382, mgrw),
        (389, ssps),
        (439, mgrw),
        (450, mgrw),
    )
    for deepest, lithology in intervals:
        if depth <= deepest:
            return lithology
    return None


In [None]:
# Attach the lithology label of every depth as a new "lithology" column.
# assign() returns a new frame, so the resampled data stays untouched.
lb08A_labelled_data = lb08A_resampled_data.assign(
    lithology=lb08A_resampled_data["depth"].apply(label_lb_08A_lithology)
)

# Persist the labelled dataset without the positional row index.
lb08A_labelled_data.to_csv("datasets/LB-08A", index=False)
