### Training data tool

This code generates synthetic drilling data by running a drilling simulator in OpenLab and storing the results in a Pandas DataFrame. The user specifies the length of each simulation and the total simulation time; the setpoint values for surface RPM, desired WOB, and flow rate are drawn at random for every simulation.

The code creates a DataFrame with columns for various drilling parameters, including SPP, hook load, downhole pressure, fluid temperature, and many others. The DataFrame is saved to a CSV file, with the file name including a timestamp and information about whether the data is intended for training or validation purposes.

Overall, this code allows users to generate large amounts of synthetic drilling data for use in machine learning models or other data analysis tasks. By adjusting the simulation time and setpoint values, users can create data that is representative of a wide range of drilling conditions.

In [1]:
# quick config edit:
# The three values below are the knobs a user is expected to change; they are
# aliased to config_name / sim_name / initial_depth further down and handed to
# session.create_simulation.

# Name of the OpenLab configuration to simulate.
Configuration_Name = "training_data_tool"
# Name given to the simulation(s) started by this notebook.
Simulation_Name = "sim"
# Initial bit depth for each simulation (presumably metres - TODO confirm unit).
Initial_Bit_Depth = 2500

In [2]:
# importing
import openlab
import os
import matplotlib.pyplot as plt
import numpy as np
import time
import pandas as pd
import time
import random
import pprint
import datetime

from IPython.display import clear_output
from datetime import datetime

In [3]:
# Initializing http client
# Credentials are never stored in the notebook. Each value is taken from an
# environment variable and, if that variable is unset or empty, asked for
# interactively (the secrets via getpass, so they are not echoed into the
# cell output).
#   OPENLAB_USERNAME     - OpenLab account name
#   OPENLAB_APIKEY       - OpenLab API key
#   OPENLAB_LICENSEGUID  - OpenLab license GUID
# NOTE(review): an API key and license GUID were previously committed in this
# cell in clear text - treat them as leaked and regenerate them in OpenLab.
import getpass

username = os.environ.get("OPENLAB_USERNAME") or input("OpenLab username: ")
apikey = os.environ.get("OPENLAB_APIKEY") or getpass.getpass("OpenLab API key: ")
licenseguid = os.environ.get("OPENLAB_LICENSEGUID") or getpass.getpass("OpenLab license GUID: ")

In [4]:
# Log in to OpenLab with the credentials from the previous cell.
# `session` is the client object every simulation below is created from.
session = openlab.http_client(
    username=username,
    apikey=apikey,
    licenseguid=licenseguid,
)

Initializing http client...
Saving token...
Login Succesfull
Your openlab version 2.3.0 is outdated. Consider upgrading to 2.5.6


In [5]:
# Short aliases for the quick-config values.
# ps: the configuration itself is not created here - it has to be made
# "by hand" on the OpenLab website.
config_name, sim_name, initial_depth = (
    Configuration_Name,
    Simulation_Name,
    Initial_Bit_Depth,
)

In [6]:
# Start a first ("test") simulation on the chosen configuration.
sim = session.create_simulation(
    config_name,
    sim_name,
    initial_depth,
)

User limits: {'ActiveSimulationCount': 0, 'MaxConcurrentSimulations': 5, 'MaxSimulationCount': 0, 'MaxStepCount': 180000, 'MaxStoredSimulations': 10, 'MaxTimeStep': 18000, 'TotalSimulationCount': 1114, 'UsedCapacityPercent': 1, 'UsedStepCount': 2532}
influx_type:  {}
No influx mode selected. Defaulting to 'no influx and loss'
Using non-transient model
Simulation Initialized


In [7]:
# ask user if he/she is making training data
# `statement` ends up True for training data and False for validation data.
# Only a clear yes/no is accepted: surrounding whitespace and letter case are
# ignored, "y"/"n" are allowed, and anything else (typo, empty answer) is asked
# again instead of being silently treated as "no".
while True:
    user_input = input("Are you making training data? (yes/no): ").strip().lower()
    if user_input in ("yes", "y", "no", "n"):
        break
    print("Please answer 'yes' or 'no'.")

# Check user's response
statement = user_input in ("yes", "y")
if statement:
    print("df will be saved in jupyter as df_training.")
else:
    print("df will be saved in jupyter as df_validation.")

Are you making data for NN purposes? (yes/no): yes
df will be saved in jupyter as df_training.


In [8]:
# logic and length of sim/duration of generation
# The two lengths can be chosen separately for training and validation data;
# everything derived from them is computed once, below the branch.
if statement:
    single_simulation_lenght = 30  # min, length of one simulation (training)
    total_simulation_length = 0.5  # hrs, length of all simulations together (training)
else:
    single_simulation_lenght = 30  # min, length of one simulation (validation)
    total_simulation_length = 0.5  # hrs, length of all simulations together (validation)

# Steps per simulation: minutes * 60 (presumably one OpenLab step per second - TODO confirm).
step_per_sim = single_simulation_lenght * 60
# Number of whole simulations that fit into the total length; int() drops a
# trailing partial simulation.
nr_sim = int(total_simulation_length * 60 / single_simulation_lenght)
if nr_sim < 1:
    # Without this check the notebook would run to the end and save an empty CSV.
    raise ValueError(
        "total_simulation_length is shorter than single_simulation_lenght: "
        "no simulation would be run."
    )
total_steps = nr_sim * step_per_sim

print(f"There will be {nr_sim} simulations, lasting for {single_simulation_lenght} minutes, equalling to {total_steps} timeSteps.")

There will be 1 simulations, lasting for 30 minutes, equalling to 1800 timeSteps.


In [9]:
# lists that will be used for making each simulation unique

# Take the depth from the quick-config cell rather than a second hardcoded
# 2500, so that editing Initial_Bit_Depth really changes the simulations.
initial_depth = Initial_Bit_Depth

# One random setpoint per simulation (list index = simulation number).
# The generator is not seeded, so every run of this cell draws new values.

# Integer in [0, 220] divided by 60 (presumably rev/min -> rev/s - TODO confirm).
SurfaceRPM = [random.randint(0, 220) / 60 for _ in range(nr_sim)]

# Integer in [0, 40] times 1000 (presumably tonnes -> kg - TODO confirm).
DesiredWOB = [random.randint(0, 40) * 1000 for _ in range(nr_sim)]

# Integer in [0, 3000] divided by 60000 (presumably l/min -> m^3/s - TODO confirm).
FlowRateIn = [random.randint(0, 3000) / 60000 for _ in range(nr_sim)]

In [10]:
# Result tags requested from OpenLab at every step; each one becomes a column.
tags = [
    'SPP', 'DownholeECD', 'FlowRateOut', 'HookLoad', 'SurfaceTorque',
    'BitDepth', 'TD', 'ChokeOpening', 'DownholePressure', 'ChokePressure',
    'FluidTemperatureOut', 'WOB', 'InstantaneousROP', 'FlowRateIn',
    'TopOfStringVelocity', 'SurfaceRPM', 'DrillstringTemperature',
    'TotalInfluxMass', 'CalculatedPressureBottomHole',
    'CuttingsMassFractionTransient', 'FluidTemperatureIn', 'AnnulusECD',
    'AnnulusTemperature', 'DrillstringTension', 'AnnulusFluidVelocity',
    'DrillstringFluidVelocity', 'CuttingsBedHeight', 'AnnulusDensity',
    'DrillstringTorque', 'TotalMudLossMass', 'TopOfStringPosition',
    'ActivePitVolume', 'ActivePitDensity', 'ActivePitTemperature',
    'MainPitVolume', 'MainPitDensity', 'MainPitTemperature',
    'ReservePitVolume', 'ReservePitDensity', 'ReservePitTemperature',
]

# Bookkeeping columns: which simulation a row belongs to and at which step.
ID = ["sim_ID", "timeStep"]
columns = [*tags, *ID]

# Empty frame with the final column layout.
df = pd.DataFrame(columns=columns)

In [11]:
# Run nr_sim simulations of endTime steps each and collect one row per step
# (all result tags plus sim_ID and timeStep) into df.
startTime = 1
endTime = single_simulation_lenght*60

# Rows are gathered in a plain list and turned into a DataFrame in one go:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every single step. Starting from an empty list also means that re-running
# this cell replaces df instead of appending duplicate rows to it.
records = []

# Stop the simulation that is still held in `sim`; every iteration below
# creates its own.
sim.stop()

for current_simulation in range(nr_sim):

    print(current_simulation)
    sim_ID = current_simulation
    sim = session.create_simulation(config_name,sim_name,initial_depth)

    try:
        # Setpoints for this simulation: the random ones drawn per simulation
        # plus two fixed ones.
        sim.setpoints.SurfaceRPM = SurfaceRPM[current_simulation]
        sim.setpoints.DesiredWOB = DesiredWOB[current_simulation]
        sim.setpoints.FlowRateIn = FlowRateIn[current_simulation]
        sim.setpoints.TopOfStringVelocity = 0.02 #m/s
        sim.setpoints.DesiredROP = 0.02

        for timeStep in range(startTime, endTime + 1):

            sim.step(timeStep) # stepping openlab
            sim.get_results(timeStep, tags) # fetching results

            # A fresh dict per step: the list keeps a reference to it, so
            # reusing one dict would overwrite rows that were already stored.
            values = {}
            for feature in tags:
                # Skip "Connection" feature
                if feature == "Connection":
                    continue
                # Results are exposed as attributes named after the tag;
                # getattr does the lookup without eval-ing a string.
                value_dict = getattr(sim.results, feature)
                # Keep the last entry of the result mapping as this step's value.
                values[feature] = list(value_dict.values())[-1]

            # Bookkeeping columns
            values['timeStep'] = timeStep
            values['sim_ID'] = sim_ID
            records.append(values)

            # Single progress line, replaced in place on every step.
            clear_output(wait=True)
            print(f"simulation {sim_ID + 1}/{nr_sim}, timeStep {timeStep}/{endTime}")
    finally:
        # Always stop the simulation, even if a step failed, and keep whatever
        # rows were collected so far.
        sim.stop()
        df = pd.DataFrame(records, columns=columns)

    # Print the entire dataframe after the simulation has completed
    clear_output(wait=True)
    print(df)

# Leave the step counter at its starting value, as before.
timeStep = 1

               SPP  DownholeECD   FlowRateOut       HookLoad  SurfaceTorque  \
0     1.811731e+05  1374.382178 -2.229704e-07  244120.083947   11768.734936   
1     1.201131e+06  1374.382616 -2.495278e-07  243656.593967   11828.442007   
2     1.647691e+06  1374.445902  2.791874e-05  243568.579521   11815.869614   
3     2.206723e+06  1377.834651  2.970509e-04  243437.607508   11780.461624   
4     2.726786e+06  1399.198158  1.960908e-04  243270.228332   11791.251325   
...            ...          ...           ...            ...            ...   
1795  2.738621e+06  1399.027153  1.420080e-02 -334209.107879  278179.200127   
1796  2.740301e+06  1399.026766  1.419990e-02 -334679.437345  278503.414619   
1797  2.741182e+06  1399.025815  1.420016e-02 -335143.901070  278801.773173   
1798  2.739086e+06  1399.024682  1.420193e-02 -335615.125081  279129.524568   
1799  2.738789e+06  1399.024612  1.420251e-02 -336081.486642  279451.011621   

         BitDepth           TD  ChokeOpening  Downh

In [13]:
# Save df as a CSV in ./trainingdata; the file name carries the data-set kind,
# the DataFrame shape and a timestamp.

# Get the number of rows and columns in the DataFrame
num_rows, num_cols = df.shape

# Get time to save the file with (day_month_hour_minute).
# Stored as `timestamp`: assigning it to `time` would shadow the `time` module
# imported at the top of the notebook.
now = datetime.now()
timestamp = now.strftime("%d_%m_%H_%M")

word = "training" if statement else "validation"

# Construct the file name using the DataFrame shape.
# "_" separates shape and timestamp because ":" is not allowed in Windows
# file names.
file_name = f"df_{word}_{num_rows}x{num_cols}_{timestamp}.csv"

# Create a directory named "trainingdata" in the current working directory (if it doesn't already exist)
directory = "trainingdata"
os.makedirs(directory, exist_ok=True)

# Save the DataFrame to a file in the "trainingdata" directory
file_path = os.path.join(directory, file_name)
df.to_csv(file_path, index=False)

# Print the file path for confirmation
print("File saved to:", file_path)

File saved to: trainingdata\df_training_1800x42:14_03_17_15.csv


In [14]:
# Persist the DataFrame with IPython's %store magic so it can be restored later
# with `%store -r`. It is stored as df_training or df_validation depending on
# the answer given at the prompt.
# Note: the new name is bound to the same DataFrame object as df, not a copy.
if statement == True:
    df_training = df
    %store df_training
else:
    df_validation = df
    %store df_validation

Stored 'df_training' (DataFrame)
