# Remove data acquired when the tool is stationary or slowing

This step is used in 2-temperature.ipynb and 4-feedzones.ipynb

In [None]:
import pandas as pd
from utilities import* # functions in the utilities.py file

In [None]:
# Load the two input workbooks with the reader helpers that come from
# utilities.py (star-imported above).
# NOTE(review): the plots below read flowrate.datetime / flowrate.flow_tph and
# pts.datetime / pts.depth_m / pts.frequency_hz / pts.speed_mps / pts.timestamp,
# so the readers presumably return pandas DataFrames with those columns -
# confirm against utilities.py.
flowrate = read_flowrate(r'Data-FlowRate.xlsx')
pts = read_pts(r'Data-PTS.xlsx')

Look at the spinner data plotted below and note where there is noise at the top, bottom and ~750 mMD because the tool is not moving or is moving at a slower pace. 

We need to clean these from the data to do the spinner analysis.

In [None]:
# Import matplotlib explicitly so this cell does not rely on the star import
# from utilities (or on a later cell) to put plt and mdates in scope.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Overview of the raw PTS data, both panels coloured by timestamp:
#   left  - spinner frequency against depth
#   right - tool depth against time, with the pump flowrate overlaid
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 8), sharey=True)

ax1.scatter(pts.frequency_hz, pts.depth_m, c=pts.timestamp, s=5, linewidths=0)
ax2.scatter(pts.datetime, pts.depth_m, c=pts.timestamp, s=5, linewidths=0)

# The flowrate gets its own y-axis on the right-hand panel.
ax3 = ax2.twinx()
ax3.plot(
    flowrate.datetime, flowrate.flow_tph,
    c='k', linestyle='-', linewidth=3, alpha=0.3,
    label='Surface pump flowrate',
)
ax3.set_ylabel('Flowrate [t/hr]')

# Inverted limits put depth 0 at the top; sharey applies them to both panels.
ax1.set_ylim(1000, 0)
ax1.set_xlim(-30, 30)

ax1.set_ylabel('Depth [m]')
ax1.set_xlabel('Spinner frequency [hz]')

ax2.set_xlabel('Time [hh:mm]')
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))

for ax in [ax1, ax2]:
    ax.grid()



## Decide on a speed limit for the filter

We can use descriptive and plotting tools that are built into Pandas to understand the data

In [None]:
# Summary statistics for the PTS data; as the last expression in the cell the
# resulting table is displayed by the notebook.
pts.describe()

In [None]:
# Distribution of tool speed in 30 bins, used to judge where the
# stationary / slow-moving samples sit.
pts.speed_mps.hist(bins=30)

## Explore the data at various scales using a plot

Because we will use this filter on the temperature and spinner data, we want to remove both the stationary data and the data acquired as the tool slows down before it stops.

We use the matplotlib plot below to zoom into the data and decide on a reasonable cut-off value, which looks to be 0.05 m/s.

Hint: Explore the data by changing the y-axis limits, for example: ax.set_ylim(-0.05, 0.05)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Tool speed against time for the complete, unfiltered dataset.
fig, ax = plt.subplots(1, 1, figsize=(12, 6))

ax.set_title('All Data')

ax.scatter(pts.datetime, pts.speed_mps, s=3, c='k', linewidths=0)

ax.set_ylabel('Tool speed [m/sec]')

# Uncomment to zoom in on the slow-moving samples around zero speed.
# ax.set_ylim(-0.2, 0.2)

ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
# The trailing semicolon suppresses the notebook's text echo of the last
# expression. It sits on the statement itself because a line containing only
# ';' is not valid Python outside IPython.
ax.set_xlabel('Time [hh:mm]');

In [None]:
# Time-series view of the unfiltered PTS data, both panels coloured by timestamp:
#   left  - spinner frequency, with tool depth as a faint line on a second axis
#   right - tool depth, with the surface pump flowrate on a second axis
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 8))

# Optional: uncomment to join the spinner samples with a faint line.
# ax1.plot(pts.datetime, pts.frequency_hz, c='k', linestyle='-', linewidth=1, alpha=0.3)
ax1.scatter(pts.datetime, pts.frequency_hz, c=pts.timestamp, s=5, linewidths=0)
ax2.scatter(pts.datetime, pts.depth_m, c=pts.timestamp, s=5, linewidths=0)

ax3 = ax2.twinx()
ax3.plot(
    flowrate.datetime, flowrate.flow_tph,
    c='k', linestyle='-', linewidth=3, alpha=0.3,
    label='Surface pump flowrate',
)
ax3.set_ylabel('Flowrate [t/hr]')

ax4 = ax1.twinx()
ax4.plot(
    pts.datetime, pts.depth_m,
    c='k', linestyle='-', linewidth=2, alpha=0.3,
    label='Tool depth [m]',
)
# Initial depth range; narrowed again in the "Limit to a time range" section below.
ax4.set_ylim(1000, -1000)
ax4.set_ylabel('Tool depth [m]')

ax1.set_ylim(-30, 30)
ax1.set_ylabel('Spinner frequency [hz]')

# Inverted limits put depth 0 at the top of the panel.
ax2.set_ylim(1000, 0)
# Depth is plotted in metres (the label previously read m/s).
ax2.set_ylabel('Tool depth [m]')

for ax in [ax1, ax2]:
    ax.set_xlabel('Time [hh:mm]')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.grid()

#
# Limit to a time range
#

start_time = pd.to_datetime('2020-12-11 09:30:00')
end_time = pd.to_datetime('2020-12-11 10:30:00')

# Only the left-hand panel is zoomed; the right-hand panel keeps the full record.
ax1.set_xlim(start_time, end_time)
ax4.set_ylim(1000, 400);

## Use a boolean expression to select only the desired data

We decided that, in this case, data acquired when the tool is moving in either direction (up or down the well) faster than 0.9 m/s is fast enough to be included. Remember that the sign of the speed indicates the direction in which the tool is moving inside the well; it is not an actual negative speed.

Our new working dataframe is called moving_pts

In [None]:
# Keep only the samples where the tool speed magnitude exceeds 0.9 m/s, in
# either direction (the sign of speed_mps encodes the direction of travel).
# The comparisons against max()/min() are strict, so the rows holding the
# single largest and single smallest speed are excluded as well.
# NOTE(review): the text above the exploratory speed plot mentions a 0.05 m/s
# cut-off while this filter uses 0.9 m/s - confirm which value is intended.
speed = pts.speed_mps

fast_positive = (speed > 0.9) & (speed < speed.max())
fast_negative = (speed < -0.9) & (speed > speed.min())

moving_pts = pts[fast_positive | fast_negative]

In [None]:
# (rows, columns) left after the speed filter; displayed by the notebook as
# the last expression in the cell.
moving_pts.shape

In [None]:
# Tool speed against time for the speed-filtered data only, to check that the
# stationary and slowing samples have been removed.
fig, ax = plt.subplots(1, 1, figsize=(12, 6))

ax.set_title('Data Acquired while the Tool is Moving')

ax.scatter(moving_pts.datetime, moving_pts.speed_mps, s=3, c='k', linewidths=0)

ax.set_ylabel('Tool speed [m/sec]')

# Uncomment to zoom in around zero speed and confirm nothing slow remains.
# ax.set_ylim(-0.5, 0.5)

ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
# The trailing semicolon suppresses the notebook's text echo of the last
# expression. It sits on the statement itself because a line containing only
# ';' is not valid Python outside IPython.
ax.set_xlabel('Time [hh:mm]');

In [None]:
# Time-series view of the speed-filtered data, both panels coloured by timestamp:
#   left  - spinner frequency, with the unfiltered tool depth as a faint line
#   right - tool depth, with the surface pump flowrate on a second axis
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 8))

ax1.scatter(moving_pts.datetime, moving_pts.frequency_hz, c=moving_pts.timestamp, s=5, linewidths=0)
ax2.scatter(moving_pts.datetime, moving_pts.depth_m, c=moving_pts.timestamp, s=5, linewidths=0)

ax3 = ax2.twinx()
ax3.plot(
    flowrate.datetime, flowrate.flow_tph,
    c='k', linestyle='-', linewidth=3, alpha=0.3,
    label='Surface pump flowrate',
)
ax3.set_ylabel('Flowrate [t/hr]')

# The depth trace uses the full pts record (not moving_pts), so the tool path
# stays continuous behind the filtered spinner points.
ax4 = ax1.twinx()
ax4.plot(
    pts.datetime, pts.depth_m,
    c='k', linestyle='-', linewidth=2, alpha=0.3,
    label='Tool depth [m]',
)
ax4.set_ylim(1000, 400)
ax4.set_ylabel('Tool depth [m]')

ax1.set_ylim(-30, 30)
ax1.set_ylabel('Spinner frequency [hz]')

# Inverted limits put depth 0 at the top of the panel.
ax2.set_ylim(1000, 0)
ax2.set_ylabel('Tool depth [m]')

for ax in [ax1, ax2]:
    # The tick format includes seconds, so the axis label says so too.
    ax.set_xlabel('Time [hh:mm:ss]')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    ax.grid()

#
# Limit to a time range
#

start_time = pd.to_datetime('2020-12-11 14:30:00')
end_time = pd.to_datetime('2020-12-11 15:30:00')

# Only the left-hand panel is zoomed; the right-hand panel keeps the full record.
ax1.set_xlim(start_time, end_time);

# Remove data from inside the casing



In [None]:
# Depth threshold used to drop the data from inside the casing.
# NOTE(review): presumably the production casing shoe depth in metres, on the
# same reference as depth_m - confirm.
production_shoe = 462.5

# Keep samples deeper than the shoe. The strict comparison against max()
# also leaves out the row(s) at the deepest recorded depth.
depth = moving_pts.depth_m
below_shoe = depth > production_shoe
above_deepest = depth < depth.max()

depth_filtered = moving_pts[above_deepest & below_shoe]

# Summary statistics of the cleaned data, displayed by the notebook.
depth_filtered.describe()

## Plot the cleaned data

Now we have two Boolean statements that we can use to filter this PTS dataset down to only the by-depth data that we want for the spinner analysis. 

There are still a few stray data points that are missed by this simple approach. We plan to develop something a little more elegant to clean these data fully, but this approach is near enough for most purposes. 

In [None]:
# Side-by-side comparison of spinner frequency against depth before (left)
# and after (right) the speed and depth filters, coloured by timestamp.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(24, 8), sharey=True)

for ax, title, frame in [(ax1, 'All data', pts), (ax2, 'Filtered data', depth_filtered)]:
    ax.set_title(title)
    ax.scatter(frame.frequency_hz, frame.depth_m, c=frame.timestamp, s=5, linewidths=0)

# The y-axis is shared, so only the left-hand panel carries the depth label.
ax1.set_ylabel('Depth [m]')

# Identical limits on both panels; the inverted y-limits put depth 0 at the top.
for ax in (ax1, ax2):
    ax.set_ylim(1000, 0)
    ax.set_xlim(-30, 30)
    ax.set_xlabel('Spinner frequency [hz]')
    ax.grid();