In [None]:
import boto3
import numpy as np 
from src.aau.S3Manager import S3Manager
from src.utils.PathManager import Paths as Path
from src.Torque.DataPipe import S3TRQManager
import pandas as pd 
import yaml 

### Init S3 Manager

In [2]:
# Create the S3Manager used by the following cells, passing it "config.csv"
# (presumably the path of its configuration file - TODO confirm).
S3 = S3Manager("config.csv")

### Download well data and labels

In [None]:
# Fetch the processed data of every labelled well; to_csv=True is passed so the
# manager is asked to write each result to CSV as well.
download_options = {
    "start": "2016-01-01",
    "end": "2023-01-01",
    "nan_replace_method": "zero",
    "to_csv": True,
}
for well_code in S3.all_labelled_wells:
    S3.read_processed_data(well_code=well_code, **download_options)

In [None]:
# Fetch the solar/weather data of every station over the same date window,
# requesting a CSV copy through to_csv=True.
weather_start, weather_end = '2016-01-01', '2023-01-01'
for station in S3.all_stations:
    station_df = S3.read_solar(station, weather_start, weather_end, to_csv=True)

In [3]:

# Read the labelled data of every labelled well, pointing the manager at the
# per-well raw CSV and the CSV of the well's nearest weather station.
start = "2016-01-01"
end = "2023-01-01"
for well_code in S3.all_labelled_wells:
    # File names follow the "<code>_<start>_<end>_<kind>.csv" pattern.
    raw_csv_name = f"{well_code}_{start}_{end}_raw.csv"
    weather_csv_name = f"{S3.nearest_station[well_code]}_{start}_{end}_weather.csv"
    S3.read_labelled_data(
        well_code=well_code,
        start=start,
        end=end,
        nan_replace_method='zero',
        raw_csv=raw_csv_name,
        weather_csv=weather_csv_name,
        to_pickle=True,
    )

Issue processing well WKT3, incomplete feature length. Feature: ROC_VOLTAGE, size: 9995


### Classify well types

In [3]:
# Run the manager's voltage-type classification; the return value (if any) is not used here.
S3.classify_voltage_type()

### Calculate transform params

In [None]:
# Compute the transform parameters for the weather data; nothing is captured from the call.
S3.calculate_weather_transform_params()

In [None]:
# Compute the transform parameters for the well data; nothing is captured from the call.
S3.calculate_well_transform_params()

### Get label description

In [61]:
# Refresh the per-well label counts, then display only the wells whose
# count in column 9 is non-zero.
S3.get_well_label_count()
label_counts = S3.well_label_count
has_label_9 = label_counts[9] != 0
label_counts[has_label_9]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
BIGL20,129,7,0,0,0,0,0,11,25,9
BIGL22,115,3,3,1,0,13,0,25,5,16
MOOM115,118,0,7,0,1,25,0,21,0,18
MOOM157,77,5,0,1,0,0,0,11,88,9
TIRRA80,145,1,0,147,53,1,0,1,3,2
TOOL16,160,3,0,0,0,0,0,6,0,8
WKT3,685,10,2,143,13,15,0,5,7,31


In [51]:
# Build one single-column ("labels") DataFrame per well, indexed by the keys
# of that well's entry in S3.label_dict.
all_df = {}
for well, well_labels in S3.label_dict.items():
    all_df[well] = pd.DataFrame(
        {'labels': well_labels.values()},
        index=well_labels.keys(),
    )

In [59]:
# Display the label frame built for well "BIGL20".
all_df["BIGL20"]

Unnamed: 0,labels
2022-01-01,0
2022-01-02,0
2022-01-03,0
2022-01-04,0
2022-01-05,0
...,...
2022-06-26,0
2022-06-27,0
2022-06-28,0
2022-06-29,1


## Check that the expected folders exist on S3

In [3]:
# Display the directory listing returned by the manager (called with no arguments).
S3.list_dir()

{'': ['COPY_TAG', 'ROC', 'TAG_DATA']}

In [11]:
# Display the files found under the 'COPY_TAG/TAG_DATA' prefix.
# NOTE(review): the result is shown in full; consider slicing it if the listing is long.
S3.list_files('COPY_TAG/TAG_DATA')

['COPY_TAG/TAG_DATA/',
 'COPY_TAG/TAG_DATA/RM01-05-4_MasterSQL_Extract.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20170301_20170401.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20170401_20170501.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20170501_20170601.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20170601_20170701.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20170701_20170801.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20170801_20170901.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20170901_20171001.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20171001_20171101.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20171101_20171201.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20171201_20180101.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20180101_20180201.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20180201_20180301.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20180301_20180401.csv',
 'COPY_TAG/TAG_DATA/RM01-05-4_TRQ_TAG_DATA_20180401_

In [13]:
# Read the 'TRQ_TAG_DATA' files stored under COPY_TAG/TAG_DATA for item
# RM02-09-1 over the requested date range and display the result.
S3.read_from_storage(
    path="COPY_TAG/TAG_DATA",
    file_prefix='TRQ_TAG_DATA',
    item_cd="RM02-09-1",
    start='2019-01-01',
    end='2021-11-01',
)

Unnamed: 0,TS,TORQUE_MOTOR,TORQUE_ROD,SPEED_MOTOR,SPEED_ROD,EFFICIENCY_PUMP,LEVEL_DOWNHOLE,FLOW_GAS,FLOW_WATER,TEMP_GAS,PRESSURE_AN,PRESSURE_DH,PRESSURE_GAS,PRESSURE_TUB,PRESSURE_WATER
0,2019-01-01 00:00:45.274,,,,,,,15.150074,0.015111,,,,,,
1,2019-01-01 00:01:45.278,,,,,,3081.039307,15.150074,0.015111,,-30194.185547,,,,
2,2019-01-01 00:02:45.290,,,,,,,15.150074,0.015111,,,,,,
3,2019-01-01 00:03:45.338,16.400000,,,,,,15.150074,0.015111,33.055817,,,206.314178,,
4,2019-01-01 00:04:45.349,16.200001,,,,,,14.674245,0.015344,33.315002,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2595297,2021-10-31 23:58:33.734,,,,,,,,,28.000135,,,,,
2595298,2021-10-31 23:58:33.993,,,,,,,8.267233,0.015392,,,,,,
2595299,2021-10-31 23:59:33.747,19.200001,,,,,,,,28.000135,,,,,
2595300,2021-10-31 23:59:33.996,,,,,,,8.267233,0.015392,,,,,,


## S3TRQManager

In [2]:
# NOTE(review): this rebinds `S3` - created earlier in this notebook as an S3Manager -
# to an S3TRQManager, so every later use of `S3` depends on which cell ran last.
S3 = S3TRQManager("config.csv")

In [3]:
# Display how many wells list_all_wells() returns.
len(S3.list_all_wells())

222

In [3]:
# Keep the list of all wells for the bulk download loop below.
well_list = S3.list_all_wells()

In [None]:
# Read the processed data of every well in `well_list`, passing to_csv=True.
# A failure for one well is reported and skipped so the remaining wells are
# still processed.
for well in well_list:
    try:
        S3.read_processed_data(well_code=well, start='2016-01-01', end='2023-01-01', to_csv=True)
    except Exception as err:
        # Catch `Exception` rather than using a bare `except` so that
        # KeyboardInterrupt/SystemExit can still stop this long-running loop,
        # and report the cause instead of silently discarding it.
        print(f"Error for well {well}: {type(err).__name__}: {err}")

In [6]:
# Read (and, via to_csv=True, save) the processed data of the single well 'RM07-80-1'
# and display the returned result.
S3.read_processed_data(well_code ='RM07-80-1', start = '2016-01-01', end='2023-01-01',  to_csv=True)

INFO:src.Torque.DataPipe:Read well data from database for well: RM07-80-1 from 2016-01-01 to 2023-01-01
INFO:src.Torque.DataPipe:Save well data to RM07-80-1_2016-01-01_2023-01-01_raw.csv


Unnamed: 0,TS,TORQUE_MOTOR,TORQUE_ROD,SPEED_MOTOR,SPEED_ROD,EFFICIENCY_PUMP,LEVEL_DOWNHOLE,FLOW_GAS,FLOW_WATER,TEMP_GAS,PRESSURE_AN,PRESSURE_DH,PRESSURE_GAS,PRESSURE_TUB,PRESSURE_WATER
0,2018-06-23 00:12:00.241,221.459961,,58.5,50.0,,,,0.0,,,,,,
1,2018-06-23 00:13:00.241,,,,,,,,,,,,,,
2,2018-06-23 00:14:00.241,,,,,,,,,,,,,,
3,2018-06-23 00:15:00.241,,,,,,,,,,,,,,
4,2018-06-23 00:16:00.241,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2240598,2022-09-25 23:30:00.241,,,,,,,,,,,,,,
2240599,2022-09-25 23:31:00.241,,,,,,,,,,,,,,
2240600,2022-09-25 23:32:00.241,,,,,,,,,,,,,,
2240601,2022-09-25 23:33:00.241,,,,,,,,,,,,,,
