In [1]:
# Scratch cell: prints a constant, presumably to confirm the kernel is responsive.
print(1)

1


In [1]:
import bisect
import csv
import datetime
import os
from pathlib import Path

import pandas as pd

In [2]:
def iso_to_unix(iso_str):
    """Convert a UTC ISO 8601 timestamp to whole Unix seconds.

    Only the exact layout ``YYYY-MM-DDTHH:MM:SSZ`` (for example
    ``2025-06-29T08:08:20Z``) is accepted; the trailing ``Z`` is matched
    literally and the value is interpreted as UTC.

    Args:
        iso_str: Timestamp string in the layout described above.

    Returns:
        The number of seconds since the Unix epoch, as an ``int``.

    Raises:
        ValueError: If ``iso_str`` does not match the expected layout. The
            offending string is printed before the error is re-raised.
    """
    utc_layout = "%Y-%m-%dT%H:%M:%SZ"
    try:
        moment = datetime.datetime.strptime(iso_str, utc_layout).replace(
            tzinfo=datetime.timezone.utc
        )
        seconds = int(moment.timestamp())
    except ValueError:
        # Show which input failed, then let the caller see the original error.
        print(f"erro: {iso_str}")
        raise
    return seconds

In [3]:
def parse_csv(file_path):
    """Read a two-column time-series CSV into a list of rows.

    The first row is treated as a header and discarded. In every remaining
    row, column 0 is converted from an ISO 8601 UTC string to Unix seconds
    (via ``iso_to_unix``) and column 1 is converted to ``float``. Any further
    columns are kept as the strings the CSV reader produced.

    Completely blank lines are skipped: ``csv.reader`` yields ``[]`` for
    them, which would otherwise fail on ``row[0]``.

    Args:
        file_path: Path of the CSV file to read (UTF-8 encoded).

    Returns:
        A list of rows, each a list starting with ``[unix_seconds, value]``.
        An empty or header-only file gives ``[]``.

    Raises:
        ValueError: If a timestamp or a value cannot be parsed.
        IndexError: If a non-blank row has fewer than two columns.
    """
    parsed_rows = []
    with open(file_path, mode='r', newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        # Discard the header; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            if not row:
                # Blank line in the export: nothing to convert.
                continue
            row[0] = iso_to_unix(row[0])
            row[1] = float(row[1])
            parsed_rows.append(row)
    return parsed_rows

In [4]:
# Load every CSV export found in the data directory into `files`,
# keyed by file name; each value is the list of rows returned by parse_csv().
files = {}
data_dir = Path.cwd().parent / "La Vista 1H"
for file_name in os.listdir(data_dir):
    file_path = data_dir / file_name
    # Only regular *.csv files are time series; skip sub-directories,
    # notebook checkpoints, OS metadata files and the like.
    if not file_path.is_file() or file_path.suffix.lower() != ".csv":
        continue
    files[file_name] = parse_csv(file_path)



In [5]:
# Spot-check the second parsed row of the flow-rate series: [unix_seconds, value].
files["Sales Meter Flow Rate (MCF_Day).csv"][1]

[1751184620, 567.95]

## Cleanup logic
Remove the data recorded before the first proper cycle start.

In [6]:
# List the names of the loaded files (the keys of `files`).
for file in files:
    print(file)

Casing Pressure (PSI).csv
Sales Meter Static Pressure (PSIA).csv
Line Pressure (PSIA).csv
Tubing Pressure (PSI).csv
Arrival Speed.csv
Arrival Time Remaining.csv
Current Non-Arrival Count.csv
Sales Meter Flow Rate (MCF_Day).csv
Down Hole Pressure (PSI).csv


In [7]:
# Inspect the parsed flow-rate series before it is trimmed.
# NOTE(review): this renders the entire list; a slice such as [:10] would keep the output short.
files["Sales Meter Flow Rate (MCF_Day).csv"]

[[1751184500, 585.2],
 [1751184620, 567.95],
 [1751184740, 0.0],
 [1751184860, 0.0],
 [1751184980, 0.0],
 [1751185100, 1054.75],
 [1751185220, 650.64],
 [1751185340, 613.71],
 [1751185460, 542.38],
 [1751185580, 611.62],
 [1751185700, 573.69],
 [1751185820, 517.6],
 [1751185940, 0.0],
 [1751186060, 0.0],
 [1751186180, 0.0],
 [1751186300, 0.0],
 [1751186420, 617.83],
 [1751186540, 515.95],
 [1751186660, 476.27],
 [1751186780, 411.39],
 [1751186900, 688.06],
 [1751187020, 595.41],
 [1751187140, 580.13],
 [1751187260, 0.0],
 [1751187380, 0.0],
 [1751187500, 0.0],
 [1751187620, 0.0],
 [1751187740, 709.58],
 [1751187860, 587.85],
 [1751187980, 542.48],
 [1751188100, 437.1],
 [1751188220, 604.91],
 [1751188340, 568.88],
 [1751188460, 50.51],
 [1751188580, 0.0],
 [1751188700, 0.0],
 [1751188820, 0.0],
 [1751188940, 732.92],
 [1751189060, 544.09],
 [1751189180, 520.34],
 [1751189300, 471.18],
 [1751189420, 403.82],
 [1751189540, 658.41],
 [1751189660, 584.66],
 [1751189780, 49.72],
 [175118990

In [8]:
# Timestamp of the second sample plus 60 seconds -- the same offset cleanup() adds
# to the last pre-zero sample when it builds its cut-off.
files["Sales Meter Flow Rate (MCF_Day).csv"][1][0] + 60

1751184680

In [9]:
def cleanup(data=None, reference_name="Sales Meter Flow Rate (MCF_Day).csv", margin_seconds=60):
    """Trim every series so it starts at the first zero-flow reading.

    The reference (flow-rate) series is scanned for its first row whose value
    is exactly ``0.0``. Everything before that row is dropped from the
    reference series. A cut-off timestamp is then built from the last dropped
    row's timestamp plus ``margin_seconds``, and every other series is trimmed
    so that it starts at its first row at or after that cut-off. A series with
    no row at or after the cut-off becomes empty.

    Nothing is changed when the reference series already starts with a zero
    reading or contains no zero reading at all, so calling this twice is safe.

    Args:
        data: Mapping of file name to list of ``[unix_seconds, value, ...]``
            rows. Defaults to the notebook-level ``files`` dict. Modified in
            place (the lists are replaced by trimmed copies).
        reference_name: Key of the flow-rate series inside ``data``.
        margin_seconds: Seconds added to the last pre-zero timestamp to form
            the cut-off for the other series.

    Returns:
        None.
    """
    if data is None:
        data = files  # notebook-level dict filled by the loading cell

    flow = data[reference_name]
    first_zero = next((i for i, row in enumerate(flow) if row[1] == 0.0), None)
    if first_zero is None or first_zero == 0:
        # No zero reading, or the series is already clean: nothing to trim.
        return

    cutoff = flow[first_zero - 1][0] + margin_seconds
    data[reference_name] = flow[first_zero:]

    # Apply the same cut-off to every other series.
    for name in list(data):
        if name == reference_name:
            continue
        rows = data[name]
        # Fall back to len(rows) so a series that ends before the cut-off is emptied.
        keep_from = next(
            (i for i, row in enumerate(rows) if row[0] >= cutoff),
            len(rows),
        )
        data[name] = rows[keep_from:]

# Run the trimming step; it replaces the lists stored in `files` in place.
cleanup()

## Separate out cycles


In [18]:
def flow_rate_cycles(flow_rate=None):
    """Label each flow-rate sample with the id of the cycle it belongs to.

    A new cycle begins at the first ``0.0`` reading that follows a positive
    reading (or at the very first ``0.0`` reading). Consecutive zeros and the
    positive readings after them share that cycle id. Samples that come before
    any zero reading are labelled ``-1``.

    Args:
        flow_rate: List of ``[unix_seconds, flow_value, ...]`` rows. Defaults
            to the notebook-level ``files`` entry for the sales-meter flow
            rate. The input is not modified.

    Returns:
        A new list of ``[cycle_id, unix_seconds, flow_value]`` rows, one per
        input row and in the same order.
    """
    if flow_rate is None:
        flow_rate = files['Sales Meter Flow Rate (MCF_Day).csv']

    labelled = []
    cycle_id = -1
    in_zero_run = False  # True once the current cycle's leading zero has been seen
    for row in flow_rate:
        timestamp, rate = row[0], row[1]
        if rate == 0.0 and not in_zero_run:
            # First zero after flow: a new cycle starts here.
            cycle_id += 1
            in_zero_run = True
        elif rate > 0.0:
            # Flow resumed; the next zero will open another cycle.
            in_zero_run = False
        labelled.append([cycle_id, timestamp, rate])

    return labelled

In [19]:
# Replace the flow-rate rows with [cycle_id, unix_seconds, flow_rate] rows and display them.
# NOTE(review): this cell is not idempotent. flow_rate_cycles() reads index 1 of each row as the
# flow value, but after this assignment index 1 holds the timestamp, so running the cell a second
# time mislabels every row. Re-run the loading and cleanup cells first if it must be repeated.
files['Sales Meter Flow Rate (MCF_Day).csv'] = flow_rate_cycles()
files['Sales Meter Flow Rate (MCF_Day).csv']

[[0, 1751184740, 0.0],
 [0, 1751184860, 0.0],
 [0, 1751184980, 0.0],
 [0, 1751185100, 1054.75],
 [0, 1751185220, 650.64],
 [0, 1751185340, 613.71],
 [0, 1751185460, 542.38],
 [0, 1751185580, 611.62],
 [0, 1751185700, 573.69],
 [0, 1751185820, 517.6],
 [1, 1751185940, 0.0],
 [1, 1751186060, 0.0],
 [1, 1751186180, 0.0],
 [1, 1751186300, 0.0],
 [1, 1751186420, 617.83],
 [1, 1751186540, 515.95],
 [1, 1751186660, 476.27],
 [1, 1751186780, 411.39],
 [1, 1751186900, 688.06],
 [1, 1751187020, 595.41],
 [1, 1751187140, 580.13],
 [2, 1751187260, 0.0],
 [2, 1751187380, 0.0],
 [2, 1751187500, 0.0],
 [2, 1751187620, 0.0],
 [2, 1751187740, 709.58],
 [2, 1751187860, 587.85],
 [2, 1751187980, 542.48],
 [2, 1751188100, 437.1],
 [2, 1751188220, 604.91],
 [2, 1751188340, 568.88],
 [2, 1751188460, 50.51],
 [3, 1751188580, 0.0],
 [3, 1751188700, 0.0],
 [3, 1751188820, 0.0],
 [3, 1751188940, 732.92],
 [3, 1751189060, 544.09],
 [3, 1751189180, 520.34],
 [3, 1751189300, 471.18],
 [3, 1751189420, 403.82],
 [3,

In [20]:
import pandas as pd  # repeated here; pandas is already imported in the imports cell
# Tabular view of the cycle-labelled flow-rate rows.
# Despite its name, the `isotime` column holds Unix seconds (as produced by iso_to_unix).
df = pd.DataFrame(files['Sales Meter Flow Rate (MCF_Day).csv'], columns=['cycle_id', 'isotime', 'flow_rate'])
df

Unnamed: 0,cycle_id,isotime,flow_rate
0,0,1751184740,0.00
1,0,1751184860,0.00
2,0,1751184980,0.00
3,0,1751185100,1054.75
4,0,1751185220,650.64
...,...,...,...
7193,680,1752048020,426.05
7194,680,1752048140,614.65
7195,680,1752048260,574.28
7196,680,1752048380,544.98


In [22]:
def data_entries_manager(frame=None, sources=None, tolerance_seconds=60):
    """Add one column per non-flow-rate series to the flow-rate DataFrame.

    For every row of ``frame``, each source series is searched for its first
    entry (in file order) whose timestamp lies within ``tolerance_seconds`` of
    the row's ``isotime``, both ends inclusive. That entry's value is stored
    in a column named after the source file; rows without a match get
    ``None``. Columns are stored with ``object`` dtype so ``None`` is kept.

    Every row of ``frame`` is processed, whatever its length.

    Args:
        frame: DataFrame with an integer ``isotime`` column (Unix seconds).
            Defaults to the notebook-level ``df``. Modified in place.
        sources: Mapping of file name to list of ``[unix_seconds, value, ...]``
            rows. Defaults to the notebook-level ``files``. The flow-rate
            entry is skipped because ``frame`` is already built from it.
        tolerance_seconds: Half-width of the matching window in seconds.

    Returns:
        None.
    """
    if frame is None:
        frame = df
    if sources is None:
        sources = files

    # Read the timestamps straight from the column: plain ints, independent of
    # the dtypes of the other columns.
    row_times = frame["isotime"].tolist()

    for file_name, rows in sources.items():
        if file_name == "Sales Meter Flow Rate (MCF_Day).csv":
            continue

        timestamps = [row[0] for row in rows]
        # A binary search returns the same "first entry in the window" as a
        # front-to-back scan only when the timestamps are in ascending order.
        ascending = all(a <= b for a, b in zip(timestamps, timestamps[1:]))

        matched = []
        for moment in row_times:
            low = moment - tolerance_seconds
            high = moment + tolerance_seconds
            if ascending:
                j = bisect.bisect_left(timestamps, low)
                hit = j if j < len(timestamps) and timestamps[j] <= high else None
            else:
                # Unsorted input: fall back to the original first-match scan.
                hit = next(
                    (k for k, ts in enumerate(timestamps) if low <= ts <= high),
                    None,
                )
            matched.append(None if hit is None else rows[hit][1])

        # Assign the whole column at once; object dtype keeps None as None.
        frame[file_name] = pd.Series(matched, index=frame.index, dtype=object)
       
# Add one column per remaining file to `df` (mutates `df` in place).
data_entries_manager()

In [25]:
# Display the flow-rate frame together with the columns added by data_entries_manager().
df

Unnamed: 0,cycle_id,isotime,flow_rate,Casing Pressure (PSI).csv,Sales Meter Static Pressure (PSIA).csv,Line Pressure (PSIA).csv,Tubing Pressure (PSI).csv,Arrival Speed.csv,Arrival Time Remaining.csv,Current Non-Arrival Count.csv,Down Hole Pressure (PSI).csv
0,0,1751184740,0.00,204.41,97.06,16.52,181.19,979.15,30.0,0.0,291.14
1,0,1751184860,0.00,207.57,96.14,16.49,196.42,979.15,30.0,0.0,299.94
2,0,1751184980,0.00,211.6,95.36,16.55,202.05,979.15,30.0,0.0,306.28
3,0,1751185100,1054.75,215.27,113.09,16.52,125.86,979.15,29.64,0.0,305.62
4,0,1751185220,650.64,215.22,103.22,16.48,101.29,979.15,26.63,0.0,289.12
...,...,...,...,...,...,...,...,...,...,...,...
7193,680,1752048020,426.05,207.56,94.15,16.52,83.23,1122.95,22.34,0.0,295.53
7194,680,1752048140,614.65,205.56,97.66,16.52,99.8,1057.8,30.0,0.0,291.17
7195,680,1752048260,574.28,205.51,97.9,16.49,98.35,1057.8,30.0,0.0,281.86
7196,680,1752048380,544.98,204.19,98.3,16.55,97.3,1057.8,30.0,0.0,268.87
