In [1]:
import petrovisor as pv
from petrovisor import ItemType, RefTableColumnType

In [2]:
import os
import time
import psutil
import pandas as pd
import numpy as np

# Authorization

In [None]:
# Discovery URL of the PetroVisor identity service (US region).
# For the EU region use 'https://identity.eu1.petrovisor.com' instead.
discovery_url = 'https://identity.us1.petrovisor.com'

# Workspace to connect to (placeholder: replace with a real workspace name)
workspace = 'Workspace Name'

# Generate an access key. This is only needed the first time: afterwards,
# comment this line out and reuse the saved key instead.
key = pv.PetroVisor.generate_credentials_key()
# To see the generated key so that it can be saved: print(f"key : '{key}'")
# To reuse a saved key, assign it to `key` here. Prefer reading it from an
# environment variable or a secrets store over pasting it into the notebook.

# Create the API client used by every example below
api = pv.PetroVisor(workspace=workspace, discovery_url=discovery_url, key=key)
# Print the client's `Api` attribute (presumably the resolved API endpoint - confirm)
print(api.Api)

# PetroVisor API Swagger

- EU region: https://api.eu1.petrovisor.com/index.html
- US region: https://api.us1.petrovisor.com/index.html

# Examples: RefTables

## Create DataFrame

In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# get dataframe size
def dataframe_size(df):
    """Return the memory footprint of ``df`` in megabytes (bytes / 1024 / 1024).

    ``memory_usage(deep=True)`` is used so that the contents of object columns
    (e.g. strings) are measured as well; the per-column byte counts it returns
    are summed into a single total.
    """
    total_bytes = df.memory_usage(deep=True).sum()
    return total_bytes / 1024 / 1024

# get memory size used by process
def process_size():
    """Return the resident set size (RSS) of the current process in megabytes.

    The process is looked up through ``psutil`` by its own PID and the RSS byte
    count is divided by 1024 twice.
    """
    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info().rss
    return rss_bytes / 1024 / 1024

# Size of the synthetic data set and the names of its value columns
num_rows = 1000
columns = ['A', 'B', 'C', 'D', 'E', 'F']

# Value columns: uniform random numbers drawn from [0, 1)
random_values = np.random.uniform(0, 1, size=(num_rows, len(columns)))
df = pd.DataFrame(random_values, columns=columns)

# Both sizes are measured right after the random columns are created, i.e.
# before the Entity/Time/Key columns below are added
df_size = dataframe_size(df)
memory_size = process_size()

# Entity column: .loc slices by label and includes both end points, so rows
# 0..num_rows//2 (501 rows) get 'Well 001'; the remaining rows keep None
df['Entity'] = None
df.loc[:num_rows // 2, 'Entity'] = 'Well 001'

# Hourly timestamps starting at 2025-01-01 00:00
start_date = datetime(2025, 1, 1)
time_interval = timedelta(hours=1)
time_series = np.array(
    [start_date + step * time_interval for step in range(num_rows)],
    dtype=object,
)

# Blank out every 10th timestamp (positions 9, 19, 29, ...) so that 10% of the
# rows have no time value. The gaps are regular, not random.
gap_indices = np.arange(9, num_rows, 10)
time_series[gap_indices] = None
df['Time'] = time_series

# Key column: running row number 0..len(df)-1
df['Key'] = list(range(len(df)))

# Put Entity/Time/Key first, followed by the value columns, and give E and F a
# unit suffix in square brackets
df = df[['Entity', 'Time', 'Key'] + columns].rename(columns={"F": "F [cm]", "E": "E [ft]"})

print(f"Memory used: {memory_size} MB")
print(f"DataFrame Shape: {df.shape}")
print("\nSample of the data:")
display(df.head())

# Summary of the time column: how many timestamps are set and which range they span
total_rows = len(df)
non_null_times = df['Time'].count()
null_times = total_rows - non_null_times
print("\nTime series statistics:")
print(f"Total rows: {total_rows}")
print(f"Non-null times: {non_null_times}")
print(f"Null times: {null_times}")
print(f"Time range: {df['Time'].min()} to {df['Time'].max()}")

Memory used: 144.484375 MB
DataFrame Shape: (1000, 9)

Sample of the data:


Unnamed: 0,Entity,Time,Key,A,B,C,D,E [ft],F [cm]
0,Well 001,2025-01-01 00:00:00,0,0.93389,0.854275,0.204862,0.38315,0.656357,0.164028
1,Well 001,2025-01-01 01:00:00,1,0.531154,0.463441,0.009958,0.029668,0.605644,0.136833
2,Well 001,2025-01-01 02:00:00,2,0.002999,0.423221,0.11286,0.60685,0.617334,0.827693
3,Well 001,2025-01-01 03:00:00,3,0.03545,0.937476,0.220853,0.503644,0.033182,0.168539
4,Well 001,2025-01-01 04:00:00,4,0.556272,0.379054,0.311725,0.888229,0.583703,0.627247



Time series statistics:
Total rows: 1000
Non-null times: 900
Null times: 100
Time range: 2025-01-01 00:00:00 to 2025-02-11 14:00:00


## Add RefTable

In [5]:
# Name of the reference table used by all of the following examples
name = 'PyRefTable'

In [6]:
# Delete data that may already be stored under this table name, so that the
# example starts from a clean state. No filters are passed here.
# NOTE(review): behavior when the table does not exist yet is not visible
# here - confirm against the PetroVisor API docs.
api.delete_ref_table_data(name)

<Response [200]>

In [7]:
# Add a new reference table built from the DataFrame and time the call.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it cannot jump when the system clock is adjusted.
start_time = time.perf_counter()

api.add_ref_table(name, df, description='Testing API from Python')

end_time = time.perf_counter()
# Note: the printed value is in minutes (elapsed seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.02324527899424235


In [8]:
# Call add_ref_table again with the same name: the table already exists at this
# point, so this adds the DataFrame's data to the existing table.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it cannot jump when the system clock is adjusted.
start_time = time.perf_counter()

api.add_ref_table(name, df, description='Testing API from Python')

end_time = time.perf_counter()
# Note: the printed value is in minutes (elapsed seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.035637088616689044


In [9]:
# Fetch the table's metadata. The recorded output lists the key column, the
# value columns with their units, and audit fields such as Created/Modified.
api.get_ref_table_data_info(name)

{'Key': {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'Key'},
 'Values': [{'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'A'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'B'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'C'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'D'},
  {'ColumnType': 'Numeric', 'UnitName': 'ft', 'Name': 'E'},
  {'ColumnType': 'Numeric', 'UnitName': 'cm', 'Name': 'F'}],
 'Modified': '2025-02-13T11:52:46.111Z',
 'Created': '2025-02-13T11:35:15.267Z',
 'ModifiedBy': 'RomanManasipov',
 'CreatedBy': 'RomanManasipov',
 'Description': 'Testing API from Python',
 'TypeOfItem': 'ReferenceTable',
 'Labels': [],
 'Name': 'PyRefTable'}

## Save RefTable Data

In [10]:
# Relabel the unit suffix of E and F as metres before saving. The table was
# created with E in ft and F in cm.
# NOTE(review): judging by the recorded outputs further down, values saved with
# an [m] suffix come back converted to the table's ft/cm units - confirm
# against the PetroVisor API docs.
unit_relabel = {"F [cm]": "F [m]", "E [ft]": "E [m]"}
df = df.rename(columns=unit_relabel)

In [11]:
# Save the data and overwrite existing data: rows with the same 'Entity',
# 'Timestamp/Date/Time' and 'Key' will be overwritten.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it cannot jump when the system clock is adjusted.
start_time = time.perf_counter()

api.save_ref_table_data(name, df, skip_existing_data=False)

end_time = time.perf_counter()
# Note: the printed value is in minutes (elapsed seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.019711248079935708


In [12]:
# Replace every column label with its position (0, 1, 2, ...).
# NOTE(review): presumably this demonstrates saving a DataFrame whose columns
# are matched to the table by position rather than by name - confirm against
# the PetroVisor API docs.
positional_labels = dict(zip(df.columns, range(len(df.columns))))
df = df.rename(columns=positional_labels)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Well 001,2025-01-01 00:00:00,0,0.933890,0.854275,0.204862,0.383150,2.153403,16.402814
1,Well 001,2025-01-01 01:00:00,1,0.531154,0.463441,0.009958,0.029668,1.987021,13.683252
2,Well 001,2025-01-01 02:00:00,2,0.002999,0.423221,0.112860,0.606850,2.025374,82.769261
3,Well 001,2025-01-01 03:00:00,3,0.035450,0.937476,0.220853,0.503644,0.108863,16.853892
4,Well 001,2025-01-01 04:00:00,4,0.556272,0.379054,0.311725,0.888229,1.915037,62.724656
...,...,...,...,...,...,...,...,...,...
995,,2025-02-11 11:00:00,995,0.815963,0.768093,0.717846,0.991356,0.248677,15.650860
996,,2025-02-11 12:00:00,996,0.771074,0.735959,0.209810,0.903201,1.816565,46.363178
997,,2025-02-11 13:00:00,997,0.037438,0.210890,0.272688,0.401626,1.073386,90.584551
998,,2025-02-11 14:00:00,998,0.586751,0.594620,0.696842,0.109367,2.827075,9.762039


In [13]:
# Save the data but keep existing data: rows with the same 'Entity',
# 'Timestamp/Date/Time' and 'Key' will not be overwritten.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it cannot jump when the system clock is adjusted.
start_time = time.perf_counter()

api.save_ref_table_data(name, df, skip_existing_data=True)

end_time = time.perf_counter()
# Note: the printed value is in minutes (elapsed seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.013485201199849446


## Load RefTable Data

In [14]:
# Load the whole table (no filters) into a DataFrame and time the call.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it cannot jump when the system clock is adjusted.
start_time = time.perf_counter()

df_loaded = api.load_ref_table_data(name)

end_time = time.perf_counter()
# Note: the printed value is in minutes (elapsed seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

df_loaded

Elapsed time: 0.006603630383809408


Unnamed: 0,Entity,Timestamp,Key [ ],A [ ],B [ ],C [ ],D [ ],E [ft],F [cm]
0,Well 001,2025-01-01T00:00:00,0,0.933890,0.854275,0.204862,0.383150,2.153403,16.402814
1,Well 001,2025-01-01T01:00:00,1,0.531154,0.463441,0.009958,0.029668,1.987021,13.683252
2,Well 001,2025-01-01T02:00:00,2,0.002999,0.423221,0.112860,0.606850,2.025374,82.769261
3,Well 001,2025-01-01T03:00:00,3,0.035450,0.937476,0.220853,0.503644,0.108863,16.853892
4,Well 001,2025-01-01T04:00:00,4,0.556272,0.379054,0.311725,0.888229,1.915037,62.724656
...,...,...,...,...,...,...,...,...,...
995,,2025-02-11T11:00:00,995,0.815963,0.768093,0.717846,0.991356,0.248677,15.650860
996,,2025-02-11T12:00:00,996,0.771074,0.735959,0.209810,0.903201,1.816565,46.363178
997,,2025-02-11T13:00:00,997,0.037438,0.210890,0.272688,0.401626,1.073386,90.584551
998,,2025-02-11T14:00:00,998,0.586751,0.594620,0.696842,0.109367,2.827075,9.762039


In [15]:
# Load only part of the table by passing filter options, and time the call.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it cannot jump when the system clock is adjusted.
start_time = time.perf_counter()

# NOTE(review): "E [cm]" is requested although the table stores E in ft; the
# recorded output contains an "E [cm]" column, so the unit in brackets
# presumably selects the unit the values are returned in - confirm against the
# PetroVisor API docs. The exact semantics of `top`, `all_cols` and of comparing
# the numeric [Key] with the quoted value '20' should be confirmed there as well.
df_loaded = api.load_ref_table_data(
    name,
    date_start=datetime(2025, 1, 1),
    date_end=datetime(2025, 1, 10),
    columns=["F [cm]", "E [cm]"],
    top=10,
    all_cols=False,
    where="[Entity] = 'Well 001' AND [Key] >= '20'",
)

end_time = time.perf_counter()
# Note: the printed value is in minutes (elapsed seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

df_loaded

Elapsed time: 0.005273016293843588


Unnamed: 0,Entity,Timestamp,Key [ ],F [cm],E [cm]
0,Well 001,2025-01-01T20:00:00,20,54.92295,36.282716
1,Well 001,2025-01-01T21:00:00,21,76.746669,59.821165
2,Well 001,2025-01-01T22:00:00,22,28.586216,27.864751
3,Well 001,2025-01-01T23:00:00,23,38.834049,68.256674
4,Well 001,2025-01-02T00:00:00,24,38.344599,8.168921
5,Well 001,2025-01-02T01:00:00,25,50.167389,12.084824
6,Well 001,2025-01-02T02:00:00,26,7.259113,89.10349
7,Well 001,2025-01-02T03:00:00,27,71.471126,8.574323
8,Well 001,2025-01-02T04:00:00,28,55.628957,96.542465
9,Well 001,2025-01-02T06:00:00,30,66.418378,33.244806


## Delete RefTable Data

In [16]:
# Delete only the rows that match the filter options below: an entity list
# (including None), a date range, a flag for rows with null dates, and a
# `where` condition on the key.
# NOTE(review): how these filters combine (e.g. whether `date_end` is inclusive
# and what exactly `drop_null_dates` removes) is not visible here - confirm
# against the PetroVisor API docs.
api.delete_ref_table_data(name,
                          entities=["Well 001", None],
                          date_start=datetime(2025,1,1),
                          date_end=datetime(2025,1,10),
                          drop_null_dates=True,
                          where="[Key] > '20'",
                         )

<Response [200]>

In [17]:
# Reload the whole table to inspect which rows remain after the filtered delete
api.load_ref_table_data(name)

Unnamed: 0,Entity,Timestamp,Key [ ],A [ ],B [ ],C [ ],D [ ],E [ft],F [cm]
0,Well 001,2025-01-01T00:00:00,0,0.933890,0.854275,0.204862,0.383150,2.153403,16.402814
1,Well 001,2025-01-01T01:00:00,1,0.531154,0.463441,0.009958,0.029668,1.987021,13.683252
2,Well 001,2025-01-01T02:00:00,2,0.002999,0.423221,0.112860,0.606850,2.025374,82.769261
3,Well 001,2025-01-01T03:00:00,3,0.035450,0.937476,0.220853,0.503644,0.108863,16.853892
4,Well 001,2025-01-01T04:00:00,4,0.556272,0.379054,0.311725,0.888229,1.915037,62.724656
...,...,...,...,...,...,...,...,...,...
720,,2025-02-11T10:00:00,994,0.808316,0.443616,0.294395,0.108750,1.218640,31.996913
721,,2025-02-11T11:00:00,995,0.815963,0.768093,0.717846,0.991356,0.248677,15.650860
722,,2025-02-11T12:00:00,996,0.771074,0.735959,0.209810,0.903201,1.816565,46.363178
723,,2025-02-11T13:00:00,997,0.037438,0.210890,0.272688,0.401626,1.073386,90.584551


In [18]:
# Delete individual rows by listing them in `keys`. Each entry here is a triple
# of entity, timestamp and key value; the entity may be None.
# NOTE(review): the expected element order is inferred from the values passed
# below - confirm against the PetroVisor API docs.
api.delete_ref_table_data(name,
                          keys=[
                              ["Well 001", datetime(2025,1,1,1), 1],
                              [None, datetime(2025,2,11,14), 998],
                          ],
                         )

<Response [200]>

In [19]:
# Reload the whole table to check that the rows listed by key are gone
api.load_ref_table_data(name)

Unnamed: 0,Entity,Timestamp,Key [ ],A [ ],B [ ],C [ ],D [ ],E [ft],F [cm]
0,Well 001,2025-01-01T00:00:00,0,0.933890,0.854275,0.204862,0.383150,2.153403,16.402814
1,Well 001,2025-01-01T02:00:00,2,0.002999,0.423221,0.112860,0.606850,2.025374,82.769261
2,Well 001,2025-01-01T03:00:00,3,0.035450,0.937476,0.220853,0.503644,0.108863,16.853892
3,Well 001,2025-01-01T04:00:00,4,0.556272,0.379054,0.311725,0.888229,1.915037,62.724656
4,Well 001,2025-01-01T05:00:00,5,0.315203,0.853143,0.522263,0.841016,0.105311,91.032671
...,...,...,...,...,...,...,...,...,...
718,,2025-02-11T09:00:00,993,0.457633,0.197568,0.162471,0.620044,0.245033,23.734511
719,,2025-02-11T10:00:00,994,0.808316,0.443616,0.294395,0.108750,1.218640,31.996913
720,,2025-02-11T11:00:00,995,0.815963,0.768093,0.717846,0.991356,0.248677,15.650860
721,,2025-02-11T12:00:00,996,0.771074,0.735959,0.209810,0.903201,1.816565,46.363178


In [20]:
# Clean up: delete the example table's data. No filters are passed, so this
# presumably removes all remaining rows - confirm against the PetroVisor API docs.
api.delete_ref_table_data(name)

<Response [200]>