In [1]:
import petrovisor as pv
from petrovisor import ItemType, RefTableColumnType

In [2]:
import os
import time
import psutil
import pandas as pd
import numpy as np

# Authorization

In [3]:
# Workspace to connect to
workspace = 'Workspace Name'

# Identity (discovery) endpoint; swap in the EU line if that is the region to use
# discovery_url = r'https://identity.eu1.petrovisor.com'
discovery_url = r'https://identity.us1.petrovisor.com'

# First run only: generate an access key, then comment this line out
key = pv.PetroVisor.generate_credentials_key()
# print(f"key : '{key}'")
# On later runs, paste the saved key here instead of generating a new one
# key = ''

# Open the API session and show the API address it resolved to
api = pv.PetroVisor(
    workspace=workspace,
    discovery_url=discovery_url,
    key=key,
)
print(api.Api)

https://api-latest.eu1.petrovisor.com


# PetroVisor API Swagger

https://api.eu1.petrovisor.com/index.html

https://api.us1.petrovisor.com/index.html

# Examples: RefTables

## Create DataFrame

In [4]:
def dataframe_size(df):
    """Return the memory footprint of `df` in MB (bytes / 1024 / 1024).

    Uses `memory_usage(deep=True)`, so the contents of object columns
    (e.g. Python strings) are counted, not just the pointers to them.
    """
    per_column_bytes = df.memory_usage(deep=True)
    total_bytes = per_column_bytes.sum()
    return total_bytes / 1024 / 1024

def process_size():
    """Return the resident set size (RSS) of the current process in MB (bytes / 1024 / 1024)."""
    current_pid = os.getpid()
    rss_bytes = psutil.Process(current_pid).memory_info().rss
    return rss_bytes / 1024 / 1024

In [5]:
# Number of rows in the demo table; switch to a larger value to stress-test uploads
num_rows = int(100)
# num_rows = int(1000)
# num_rows = int(1e6)
columns = list('ABCDEF')

# Random numeric payload: one uniform [0, 1) column per letter
df = pd.DataFrame(np.random.uniform(0,1,size=(num_rows, len(columns))), columns=columns)
# Sizes are sampled right after the numeric frame is created, before the key columns are added
df_size = dataframe_size(df)
memory_size = process_size()

# Assign the entity to exactly the first half of the rows and leave the rest empty.
# A label slice such as `.loc[:n]` includes its end point and would tag n + 1 rows,
# so the row labels are selected positionally instead.
num_with_entity = num_rows // 2
df['Entity'] = None
df.loc[df.index[:num_with_entity], 'Entity'] = 'Well 001'
df['Time'] = None
# Keys are built as strings up front, so no later cast is needed
df['Key'] = [str(i) for i in range(len(df))]

# Put the Entity/Time/Key columns first, followed by the value columns
df = df[['Entity','Time','Key',*columns]]

print(f"Memory used: {memory_size} MB")
print(f"DataFrame Shape: {df.shape}")
df.head()

Memory used: 115.765625 MB
DataFrame Shape: (100, 9)


Unnamed: 0,Entity,Time,Key,A,B,C,D,E,F
0,Well 001,,0,0.865291,0.101145,0.024983,0.321104,0.602491,0.950114
1,Well 001,,1,0.644955,0.726658,0.776576,0.839482,0.599664,0.371566
2,Well 001,,2,0.12902,0.524276,0.920349,0.211082,0.390712,0.553114
3,Well 001,,3,0.953477,0.551805,0.827254,0.922899,0.821126,0.509512
4,Well 001,,4,0.633325,0.691348,0.75683,0.127926,0.962552,0.423052


## Add RefTable

In [6]:
# RefTable name passed to every api call in the cells below
name = 'Py Test New Table'

In [7]:
# delete if exists, so that the "add new table" step below starts without a table of this name
# NOTE(review): behaviour when the table does not exist is not visible here — confirm it does not raise
api.delete_ref_table(name)

<Response [200]>

In [8]:
# add new table

# perf_counter() is monotonic, so the measured duration is not affected by system clock adjustments
start_time = time.perf_counter()

api.add_ref_table(name, df, description = 'Testing API from Python')

end_time = time.perf_counter()
# the printed value is in minutes (seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.017015397548675537


In [9]:
# add data to already existing table (same call as above, issued while the table exists)

# perf_counter() is monotonic, so the measured duration is not affected by system clock adjustments
start_time = time.perf_counter()

api.add_ref_table(name, df, description = 'Testing API from Python')

end_time = time.perf_counter()
# the printed value is in minutes (seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.013286117712656658


In [10]:
# get data info: the result is shown as the cell output
# (in the recorded run: key column, value columns, timestamps, description, labels and name)
api.get_ref_table_data_info(name)

{'Key': {'ColumnType': 'String', 'UnitName': ' ', 'Name': 'Key'},
 'Values': [{'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'A'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'B'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'C'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'D'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'E'},
  {'ColumnType': 'Numeric', 'UnitName': ' ', 'Name': 'F'}],
 'Modified': '2024-02-14T15:26:51.2950096Z',
 'Created': '2024-02-14T15:26:51.2950097Z',
 'Description': 'Testing API from Python',
 'Labels': [],
 'Name': 'Py Test New Table'}

## Save RefTable Data

In [11]:
# save data and overwrite existing data, meaning that rows with the same 'Entity', 'Timestamp/Date/Time', 'Key' will be overwritten

# perf_counter() is monotonic, so the measured duration is not affected by system clock adjustments
start_time = time.perf_counter()

api.save_ref_table_data(name, df, skip_existing_data = False)

end_time = time.perf_counter()
# the printed value is in minutes (seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.005513131618499756


In [12]:
# save data but keep existing data, meaning that rows with the same 'Entity', 'Timestamp/Date/Time', 'Key' will not be overwritten

# perf_counter() is monotonic, so the measured duration is not affected by system clock adjustments
start_time = time.perf_counter()

api.save_ref_table_data(name, df, skip_existing_data = True)

end_time = time.perf_counter()
# the printed value is in minutes (seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

Elapsed time: 0.0059435486793518065


## Load RefTable Data

In [13]:
# load the table back and time the call

# perf_counter() is monotonic, so the measured duration is not affected by system clock adjustments
start_time = time.perf_counter()

# NOTE: this rebinds `df` to the loaded data; in the recorded output its column names
# differ from the frame that was uploaded (e.g. 'Date', 'Key [ ]', 'A [ ]')
df = api.load_ref_table_data(name)

end_time = time.perf_counter()
# the printed value is in minutes (seconds / 60)
print(f"Elapsed time: {(end_time-start_time)/60}")

df

Elapsed time: 0.007588982582092285


Unnamed: 0,Entity,Date,Key [ ],A [ ],B [ ],C [ ],D [ ],E [ ],F [ ]
0,Well 001,,0,0.865291,0.101145,0.024983,0.321104,0.602491,0.950114
1,Well 001,,1,0.644955,0.726658,0.776576,0.839482,0.599664,0.371566
2,Well 001,,2,0.129020,0.524276,0.920349,0.211082,0.390712,0.553114
3,Well 001,,3,0.953477,0.551805,0.827254,0.922899,0.821126,0.509512
4,Well 001,,4,0.633325,0.691348,0.756830,0.127926,0.962552,0.423052
...,...,...,...,...,...,...,...,...,...
95,,,95,0.037831,0.051062,0.898218,0.803915,0.668444,0.136398
96,,,96,0.093827,0.305742,0.737167,0.251420,0.955462,0.573619
97,,,97,0.924104,0.670228,0.860875,0.491014,0.339187,0.561251
98,,,98,0.053995,0.568543,0.876069,0.796048,0.725472,0.109247


## Delete RefTable Data

In [14]:
# delete the data stored in the table
# NOTE(review): presumably the table definition itself is kept (compare `delete_ref_table`) — confirm
api.delete_ref_table_data(name)

<Response [200]>