## Get data from Arboreals api:s

In [2]:
import requests as rq
import pandas as pd
import json as json
import numpy as np
import os.path
import time
import tqdm

In [3]:
## Settings ##

# Insert your own API_KEY it should look like something like this:
# API_KEY = "Key 23412E3......"

# Mich key

API_KEY = "Key XXX"

In [4]:
# Checks if you have an previous export of data - will make it possible to only fetch new data
# Makes the request faster. 

if os.path.isfile('Samples.csv'):
    previousSamples_df=pd.read_csv('Samples.csv', sep=',', decimal='.')
    previous_samples_array = previousSamples_df[['sample_id']].values


In [5]:
def getSamples():
    api_endpoint = "https://api.arboreal.se/getFilteredSamples"

    headers = {
        'Accept': 'application/json',
        "Authorization" : API_KEY
    }
    response = rq.get(
        api_endpoint,
        headers = headers
    )
    response.json()
    samples_json = response.json()
    samples_json_str = json.dumps(samples_json)
    if 'error' in samples_json_str:
        print(samples_json_str)
    return samples_json_str



In [6]:
def getTreesAndStemsAndCalculationsAndHeights(idet):
    
    # Common API call
    api_endpoint = "https://api.arboreal.se/getSampleById"
    parameters = {"id": idet}
    headers = {
        'Accept': 'application/json',
        "Authorization": API_KEY
    }

    response = rq.get(api_endpoint, headers=headers, params=parameters)



    sample_json = response.json()
    trees = sample_json[0]['trees'][0]
    
    calculations = sample_json[0]['calculations']
    calculations_df = pd.DataFrame(calculations)
    desired_order = ['id', 'latin_name','english_name','sample_id','specie_id','basal_area', 'diameter_dgv', 'diameter_aritm', 'trunk_per_ha', 'height', 'volume_m3sk', 'volume_m3fub', 'avg_trunk_volume','age']

    calculations_df =  calculations_df.reindex(columns=desired_order)
    calculations_df = calculations_df.rename(columns={'trunk_per_ha': 'trees_per_ha'})
    
    heightAgeGrowth = sample_json[0]['heightAgeGrowth']
    heightAgeGrowth_df = pd.DataFrame(heightAgeGrowth)
    

    # Process Trees
    trees_df = pd.json_normalize(trees)
    desired_columns = ['tree_id', 'sample_id', 'unix_time', 'specie_id','latin_name','english_name', 'x', 'y',
                       'z', 'diameter', 'height', 'line_number', 'number_in_line', 'damaged_detected',
                       'gps_longitude', 'gps_latitude', 'gps_horizontal_accuracy', 'bearing_to_north', 'comment']
    trees_df = trees_df.reindex(columns=desired_columns)

    # Process Stems
    stems_df = pd.json_normalize(trees, record_path=['stems'])
    stems_df = stems_df.rename(columns={'id': 'stem_id', 'name': 'stem_name'})
    desired_order = ['stem_id', 'tree_id', 'sample_id', 'diameter', 'x', 'y', 'z', 'stem_name', 'quality_code']
    for col in desired_order:
        if col not in stems_df.columns:
            stems_df[col] = pd.Series([None] * len(stems_df), index=stems_df.index, dtype=object)
    stems_df.loc[stems_df['diameter'].notna(), 'diameter'] *= 100
    
    heightAgeGrowth_df = heightAgeGrowth_df.rename(columns={'id': 'height_id'}) 
    heightAgeGrowth_df = heightAgeGrowth_df.rename(columns={'name': 'stem_name'})
     # Ensure all desired columns are present
    desired_columns = ['height_id','latin_name','english_name','tree_id','stem_name','sample_id','time_stamp','marking_x', 'marking_y','marking_z',
          'diameter', 'growth', 'age', 'specie_id', 'height','comment'
    ]
    heightAgeGrowth_df = heightAgeGrowth_df.reindex(columns=desired_columns)
    heightAgeGrowth_df.loc[heightAgeGrowth_df['diameter'].notna(), 'diameter'] *= 100
    
    # Convert tree_id to integer

    heightAgeGrowth_df['tree_id'] = heightAgeGrowth_df['tree_id'].fillna(0).astype(int)
    heightAgeGrowth_df['age'] = heightAgeGrowth_df['age'].fillna(0).astype(int)
 

    # Return all DataFrames
    return trees_df, stems_df,calculations_df,heightAgeGrowth_df


In [7]:
# Fetch sample plots. Should just take a few seconds. 158 Plots takes 0.38 seconds

start_time = time.time()
samples_json_str = getSamples()
samples_df = pd.json_normalize(json.loads(samples_json_str)) 


desired_columns = ['sample_id', 'external_id','external_name', 'name','unix_time', 'area','sample_radius','measure_method_type', 'longitude','latitude','altitude', 'gps_stamp_horizontal_accuracy', 'center_x','center_y','center_z',   
     'tracking_not_available','tracking_limited','tracking_normal','latest_sync', 'heading', 'comment', 'customer_id', 'app_version', 'model'
]

# Subset the DataFrame to include only the desired columns
samples_df = samples_df.reindex(columns=desired_columns)

samples_array = samples_df[['sample_id']].values
samples_array = samples_array.astype(int)


print(f"Number of samples in samples_array: {len(samples_array)}")


try:
    print(f"Number of samples in previous_samples_array: {len(previous_samples_array)}")
    if len(previous_samples_array) > 0:
        dims = np.maximum(previous_samples_array.max(0), samples_array.max(0)) + 1
        samples_array = samples_array[~np.in1d(np.ravel_multi_index(samples_array.T, dims), np.ravel_multi_index(previous_samples_array.T, dims))]
except NameError:
    print("previous_samples_array does not exist.")
    # If previous_samples_array does not exist, you might want to initialize it or handle it as needed
    # For example, if it should be empty:
    previous_samples_array = np.array([])  # or however you would like to initialize

print(f"Number of samples to fetch: {len(samples_array)}")

#48995


# Record the end time
end_time = time.time()

# Calculate the elapsed time in seconds
elapsed_time = end_time - start_time

# Print the elapsed time
print(f"Elapsed time: {elapsed_time:.2f} seconds")
samples_df


Number of samples in samples_array: 6
previous_samples_array does not exist.
Number of samples to fetch: 6
Elapsed time: 0.13 seconds


Unnamed: 0,sample_id,external_id,external_name,name,unix_time,area,sample_radius,measure_method_type,longitude,latitude,...,center_z,tracking_not_available,tracking_limited,tracking_normal,latest_sync,heading,comment,customer_id,app_version,model
0,168184,0,Py 4,SP2025-02-11_14_08_12,1739279292,397.6078,11.25,1,21.21179,66.013886,...,1.131008,0,58,10758,1,0,,13884,4.01,iPhone 16 Pro
1,168179,0,Py 3,SP2025-02-11_13_29_07,1739276947,397.6078,11.25,1,21.212615,66.013244,...,0.125123,0,0,6539,1,0,Bra föryngring mest gran,13884,4.01,iPhone 16 Pro
2,168176,0,py 2,SP2025-02-11_13_06_50,1739275610,397.6078,11.25,1,21.212978,66.013413,...,2.064107,0,0,5634,1,0,,13884,4.01,iPhone 16 Pro
3,168172,0,Py 1 nordöstra hörnet,SP2025-02-11_12_27_40,1739273260,397.6078,11.25,1,21.213282,66.01339,...,-1.490507,0,0,7206,1,0,PY 1 nordöstra hörn,13884,4.01,iPhone 16 Pro
4,168064,0,,SP2025-02-09_15_14_32,1739110472,99.99999,5.641896,1,20.019893,66.654849,...,1.75056,0,0,6081,1,0,Rostets,13884,3.57,iPhone 16 Pro
5,168063,0,,SP2025-02-07_21_34_02,1738960441,99.99999,5.641896,1,20.014187,66.655103,...,-1.930297,0,0,5490,1,0,,13884,3.57,iPhone 16 Pro


In [8]:
# Fetches all unfetched sampleplots and saves it to four different Dataframes that later will be exported as csv-files

# This will take some time if you are fetching many plots. It takes about three seconds per plot to run

from tqdm import tqdm  # Importing tqdm correctly

start_time = time.time()

# Check if samples_array is empty
if samples_array.size == 0:
    print("Warning: samples_array is empty. No data is fetched.")
else:
    # Fetch data for each sample
    # data = [getTreesAndStemsAndCalculationsAndHeights(x) for x in samples_array]
    data = [getTreesAndStemsAndCalculationsAndHeights(x) for x in tqdm(samples_array, desc="Fetching Data")]


    # Separate trees and stems data
    trees_data = [item[0] for item in data]  # First element of each tuple
    stems_data = [item[1] for item in data]  # Second element of each tuple
    calculations_data = [item[2] for item in data]
    height_data = [item[3] for item in data]

    # Concatenate DataFrames
    trees_DF = pd.concat(trees_data, ignore_index=True)
    stems_DF = pd.concat(stems_data, ignore_index=True)
    calculations_DF = pd.concat(calculations_data, ignore_index=True)
    heights_DF = pd.concat(height_data, ignore_index=True)

    # Continue with the previous checks and conversions for trees
    for col in ['tree_id', 'sample_id', 'specie_id']:
        if trees_DF[col].isnull().any():
            raise ValueError(f"Column {col} contains NaN values, which can't be converted to integers.")
        trees_DF[col] = trees_DF[col].astype(int)

   
    # Proceed with checks and conversions only if the DataFrame has data
    for col in ['tree_id', 'stem_id']:
        if stems_DF[col].isnull().any():
            raise ValueError(f"Column {col} contains NaN values, which can't be converted to integers.")
        stems_DF[col] = stems_DF[col].astype(int)
   
    
    for col in ['sample_id', 'specie_id']:
        if heights_DF[col].isnull().any():
            raise ValueError(f"Column {col} contains NaN values, which can't be converted to integers.")
        heights_DF[col] = heights_DF[col].astype(int)
        
    for col in ['sample_id', 'specie_id']:
        if calculations_DF[col].isnull().any():
            raise ValueError(f"Column {col} contains NaN values, which can't be converted to integers.")
        calculations_DF[col] = calculations_DF[col].astype(int)

        # Record the end time
end_time = time.time()

# Calculate the elapsed time in seconds
elapsed_time = end_time - start_time

# Print the elapsed time
print(f"Elapsed time: {elapsed_time:.2f} seconds")

if samples_array.size > 0:
    calculations_DF


Fetching Data: 100%|██████████| 6/6 [00:01<00:00,  5.14it/s]

Elapsed time: 1.19 seconds





In [9]:
# Export the data to CSV -files. Append requires a newer version of Python, like 

samples_df.to_csv('Samples.csv')
print(f"All sample plots: {samples_df.shape[0]}")

if len(previous_samples_array) > 0:
    
    if 'heights_DF' in locals() and os.path.exists('Heights.csv'):
        previous_heights_df = pd.read_csv('Heights.csv', sep=',', decimal='.')
        print(f"New heights fetched: {heights_DF.shape[0]}")
        heights_DF = pd.concat([heights_DF, previous_heights_df], ignore_index=True)
        columns_to_remove = [col for col in heights_DF.columns if 'Unnamed' in col]
        heights_DF = heights_DF.drop(columns=columns_to_remove)
        heights_DF.to_csv('Heights.csv', index=False) 
      
    
    if 'trees_DF' in locals() and os.path.exists('Trees.csv'):
        previous_trees_df =pd.read_csv('Trees.csv', sep=',', decimal='.')
        print(f"New trees fetched: {trees_DF.shape[0]}")
        trees_DF = pd.concat([trees_DF, previous_trees_df], ignore_index=True)
        columns_to_remove = [col for col in trees_DF.columns if 'Unnamed' in col]
        trees_DF = trees_DF.drop(columns=columns_to_remove)
        trees_DF.to_csv('Trees.csv')
        print(f"Saved total trees: {trees_DF.shape[0]}")
    
    if 'stems_DF' in locals() and os.path.exists('Stems.csv'):
        previous_stems_df = pd.read_csv('Stems.csv', sep=',', decimal='.')
        print(f"New stems fetched: {stems_DF.shape[0]}")
        stems_DF = pd.concat([stems_DF, previous_stems_df], ignore_index=True)
        columns_to_remove = [col for col in stems_DF.columns if 'Unnamed' in col]
        stems_DF = stems_DF.drop(columns=columns_to_remove)
        stems_DF.to_csv('Stems.csv')
        print(f"Saved total stems: {stems_DF.shape[0]}")
    
    if 'calculations_DF' in locals() and os.path.exists('Calculations.csv'):
        previous_calc_df = pd.read_csv('Calculations.csv', sep=',', decimal='.')
        print(f"New stems fetched: {calculations_DF.shape[0]}")
        calculations_DF = pd.concat([calculations_DF, previous_calculations_df], ignore_index=True)
        columns_to_remove = [col for col in calculations_DF.columns if 'Unnamed' in col]
        calculations_DF = calculations_DF.drop(columns=columns_to_remove)
        calculations_DF.to_csv('Calculations.csv')
        print(f"Saved total Calculations: {calculations_DF.shape[0]}")
        
    if not os.path.isfile('Stems.csv'):
        stems_DF.to_csv('Stems.csv')
        print(f"New total stems saved because missing: {stems_DF.shape[0]}")

    if not os.path.isfile('Calculations.csv'):
        calculations_DF.to_csv('Calculations.csv')
        print(f"New total Calculations saved  because missing: {calculations_DF.shape[0]}")
        
    if not os.path.isfile('Heights.csv'):
        heights_DF.to_csv('Heights.csv')
        print(f"New total Heights saved because missing: {heights_DF.shape[0]}")

    if not os.path.isfile('Trees.csv'):
        trees_DF.to_csv('Trees.csv')
        print(f"New total Trees saved because missing: {trees_DF.shape[0]}")
  
        
        
else:
    heights_DF.to_csv('Heights.csv')
    trees_DF.to_csv('Trees.csv')
    stems_DF.to_csv('Stems.csv')
    calculations_DF.to_csv('Calculations.csv')
    print(f"New total stems saved: {stems_DF.shape[0]}")
    print(f"New total trees saved: {trees_DF.shape[0]}")
    print(f"New total heights saved: {heights_DF.shape[0]}")
    print(f"New total Calculations saved: {calculations_DF.shape[0]}")
    


  



    



All sample plots: 6
New total stems saved: 75
New total trees saved: 75
New total heights saved: 15
New total Calculations saved: 14
