In [1]:
import os
import time
import json
import requests
import random
import string
from google.cloud import bigquery
import urllib.parse
import hmac
import hashlib
import base64
import dlt
from dotenv import load_dotenv

In [7]:
# Load environment variables from secrets.env
load_dotenv("secrets.env")

# Authenticate with google cloud
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.getenv("GCP_KEY_PATH")

# BigQuery account info
PROJECT_ID = os.getenv("PROJECT_ID")
DATASET_ID = os.getenv("DATASET_ID")

# NetSuite access secrets
NETSUITE_ACCOUNT_ID = os.getenv("NETSUITE_ACCOUNT_ID")
NETSUITE_REALM = os.getenv("NETSUITE_REALM")
NETSUITE_CONSUMER_KEY = os.getenv("NETSUITE_CONSUMER_KEY")
NETSUITE_CONSUMER_SECRET = os.getenv("NETSUITE_CONSUMER_SECRET")
NETSUITE_TOKEN = os.getenv("NETSUITE_TOKEN")
NETSUITE_TOKEN_SECRET = os.getenv("NETSUITE_TOKEN_SECRET")

In [8]:
# Initialize BigQuery client
client = bigquery.Client()

In [9]:
# Get BigQuery table IDs
ACCOUNT_TABLE = f"{PROJECT_ID}.{DATASET_ID}.Account"

# TODO: Add the other tables here

In [10]:
# NetSuite API Endpoint
HTTP_METHOD = "POST"
BASE_URL = f"https://{NETSUITE_ACCOUNT_ID}.suitetalk.api.netsuite.com/services/rest/query/v1/suiteql"

In [11]:
"""
Input params in the form of:
params = {
    "limit": "5",
    "offset": "0"
}

Input SQL query in the form of:
query_body = {
    "q": "SELECT acctnumber, fullname, generalrate, currency FROM account"
}

"""
def get_netsuite_data(params, query):
    
    # Encode parameters for URL
    encoded_params = urllib.parse.urlencode(params)
    
    # Full URL with encoccquery params, used in post request
    url = f"{BASE_URL}?{encoded_params}"
    
    # Generate OAuth Parameters
    timestamp = str(int(time.time()))
    nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=11))
    
    oauth_params = {
        "oauth_consumer_key": NETSUITE_CONSUMER_KEY,
        "oauth_token": NETSUITE_TOKEN,
        "oauth_signature_method": "HMAC-SHA256",
        "oauth_timestamp": timestamp,
        "oauth_nonce": nonce,
        "oauth_version": "1.0"
    }
    
    # Merge OAuth parameters with query parameters for signature
    all_params = {**oauth_params, **params}
    
    # Sort all parameters alphabetically (important for signature)
    sorted_params = sorted(all_params.items())
    encoded_param_string = "&".join([f"{urllib.parse.quote(k, safe='')}={urllib.parse.quote(v, safe='')}" for k, v in sorted_params])
    
    # Construct the Signature Base String
    base_string = f"{HTTP_METHOD}&{urllib.parse.quote(BASE_URL, safe='')}&{urllib.parse.quote(encoded_param_string, safe='')}"
    
    # Generate the HMAC-SHA256 Signature
    signing_key = f"{NETSUITE_CONSUMER_SECRET}&{NETSUITE_TOKEN_SECRET}"
    hashed = hmac.new(signing_key.encode(), base_string.encode(), hashlib.sha256)
    signature = base64.b64encode(hashed.digest()).decode()
    
    # URL Encode Signature Before Sending
    encoded_signature = urllib.parse.quote(signature, safe="")
    
    # Construct OAuth Header
    auth_header = (
        f'OAuth realm="{NETSUITE_REALM}", '
        f'oauth_consumer_key="{NETSUITE_CONSUMER_KEY}", '
        f'oauth_token="{NETSUITE_TOKEN}", '
        f'oauth_signature_method="HMAC-SHA256", '
        f'oauth_timestamp="{timestamp}", '
        f'oauth_nonce="{nonce}", '
        f'oauth_version="1.0", '
        f'oauth_signature="{encoded_signature}"'
    )
    
    # Headers
    headers = {
        "Authorization": auth_header,
        "Content-Type": "application/json",
        "Prefer": "transient",
        "Cache-Control": "no-cache",
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }
    
    # Send Request
    response = requests.post(url, json=query, headers=headers)

    if response.status_code == 200:
        # return response.json().get("links", []), response.json().get("items", [])
        return response
    # Handle response error
    else:
        raise Exception(f"NetSuite API Error: {response.text}")
    
    # Check the response
    #print("\n🔹 Response Status Code:", response.status_code)
    #print("\n🔹 Response Body:", response.json())

In [12]:
LIMIT = 20

In [18]:
"""
Inputs:
    Name of table in netsuite: String
    List of columns to retreive: List

Functionality:
    Get 1000 rows at a time, continuing until the entire table is retreived
    Filter based on input columns

Returns:
    DLT object thing?

Notes:
    Need to remove links field from all returns
    
"""
def load_full_netsuite_table(table_name, columns):

    timestamp = time.time()
    all_data = []


    """
    # DLT Pipeline Setup
    pipeline_name = f'{table_name}_pipeline'
    dataset_name = f'{table_name}_data'

    pipeline = dlt.pipeline(
        pipeline_name=pipeline_name,
        destination="bigquery",
        dataset_name=dataset_name,
    )
    """

    # Initial parameters for request
    offset = 0
    params = {
        "limit": str(LIMIT),
        "offset": str(offset)
    }
    

    # Build query from table_name and columns
    query_body = {
        "q": f"SELECT {', '.join([item for item in columns])} FROM {table_name}"
    }


    # Stop requesting data when this is false, and table has been fully retreived
    has_more = True
    
    while has_more:   
        
        response = get_netsuite_data(params, query_body)
        response.raise_for_status() # Make sure request was successful
        response_json = response.json()

        # Reached end of data table
        if response_json.get("hasMore") == False:
            has_more = False
        # Table has more data, need to request new batch
        else:
            offset += LIMIT
            params["offset"] = str(offset)
            

        # Delete 'link' column for each record, which seems to be sent over empty no matter what
        batch = response_json.get("items", [])
        for record in batch:
            if 'links' in record:
                del record['links']
            record['updated_at'] = timestamp

        
        all_data.extend(batch)

        # Temporary testing cutoff
        if offset >= 120:
            break

    return all_data

    

In [19]:
columns = ["acctnumber", "fullname", "generalrate", "currency", "lastmodifieddate"]
# columns = ["*"]
table_name = "account"
data = load_full_netsuite_table(table_name, columns)

In [22]:
print(data)

[{'acctnumber': '1', 'fullname': 'Cumulative Translation Adjustment-Elimination', 'generalrate': 'HISTORICAL', 'lastmodifieddate': '2/22/2018', 'updated_at': 1743015134.073619}, {'acctnumber': '10000', 'currency': '1', 'fullname': 'Checking - Wells Fargo', 'generalrate': 'CURRENT', 'lastmodifieddate': '12/12/2022', 'updated_at': 1743015134.073619}, {'acctnumber': '10100', 'fullname': 'Undeposited Funds', 'generalrate': 'CURRENT', 'lastmodifieddate': '9/25/2024', 'updated_at': 1743015134.073619}, {'acctnumber': '10501', 'currency': '1', 'fullname': 'Checking - BOJ (OLD ACCT)', 'generalrate': 'CURRENT', 'lastmodifieddate': '8/19/2022', 'updated_at': 1743015134.073619}, {'acctnumber': '10502', 'currency': '1', 'fullname': 'Checking - BOTJ', 'generalrate': 'CURRENT', 'lastmodifieddate': '9/25/2024', 'updated_at': 1743015134.073619}, {'acctnumber': '10503', 'currency': '1', 'fullname': 'Sweep - BOTJ (ICS 2616)', 'generalrate': 'CURRENT', 'lastmodifieddate': '10/6/2023', 'updated_at': 174301

In [20]:
# Insert colleccted data into BigQuery
# This is not a merge, and will send rows regarless of whether they are already in BigQuery
def load_data_to_bigquery(TABLE_ID, data):
    errors = client.insert_rows_json(TABLE_ID, data)
    if not errors:
        print(f"✅ Successfully loaded {len(data)} rows into BigQuery.")
    else:
        print(f"❌ Failed to load data into BigQuery: {errors}")

In [21]:
load_data_to_bigquery(ACCOUNT_TABLE, data)

✅ Successfully loaded 120 rows into BigQuery.
