In [1]:
import os
import time
import json
import requests
import random
import string
from google.cloud import bigquery
from google.cloud.bigquery import SchemaField
import urllib.parse
import hmac
import hashlib
import base64
import dlt
from dotenv import load_dotenv
from datetime import datetime, timedelta
import gspread
from google.oauth2.service_account import Credentials

In [2]:
# Load environment variables from secrets.env
load_dotenv("secrets.env")

# Authenticate with google cloud
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.getenv("GCP_KEY_PATH")

# BigQuery account info
PROJECT_ID = os.getenv("PROJECT_ID")
DATASET_ID = os.getenv("DATASET_ID")
DATASET_ID_STAGING = os.getenv("DATASET_ID_STAGING")

# NetSuite access secrets
NETSUITE_ACCOUNT_ID = os.getenv("NETSUITE_ACCOUNT_ID")
NETSUITE_REALM = os.getenv("NETSUITE_REALM")
NETSUITE_CONSUMER_KEY = os.getenv("NETSUITE_CONSUMER_KEY")
NETSUITE_CONSUMER_SECRET = os.getenv("NETSUITE_CONSUMER_SECRET")
NETSUITE_TOKEN = os.getenv("NETSUITE_TOKEN")
NETSUITE_TOKEN_SECRET = os.getenv("NETSUITE_TOKEN_SECRET")

In [3]:
# Initialize BigQuery client
client = bigquery.Client()

In [4]:
# Get BigQuery table IDs
ACCOUNT_TABLE = f"{PROJECT_ID}.{DATASET_ID}.Account"
ACCOUNT_TABLE_TEST = f"{PROJECT_ID}.{DATASET_ID}.Account_test"
ACCOUNT_TABLE_STAGING = f"{PROJECT_ID}.{DATASET_ID_STAGING}.Account_staging"

# TODO: Add all tables to dict of tables sharing the same keys in TABLE_COLUMNS
# Order is BQ table ID, BQ staging table ID
BIGQUERY_TABLE_IDS = {
    "account": [f"{PROJECT_ID}.{DATASET_ID}.Account", f"{PROJECT_ID}.{DATASET_ID_STAGING}.Account_staging"]
    # "transaction": [f"{PROJECT_ID}.{DATASET_ID}.Transaction", f"{PROJECT_ID}.{DATASET_ID_STAGING}.Transaction_staging"],
}

In [5]:
# NetSuite API Endpoint
HTTP_METHOD = "POST"
BASE_URL = f"https://{NETSUITE_ACCOUNT_ID}.suitetalk.api.netsuite.com/services/rest/query/v1/suiteql"

In [6]:
"""
Input params in the form of:
params = {
    "limit": "5",
    "offset": "0"
}

Input SQL query in the form of:
query_body = {
    "q": "SELECT acctnumber, fullname, generalrate, currency FROM account"
}

"""
def get_netsuite_data(params, query):
    
    # Encode parameters for URL
    encoded_params = urllib.parse.urlencode(params)
    
    # Full URL with encoccquery params, used in post request
    url = f"{BASE_URL}?{encoded_params}"
    
    # Generate OAuth Parameters
    timestamp = str(int(time.time()))
    nonce = ''.join(random.choices(string.ascii_letters + string.digits, k=11))
    
    oauth_params = {
        "oauth_consumer_key": NETSUITE_CONSUMER_KEY,
        "oauth_token": NETSUITE_TOKEN,
        "oauth_signature_method": "HMAC-SHA256",
        "oauth_timestamp": timestamp,
        "oauth_nonce": nonce,
        "oauth_version": "1.0"
    }
    
    # Merge OAuth parameters with query parameters for signature
    all_params = {**oauth_params, **params}
    
    # Sort all parameters alphabetically (important for signature)
    sorted_params = sorted(all_params.items())
    encoded_param_string = "&".join([f"{urllib.parse.quote(k, safe='')}={urllib.parse.quote(v, safe='')}" for k, v in sorted_params])
    
    # Construct the Signature Base String
    base_string = f"{HTTP_METHOD}&{urllib.parse.quote(BASE_URL, safe='')}&{urllib.parse.quote(encoded_param_string, safe='')}"
    
    # Generate the HMAC-SHA256 Signature
    signing_key = f"{NETSUITE_CONSUMER_SECRET}&{NETSUITE_TOKEN_SECRET}"
    hashed = hmac.new(signing_key.encode(), base_string.encode(), hashlib.sha256)
    signature = base64.b64encode(hashed.digest()).decode()
    
    # URL Encode Signature Before Sending
    encoded_signature = urllib.parse.quote(signature, safe="")
    
    # Construct OAuth Header
    auth_header = (
        f'OAuth realm="{NETSUITE_REALM}", '
        f'oauth_consumer_key="{NETSUITE_CONSUMER_KEY}", '
        f'oauth_token="{NETSUITE_TOKEN}", '
        f'oauth_signature_method="HMAC-SHA256", '
        f'oauth_timestamp="{timestamp}", '
        f'oauth_nonce="{nonce}", '
        f'oauth_version="1.0", '
        f'oauth_signature="{encoded_signature}"'
    )
    
    # Headers
    headers = {
        "Authorization": auth_header,
        "Content-Type": "application/json",
        "Prefer": "transient",
        "Cache-Control": "no-cache",
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }
    
    # Send Request
    response = requests.post(url, json=query, headers=headers)

    if response.status_code == 200:
        # return response.json().get("links", []), response.json().get("items", [])
        return response
    # Handle response error
    else:
        raise Exception(f"NetSuite API Error: {response.text}")
    
    # Check the response
    #print("\n🔹 Response Status Code:", response.status_code)
    #print("\n🔹 Response Body:", response.json())

In [7]:
LIMIT = 10

In [8]:
def load_data_to_bq(query):
    #TODO: remove noted lines, and instead stream to bigQuery after each batch
    
    
    timestamp = time.time()
    all_data = [] #REMOVE THIS

    # Initial parameters for request
    offset = 0
    params = {
        "limit": str(LIMIT),
        "offset": str(offset)
    }

    # Stop requesting data when this is false, and table has been fully retreived
    has_more = True
    
    while has_more:   
        
        response = get_netsuite_data(params, query)
        response.raise_for_status() # Make sure request was successful
        response_json = response.json()

        # Reached end of data table
        if response_json.get("hasMore") == False:
            has_more = False
        # Table has more data, need to request new batch
        else:
            offset += LIMIT
            params["offset"] = str(offset)
            

        # Delete 'link' column for each record, which seems to be sent over empty no matter what
        batch = response_json.get("items", [])
        for record in batch:
            if 'links' in record:
                del record['links']
            record['updated_at'] = timestamp
        
        
        all_data.extend(batch) # REMOVE THIS, replace with below line
        # load_data_to_bigquery(DESTINATION_TABLE, batch)
        
        # Temporary testing cutoff
        # if offset >= 20:
        #     break

    return all_data # REMOVE THIS

In [9]:
"""
Inputs:
    Name of table in netsuite: String
    List of netsuite columns to retreive: List[String]

Functionality:
    Get LIMIT rows at a time, continuing until the entire table is retreived
    Filter based on input columns

Returns:
    request object
    
"""
def load_full_netsuite_table(table_name, columns):

    # Build query from table_name and columns
    query = {
        "q": f"SELECT {', '.join([item for item in columns])} FROM {table_name}"
    }
    
    return load_data_to_bq(query)



In [10]:
# Get NetSuite data from the last 2 days, based on lastmodifieddate
# can maybe change this to 1 day
def load_recent_netsuite_data(table_name, columns):

    # Get the current date and format it in MM/DD/YYYY
    current_date = datetime.now().strftime('%m/%d/%Y')

    # Build query with recent data condition
    query = {
        "q": f"""
        SELECT {', '.join([item for item in columns])} 
        FROM {table_name}
        WHERE lastmodifieddate >= TO_DATE('{current_date}', 'MM/DD/YYYY') - 2
        """
    }
    
    return load_data_to_bq(query)

In [11]:
columns = ["acctnumber", "fullname", "generalrate", "currency", "lastmodifieddate", "id"]
# columns = ["*"]
table_name = "transactionLine"
column_names.remove("updated_at")
data = load_full_netsuite_table(table_name, column_names)

NameError: name 'column_names' is not defined

In [12]:
# Insert colleccted data into BigQuery
# This is not a merge, and will send rows regarless of whether they are already in BigQuery
def load_data_to_bigquery(TABLE_ID, data):
    """
    Loads JSON data into BigQuery (staging table).
    
    Args:
        table_id (str): The  table name where data will be inserted.
        data (list): A list of dictionaries representing the records to load.
    """
    
    errors = client.insert_rows_json(TABLE_ID, data)
    if not errors:
        print(f"✅ Successfully loaded {len(data)} rows into BigQuery.")
    else:
        print(f"❌ Failed to load data into BigQuery: {errors}")

In [None]:
load_data_to_bigquery(ACCOUNT_TABLE, data)

In [None]:
# LOAD TO STAGING TABLE

In [None]:
# Columns to merge in
# columns = ["acctnumber", "currency", "lastmodifieddate", "id", "accountsearchdisplayname", "accountsearchdisplaynamecopy", "isinactive"]
all_test_columns = ["fullname", "acctnumber", "generalrate", "currency", "lastmodifieddate", "id", "accountsearchdisplayname", "accountsearchdisplaynamecopy", "isinactive"]

# columns = ["*"]
table_name = "account"
merge_data = load_full_netsuite_table(table_name, all_test_columns)


In [None]:
load_data_to_bigquery(ACCOUNT_TABLE_STAGING, merge_data)

In [13]:
# merge data into BigQuery, so that existing rows are not duplicated
def merge_into_bigquery(target_table, staging_table, unique_key="id"):
    """
    Merges data from a staging table into the target table using BigQuery's MERGE statement.

    Args:
        target_table (str): The full target table name (e.g., "your_project.your_dataset.target_table").
        staging_table (str): The full staging table name (e.g., "your_project.your_dataset.staging_table").
        unique_key (str): The column that uniquely identifies a row (default is "id").
    """
    
    query = f"""
        MERGE `{target_table}` AS T
        USING `{staging_table}` AS S
        ON T.{unique_key} = S.{unique_key}
        
        WHEN MATCHED THEN 
            UPDATE SET
                T.fullname = S.fullname,
                T.acctnumber = S.acctnumber,
                T.generalrate = S.generalrate,
                T.currency = S.currency,
                T.lastmodifieddate = S.lastmodifieddate,
                T.id = S.id,
                T.accountsearchdisplayname = S.accountsearchdisplayname,
                T.accountsearchdisplaynamecopy = S.accountsearchdisplaynamecopy,
                T.isinactive = S.isinactive,
                T.updated_at = S.updated_at
    
        WHEN NOT MATCHED THEN
            INSERT (id, fullname, acctnumber, generalrate, currency, lastmodifieddate, 
                    accountsearchdisplayname, accountsearchdisplaynamecopy, isinactive, updated_at)
            VALUES (S.id, S.fullname, S.acctnumber, S.generalrate, S.currency, S.lastmodifieddate, 
                    S.accountsearchdisplayname, S.accountsearchdisplaynamecopy, S.isinactive, S.updated_at)
    """

    # Run the merge query
    try:
        job = client.query(query)
        job.result()  # Wait for the query to complete
        print(f"✅ Successfully merged data into {target_table}.")
    except Exception as e:
        print(f"❌ Failed to merge data into {target_table}: {e}")



In [14]:
# merge data into BigQuery, so that existing rows are not duplicated
def merge_into_bigquery(target_table, staging_table, columns, unique_key="id"):
    """
    Merges data from a staging table into the target table using BigQuery's MERGE statement.

    Args:
        target_table (str): The full target table name (e.g., "your_project.your_dataset.target_table").
        staging_table (str): The full staging table name (e.g., "your_project.your_dataset.staging_table").
        columns (list): A list of column names to be merged.
        unique_key (str): The column that uniquely identifies a row (default is "id").
    """

    # Add our generated timestamp column to the list of merged columns
    columns.append("updated_at")
    
    # Construct update clause dynamically
    update_clause = ",\n                ".join([f"T.{col} = S.{col}" for col in columns])

    # Construct insert clause dynamically
    insert_columns = ", ".join(columns)
    insert_values = ", ".join([f"S.{col}" for col in columns])

    query = f"""
        MERGE `{target_table}` AS T
        USING `{staging_table}` AS S
        ON T.{unique_key} = S.{unique_key}
        
        WHEN MATCHED THEN 
            UPDATE SET
                {update_clause}
    
        WHEN NOT MATCHED THEN
            INSERT ({insert_columns})
            VALUES ({insert_values})
    """

    # Run the merge query
    try:
        job = client.query(query)
        job.result()  # Wait for the query to complete
        print(f"✅ Successfully merged data into {target_table}.")
    except Exception as e:
        print(f"❌ Failed to merge data into {target_table}: {e}")

In [None]:
# Merge the staging table into the main table
all_test_columns = ["fullname", "acctnumber", "generalrate", "currency", "lastmodifieddate", "id", "accountsearchdisplayname", "accountsearchdisplaynamecopy", "isinactive"]
merge_into_bigquery(ACCOUNT_TABLE, ACCOUNT_TABLE_STAGING, all_test_columns)

In [None]:
# Clear staging table after merge (to avoid duplicate processing)
client.query(f"DELETE FROM `{ACCOUNT_TABLE_STAGING}` WHERE TRUE").result()
print("✅ Staging table cleared.")

In [15]:
# Input full dataset ID, including project ID
def create_dataset(dataset_id):
    dataset = bigquery.Dataset(dataset_id)
    dataset.location = "US"
    client.create_dataset(dataset, exists_ok=True)
    print(f"✅ Dataset {dataset_id} created successfully.")

In [16]:
# Input full table ID, including dataset ID and project ID
# Input table schema
def create_table(table_id, schema):
    table = bigquery.Table(table_id, schema=schema)
    client.create_table(table, exists_ok=True)
    print(f"✅ Table {table_id} recreated successfully.")
    

In [None]:
ACCOUNT_TEST_SCHEMA = [
    bigquery.SchemaField("fullname", "STRING"),
    bigquery.SchemaField("acctnumber", "STRING"),
    bigquery.SchemaField("generalrate", "STRING"),
    bigquery.SchemaField("currency", "STRING"),
    bigquery.SchemaField("lastmodifieddate", "STRING"),
    bigquery.SchemaField("id", "STRING"),
    bigquery.SchemaField("accountsearchdisplayname", "STRING"),
    bigquery.SchemaField("accountsearchdisplaynamecopy", "STRING"),
    bigquery.SchemaField("isinactive", "STRING"),
    bigquery.SchemaField("updated_at", "TIMESTAMP"),
]

In [17]:
def drop_dataset(dataset_id):
    client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
    print(f"✅ Dataset {dataset_id} dropped successfully.")

In [18]:
def drop_table(table_id):    
    # Drop table
    client.delete_table(table_id, not_found_ok=True)
    print(f"✅ Table {table_id} dropped successfully.")

In [None]:
drop_table(ACCOUNT_TABLE_TEST)

In [None]:
create_table(ACCOUNT_TABLE_TEST, ACCOUNT_TEST_SCHEMA)

In [None]:
load_data_to_bigquery(ACCOUNT_TABLE_TEST, data)

In [None]:
drop_table(ACCOUNT_TABLE_STAGING)

In [None]:
create_table(ACCOUNT_TABLE_STAGING, ACCOUNT_TEST_SCHEMA)

In [None]:
load_data_to_bigquery(ACCOUNT_TABLE_STAGING, merge_data)

In [None]:
merge_into_bigquery(ACCOUNT_TABLE_TEST, ACCOUNT_TABLE_STAGING, all_test_columns)

In [None]:
# columns = ["acctnumber", "currency", "lastmodifieddate", "id", "accountsearchdisplayname", "accountsearchdisplaynamecopy", "isinactive"]
columns = ["*"]
table_name = "transaction"

load_recent_netsuite_data(table_name, columns)

In [None]:
transactionLine_table_id = f"{PROJECT_ID}.{DATASET_ID}.transactionLine"
transactionLine_table_staging = f"{PROJECT_ID}.{DATASET_ID}.transactionLine_staging"

In [None]:
table = client.get_table(transaction_table_id)
schema = table.schema  # Extract schema

In [None]:
new_table = bigquery.Table(new_table_id, schema=schema)

In [None]:
client.create_table(new_table, exists_ok=True)

In [None]:
column_names = [field.name for field in schema]
print(column_names)

In [None]:
create_table(ACCOUNT_TABLE_STAGING, ACCOUNT_TEST_SCHEMA)

In [None]:
current_time = time.localtime()

In [None]:
print(current_time)

In [None]:
print(schema)

In [25]:
query = {
    "q":"SELECT COUNT(*) FROM entity"
}         
         
params = {
    "offset": "0",
    "limit": "1"
}
count = get_netsuite_data(params, query).json()

print(count)



{'links': [{'rel': 'self', 'href': 'https://3730751-sb1.suitetalk.api.netsuite.com/services/rest/query/v1/suiteql?limit=1&offset=0'}], 'count': 1, 'hasMore': False, 'items': [{'links': [], 'expr1': '66878'}], 'offset': 0, 'totalResults': 1}


In [24]:
query = {
    "q":f"""
    SELECT
      id
    FROM
      transaction
    WHERE
      id = 19835062
    """
}

params = {
    "offset": "0",
    "limit": "10"
}
data =  get_netsuite_data(params, query).json()
print(data)

{'links': [{'rel': 'self', 'href': 'https://3730751-sb1.suitetalk.api.netsuite.com/services/rest/query/v1/suiteql?limit=10&offset=0'}], 'count': 0, 'hasMore': False, 'items': [], 'offset': 0, 'totalResults': 0}


In [None]:
query = {
    "q":f"""
    SELECT uniquekey
    FROM transactionLine
    WHERE uniquekey >= 500000 AND uniquekey < 505000
    """
}

params = {
    "offset": "0",
    "limit": "10"
}
startTime = int(time.time())

data = get_netsuite_data(params, query).json()

endTime = int(time.time())
print("total time: " + str(endTime - startTime))

print(data)

In [None]:
moore_giles_acct_table = f"{PROJECT_ID}.MooreGiles2.account"

account_table = client.get_table(moore_giles_acct_table)

account_schema = account_table.schema
print(account_schema)

In [None]:
ACCOUNT_TABLE = f"{PROJECT_ID}.{DATASET_ID}.Account"
print(ACCOUNT_TABLE)

In [None]:
drop_table(ACCOUNT_TABLE)

In [None]:
ACCOUNT_SCHEMA = [
    SchemaField('id', 'INTEGER', 'NULLABLE'),
    SchemaField('accountsearchdisplayname', 'STRING', 'NULLABLE'),
    SchemaField('accountsearchdisplaynamecopy', 'STRING', 'NULLABLE'),
    SchemaField('acctnumber', 'STRING', 'NULLABLE'),
    SchemaField('accttype', 'STRING', 'NULLABLE'),
    SchemaField('cashflowrate', 'STRING', 'NULLABLE'),
    SchemaField('category1099misc', 'INTEGER', 'NULLABLE'),
    SchemaField('currency', 'INTEGER', 'NULLABLE'),
    SchemaField('deferralacct', 'INTEGER', 'NULLABLE'),
    SchemaField('description', 'STRING', 'NULLABLE'),
    SchemaField('displaynamewithhierarchy', 'STRING', 'NULLABLE'),
    SchemaField('eliminate', 'BOOLEAN', 'NULLABLE'),
    SchemaField('externalid', 'STRING', 'NULLABLE'),
    SchemaField('fullname', 'STRING', 'NULLABLE'),
    SchemaField('generalrate', 'STRING', 'NULLABLE'),
    SchemaField('includechildren', 'BOOLEAN', 'NULLABLE'),
    SchemaField('inventory', 'BOOLEAN', 'NULLABLE'),
    SchemaField('isinactive', 'BOOLEAN', 'NULLABLE'),
    SchemaField('issummary', 'BOOLEAN', 'NULLABLE'),
    SchemaField('lastmodifieddate', 'DATE', 'NULLABLE'),
    SchemaField('parent', 'INTEGER', 'NULLABLE'),
    SchemaField('reconcilewithmatching', 'BOOLEAN', 'NULLABLE'),
    SchemaField('revalue', 'BOOLEAN', 'NULLABLE'),
    SchemaField('sspecacct', 'STRING', 'NULLABLE'),
    SchemaField('subsidiary', 'STRING', 'NULLABLE'),
    SchemaField('custrecord_acct_bank_account_number', 'STRING', 'NULLABLE'),
    SchemaField('custrecord_mg_default_department', 'INTEGER', 'NULLABLE'),
    SchemaField('custrecord_mg_default_division', 'STRING', 'NULLABLE'),
    SchemaField('custrecord_fam_account_showinfixedasset', 'STRING', 'NULLABLE'),
    SchemaField('updated_at', 'TIMESTAMP', 'NULLABLE')
]

In [None]:
create_table(ACCOUNT_TABLE, ACCOUNT_SCHEMA)

In [19]:
def get_max_unique_key(ns_table):
    """
    Returns the maximum uniqueKey value from a NetSuite table.
    """

    params = {
        "limit": "1"
    }
    query = {
        "q": f"SELECT uniqueKey FROM {ns_table} ORDER BY uniqueKey DESC"
    }

    # We just need the first row since it's sorted in descending order
    results = get_netsuite_data(params, query).json().get("items", [])
    if results and len(results) > 0:
        return int(results[0]['uniquekey'])

    raise Exception(f"❌ Could not fetch max uniqueKey from {ns_table}")

In [None]:
print(str(get_max_unique_key("transactionLine")))

In [None]:
SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
creds = Credentials.from_service_account_file(os.getenv("GCP_KEY_PATH"), scopes=SCOPES)
gspread_client = gspread.authorize(creds)

In [None]:
spreadsheet = gspread_client.open_by_key("1J05aKxbxBhgV8l3Tj4NxlTNHhkyqmZgNlohgkB8a7KU")
worksheet = spreadsheet.get_worksheet(0)
rows = worksheet.get_all_records()
print(rows)


In [20]:
# Method to grab rows from google sheets
# Grab data from one sheet? or all sheets?
# Perhaps this one grabs from one, and combines with below method
# Need to find out how to index through gsheet without hitting end
# look into google sheets API methods






def create_schema_from_sheet(gsheet_rows):
    schema = [
        bigquery.SchemaField(
            field['Column Name'],
            field['Data Type'],
            field['Nullable']
        )
        for field in gsheet_rows
    ]

    return schema

In [None]:

print(create_schema_from_sheet(rows))

In [None]:
sheets = spreadsheet.worksheets()
print(sheets)

In [None]:
table_id = f"{PROJECT_ID}.MooreGiles2.AccountingPeriod"

table = client.get_table(table_id)

schema = table.schema
print(schema)

In [None]:
table_id = f"{PROJECT_ID}.MooreGiles2.CUSTOMRECORD_360_COMMISSION_RULE"

table = client.get_table(table_id)

schema = table.schema
print(schema)

In [None]:
table_id = f"{PROJECT_ID}.MooreGiles2.CUSTOMRECORD_360_COMMISSION_TRACKING"

table = client.get_table(table_id)

schema = table.schema
print(schema)

In [None]:
table_id = f"{PROJECT_ID}.MooreGiles2.transactionSalesTeam"

table = client.get_table(table_id)

schema = table.schema
print(schema)

In [None]:
query = {
    "q":f"""
    SELECT *
    FROM Account
    """
}

params = {
    "offset": "100",
    "limit": "10"
}

data = get_netsuite_data(params, query).json().get('items')

print(data)

In [21]:
import re
from typing import List, Tuple


def extract_field_names_and_types(schema_fields: List[SchemaField]) :
    """
    Returns two lists:
    - field_names: list of field names
    - field_types: list of cleaned types (STRING(n) → STRING, NUMERIC(x, y) → FLOAT, etc.)
    Skips fields with None or empty types.
    """
    field_names = []
    field_types = []

    for field in schema_fields:
        name = field.name
        type_str = field.field_type

        if not type_str or type_str in ("()", "None"):
            continue

        # Normalize types
        if re.match(r'^STRING\(\d+\)$', type_str):
            converted_type = "STRING"
        elif re.match(r'^NUMERIC\(\d+,\s*\d+\)$', type_str):
            converted_type = "FLOAT"
        else:
            converted_type = type_str

        field_names.append(name)
        field_types.append(converted_type)

    for name in field_names:
        print(name)
    
    print()
    print()
    for t in field_types:
        print(t)



In [None]:
schema = [SchemaField('contribution', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('employee', 'STRING(2000)', 'NULLABLE', None, None, (), None), SchemaField('isprimary', 'BOOLEAN', 'NULLABLE', None, None, (), None), SchemaField('salesrole', 'STRING(2000)', 'NULLABLE', None, None, (), None), SchemaField('id', 'STRING(2000)', 'NULLABLE', None, None, (), None), SchemaField('transaction', 'INTEGER', 'NULLABLE', None, None, (), None)]
print(extract_field_names_and_types(schema))