In [None]:
# Prerequisite (run once in a terminal before executing this notebook):
#   gcloud auth application-default login

In [1]:
from google.cloud import bigquery
import base64
import json
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting
from google.cloud import storage

In [2]:
# --- Cloud Storage: bucket and objects read by this notebook ---
bucket_name = 'velexa_demo'
file_name = "tags.json"  # JSON object (inside bucket_name) holding the candidate tags
csv_path = 'gs://velexa_demo/Trades.csv'

# --- BigQuery: fully-qualified destination table and its column layout ---
table_id = 'vertexai-demo-420409.Velexa.trades'
schema = [
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in (
        ('Account ID', 'STRING'),
        ('Instrument', 'STRING'),
        ('Quantity', 'INTEGER'),
        ('Date - BuySell', 'DATE'),
        ('Date - Placement', 'DATE'),
        ('Order Status', 'STRING'),
    )
]

In [None]:
def load_csv_to_bigquery(csv_path, table_id, schema, create_disposition='CREATE_IF_NEEDED', write_disposition='WRITE_APPEND'):
  """Loads data from a CSV file into a BigQuery table.

  The first row of the CSV is treated as a header and skipped. The call blocks
  until the load job has finished, then prints how many rows this job loaded
  and how many rows the destination table holds afterwards.

  Args:
    csv_path: The URI of the CSV file (passed to `load_table_from_uri`).
    table_id: The ID of the BigQuery table.
    schema: The schema of the BigQuery table.
    create_disposition: Specifies whether the table should be created if it doesn't exist.
    write_disposition: Specifies how to handle existing data in the table.
      With the default 'WRITE_APPEND', calling this twice loads the rows twice.
  """

  client = bigquery.Client()

  job_config = bigquery.LoadJobConfig(
      schema=schema,
      skip_leading_rows=1,  # Skip header row
      source_format=bigquery.SourceFormat.CSV,
      create_disposition=create_disposition,
      write_disposition=write_disposition
  )

  load_job = client.load_table_from_uri(csv_path, table_id, job_config=job_config)
  load_job.result()  # Wait for the job to complete

  # `num_rows` is the size of the whole table, which differs from the number of
  # rows just loaded whenever the table already had data (WRITE_APPEND), so the
  # per-job count is reported separately.
  destination_table = client.get_table(table_id)
  print(
      f"Loaded {load_job.output_rows} rows to {table_id} "
      f"(table now has {destination_table.num_rows} rows)"
  )

In [None]:
# Uses the function's default write_disposition ('WRITE_APPEND'), so every
# execution of this cell appends the CSV rows to the table again.
load_csv_to_bigquery(csv_path=csv_path, table_id=table_id, schema=schema)

In [10]:
def getTrades(account_id):
    """Summarise the filled trades of one account as plain text.

    Queries the module-level `table_id` table, grouping the account's rows by
    instrument, and counts the 'Filled' orders and their summed quantity.

    Args:
        account_id: Value matched against the `Account ID` column.

    Returns:
        One newline-terminated sentence per instrument, ordered by number of
        filled trades (descending); an empty string when no rows match.
    """
    client = bigquery.Client()

    # Only the table name is interpolated (identifiers cannot be bound as
    # parameters); the account id is sent as a query parameter so quotes or SQL
    # fragments inside it cannot alter the statement.
    query = f"""
    SELECT
        `Account ID`,
        Instrument,
        COUNTIF(`Order Status` = 'Filled') AS Number_of_Trades,
        SUM(CASE
          WHEN `Order Status` = 'Filled' THEN Quantity
          ELSE 0
        END) AS Total_Quantity_Filled
    FROM
        `{table_id}`
    WHERE
        `Account ID` = @account_id
    GROUP BY
        `Account ID`,
        Instrument
    ORDER BY
        `Account ID`,
        Number_of_Trades DESC
    """

    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            # str() keeps non-string ids working, as the old text formatting did.
            bigquery.ScalarQueryParameter("account_id", "STRING", str(account_id)),
        ]
    )

    # Run the query and wait for the job to complete.
    results = client.query(query, job_config=job_config).result()

    return "".join(
        f"{row['Account ID']} has traded {row['Number_of_Trades']} times for a quantity of {row['Total_Quantity_Filled']} for this instrument: {row['Instrument']}\n"
        for row in results
    )


In [11]:
def getTags():
    """Fetch the tag list from Cloud Storage and return it as pretty-printed JSON.

    Reads the object `file_name` from the bucket `bucket_name` (both module-level
    settings), parses it as JSON and re-serialises it with a 4-space indent so
    it can be embedded in a prompt.

    Returns:
        The JSON document as an indented string.
    """
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).blob(file_name)

    # download_as_string() is a deprecated alias of download_as_bytes();
    # json.loads accepts the raw bytes directly.
    json_data = json.loads(blob.download_as_bytes())

    # Format the JSON data for the prompt
    return json.dumps(json_data, indent=4)

In [7]:
def generateWithInfo(account_id, additional_information):
    """Stream a free-text tag suggestion for one account to stdout.

    Builds a prompt from the account's trade summary (`getTrades`), the
    candidate tags (`getTags`) and the caller-supplied extra context, sends it
    to the Gemini model and prints the streamed answer chunk by chunk.
    The trade summary is also printed before the model is called.

    Args:
        account_id: Account whose trades are summarised for the prompt.
        additional_information: Free text inserted into the prompt's
            "Additional Information" section.

    Returns:
        None; the model output is only printed.
    """
    # Initialize Vertex AI
    vertexai.init(project="vertexai-demo-420409", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-002")

    trades = getTrades(account_id)
    tags = getTags()

    print(trades)

    # Construct the prompt with the JSON data
    prompt = f"""I have a list of trades for an account and a list of potential industry tags. Please analyze the trades and assign the most relevant industry tags to the trader.

    **Trades:**

    {trades} 

    **Potential Industry Tags:**

    {tags}

    **Additional Information:**

    {additional_information}

    **Please provide a clear and concise list of the most relevant tags based on the trading activity.**"""

    # Every harm category below gets the same threshold (OFF).
    harm_categories = (
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
    safety_settings = [
        SafetySetting(
            category=harm_category,
            threshold=SafetySetting.HarmBlockThreshold.OFF,
        )
        for harm_category in harm_categories
    ]

    # Generate content as a stream and echo each chunk as it arrives.
    stream = model.generate_content(
        [prompt],
        generation_config={
            "max_output_tokens": 8192,
            "temperature": 1,
            "top_p": 0.95,
        },
        safety_settings=safety_settings,
        stream=True,
    )

    for chunk in stream:
        print(chunk.text, end="")

In [None]:
# Example run: stream tag suggestions for one account, with extra context.
generateWithInfo(
    account_id="ACCT_MIX3",
    additional_information="The trader is interested in finance as well",
)

In [17]:
def generate(account_id):
    """Ask the Gemini model for 1-5 tags for one account and return its JSON text.

    Builds a prompt from the account's trade summary (`getTrades`) and the
    candidate tags (`getTags`) and requests a non-streamed response with the
    MIME type "application/json".

    Args:
        account_id: Account whose trades are summarised for the prompt.

    Returns:
        The model's response text (a string; callers must parse it themselves).
    """
    vertexai.init(project="vertexai-demo-420409", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-002")

    trades = getTrades(account_id)
    tags = getTags()

    text1 = f"""Analyze the following list of trades and extract the most relevant tags (between 1 to 5) for the trader based on the instruments traded and the volume of trades per instrument.
    The tags should be picked from the provided list.

    **Trades:**

    {trades} 

    **Potential Industry Tags:**

    {tags}

    **Output the tags as a JSON with the account ID following the schema: "accountID": "", "tags":**"""

    # NOTE(review): this schema is NOT sent to the model; generation_config below
    # only sets the MIME type. The prompt asks for the key "accountID" while this
    # schema names it "account_id" - reconcile the two before wiring it in.
    response_schema = {
        "title": "Account Tags",
        "description": "Schema for an account and its associated tags",
        "type": "object",
        "properties": {
            "account_id": {
                "type": "string",
                "description": "Unique identifier for the account"
            },
            "tags": {
                "type": "array",
                "description": "List of tags associated with the account",
                "items": {
                    "type": "string"
                }
            }
        },
        "required": [
            "account_id",
            "tags"
        ]
    }

    # Generation configuration: JSON output, no response schema enforced.
    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 1,
        "top_p": 0.95,
        "response_mime_type": "application/json",
    }

    # Every harm category below gets the same threshold (OFF).
    safety_settings = [
        SafetySetting(
            category=harm_category,
            threshold=SafetySetting.HarmBlockThreshold.OFF,
        )
        for harm_category in (
            SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
        )
    ]

    # Generate content
    response = model.generate_content(
        [text1],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=False,
    )

    return response.text

In [18]:
# Tag every account found in the trades table and write the results to data.json.

# Construct a BigQuery client object.
client = bigquery.Client()

# Query to get all distinct account IDs from the table
query = f"""
    SELECT DISTINCT `Account ID`
    FROM `{table_id}`
"""

# Run the query and get the results
query_job = client.query(query)
results = query_job.result()

combined_json = []

for row in results:
    account_id = row['Account ID']
    json_result = generate(account_id)
    # generate() returns JSON *text*. Parse it so data.json holds real objects
    # instead of a list of escaped JSON strings.
    try:
        combined_json.append(json.loads(json_result))
    except json.JSONDecodeError:
        # Keep the raw text rather than losing the whole run to one bad reply.
        combined_json.append(json_result)

with open("data.json", "w") as f:
    json.dump(combined_json, f, indent=4)

In [None]:
# Batch processing
# Grounding in search