In [28]:
import datetime
# Timestamp passed as `feature_time` when the feature values are ingested:
# midnight on 2023-05-20, as a naive datetime (no tzinfo attached).
FEATURE_TIME = datetime.datetime(2023, 5, 20)

In [2]:
import getpass
# Prompt for the database URL without echoing it, so the password never lands
# in the notebook. Expected form:
#   postgresql://username:password@dbhost:dbport/dbname
connect_url = getpass.getpass('Connection URL: ')

Connection URL: ········


In [4]:
from sqlalchemy import create_engine
# Build the SQLAlchemy engine from the URL entered at the prompt.
engine = create_engine(connect_url)

In [5]:
# Open one connection on the engine. stream_results=True asks the driver to
# stream rows instead of buffering the whole result set up front (where the
# backend supports it).
# NOTE(review): no visible cell closes this connection — confirm it is released.
conn = engine.connect().execution_options(stream_results=True)

In [6]:
# Latest 'followship' global-trust snapshot joined to profile handles.
#
# MAX_DATE picks the most recent date that has 'followship' rows; the main
# query keeps only that date's rows and numbers them by descending score.
#
# - The window ORDER BY carries profile_id as a tie-breaker, so profiles with
#   equal scores always receive the same rank from one run to the next.
# - The final ORDER BY makes the returned row order explicit instead of relying
#   on whatever order the planner happens to produce.
# - Filter columns are qualified with the globaltrust alias so they cannot
#   become ambiguous with columns of the joined profile table.
sql_query = """
WITH MAX_DATE AS (
  SELECT max(date) AS maxdate
  FROM globaltrust
  WHERE strategy_name = 'followship'
)
SELECT
    ROW_NUMBER() OVER(ORDER BY g.v DESC, p.profile_id) AS rank,
    g.v AS score,
    p.handle AS profile_handle,
    p.profile_id
FROM globaltrust AS g
INNER JOIN profile AS p ON p.profile_id = g.i
WHERE
  g.strategy_name = 'followship'
  AND g.date = (SELECT maxdate FROM MAX_DATE)
ORDER BY rank
"""

In [7]:
import pandas as pd
# Run the query on the open connection and load the result into a DataFrame.
profile_trust_df = pd.read_sql(sql=sql_query, con=conn)

In [9]:
# Peek at the first five rows of the query result.
profile_trust_df.head(n=5)

Unnamed: 0,rank,score,profile_handle,profile_id
0,1,0.0599,stani.lens,0x05
1,2,0.055299,christina.lens,0x8e
2,3,0.055085,bradorbradley.lens,0x24
3,4,0.054092,levychain.lens,0x0ce1
4,5,0.05324,blackdave.lens,0x8dbc


In [10]:
# Summarise row count, per-column non-null counts, dtypes and memory usage.
profile_trust_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107978 entries, 0 to 107977
Data columns (total 4 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   rank            107978 non-null  int64  
 1   score           107978 non-null  float64
 2   profile_handle  107978 non-null  object 
 3   profile_id      107978 non-null  object 
dtypes: float64(1), int64(1), object(2)
memory usage: 3.3+ MB


In [18]:
# Sanity check: list every profile_id that occurs more than once.
# An empty Series means each profile appears exactly once.
profile_trust_df["profile_id"].value_counts().loc[lambda counts: counts > 1]

Series([], Name: count, dtype: int64)

In [19]:
from google.cloud import aiplatform

# Google Cloud project, region and staging bucket used for every Vertex AI
# call made through the SDK in this notebook.
PROJECT_ID = "boxwood-well-386122"
REGION = "us-central1"
BUCKET_URI = "gs://vijay-lens-feature-store-temp"

aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
)

In [21]:
# Interactive step: obtain Application Default Credentials (ADC) for the Google
# client libraries. The command opens a browser for OAuth consent and saves the
# credentials to gcloud's application_default_credentials.json.
# NOTE(review): the saved cell output includes the OAuth request URL and a local
# home-directory path — consider clearing the output before sharing the notebook.

! gcloud auth application-default login

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Faccounts.reauth&state=YaK4WFi6gYsI7YSjSCyYVxtKHPK4oM&access_type=offline&code_challenge=YDNZDAZPZTznwPDY_nZzhUjclHmeH-c5KX5S1AjXw5Y&code_challenge_method=S256


Credentials saved to file: [/Users/vijay/.config/gcloud/application_default_credentials.json]

These credentials will be used by any library that requests Application Default Credentials (ADC).

Quota project "boxwood-well-386122" was added to ADC which can be used by Google client libraries for billing and quota. Note that some services may still bill the project owning the

In [22]:
from google.cloud.aiplatform import Feature, Featurestore

# Attach to the existing featurestore by ID and print its resource description
# (name, timestamps, state) to confirm it can be reached.
FEATURESTORE_ID = "lens_featurestore_d2"
fs = Featurestore(featurestore_name=FEATURESTORE_ID)
print(fs.gca_resource)

name: "projects/1181216607/locations/us-central1/featurestores/lens_featurestore_d2"
create_time {
  seconds: 1684349445
  nanos: 286219000
}
update_time {
  seconds: 1684349445
  nanos: 525844000
}
etag: "AMEw9yM63I4mC3Nplsr6rtExbIa3MfBEaSB4DFGZ1VLYDHkArHhLuNu1DXQZA8k4DSYl"
online_serving_config {
}
state: STABLE



In [23]:
# Handle to the "profiles" entity type inside the featurestore.
profiles_entity_type = fs.get_entity_type("profiles")

In [24]:
# Show the features currently defined on the "profiles" entity type.
profiles_entity_type.list_features()

[<google.cloud.aiplatform.featurestore.feature.Feature object at 0x1256c9550> 
 resource name: projects/1181216607/locations/us-central1/featurestores/lens_featurestore_d2/entityTypes/profiles/features/followship_score,
 <google.cloud.aiplatform.featurestore.feature.Feature object at 0x1256c9be0> 
 resource name: projects/1181216607/locations/us-central1/featurestores/lens_featurestore_d2/entityTypes/profiles/features/followship_rank]

In [25]:
# Feature ID -> DataFrame column that supplies its values.
PROFILES_SRC_FIELDS = {
    'followship_rank': 'rank',
    'followship_score': 'score',
}
# Feature IDs to ingest, taken from the mapping keys (insertion order) so the
# two definitions cannot drift apart.
PROFILES_FEATURES_IDS = list(PROFILES_SRC_FIELDS)
# DataFrame column holding the entity ID of each row.
PROFILES_ENTITY_ID_FIELD = "profile_id"

In [26]:
# Name for the frame handed to the feature-store ingest. Plain assignment:
# PROFILES_DF refers to the same DataFrame object as profile_trust_df (no copy).
PROFILES_DF = profile_trust_df

In [29]:
# Ingest the trust ranks and scores held in PROFILES_DF into the entity type.
# Signature, for reference:
#   ingest_from_df(
#       feature_ids: List[str],
#       feature_time: Union[str, datetime.datetime],
#       df_source: pd.DataFrame,
#       feature_source_fields: Optional[Dict[str, str]] = None,
#       entity_id_field: Optional[str] = None,
#       request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
#       ingest_request_timeout: Optional[float] = None,
#   )
# FEATURE_TIME is a single datetime passed as feature_time; feature IDs are
# mapped to DataFrame columns through PROFILES_SRC_FIELDS, and the entity ID
# column is named by PROFILES_ENTITY_ID_FIELD.
profiles_entity_type.ingest_from_df(
    df_source=PROFILES_DF,
    entity_id_field=PROFILES_ENTITY_ID_FIELD,
    feature_ids=PROFILES_FEATURES_IDS,
    feature_source_fields=PROFILES_SRC_FIELDS,
    feature_time=FEATURE_TIME,
)

Importing EntityType feature values: projects/1181216607/locations/us-central1/featurestores/lens_featurestore_d2/entityTypes/profiles
Import EntityType feature values backing LRO: projects/1181216607/locations/us-central1/featurestores/lens_featurestore_d2/entityTypes/profiles/operations/2507979632045719552
EntityType feature values imported. Resource name: projects/1181216607/locations/us-central1/featurestores/lens_featurestore_d2/entityTypes/profiles


<google.cloud.aiplatform.featurestore.entity_type.EntityType object at 0x1256b0bb0> 
resource name: projects/1181216607/locations/us-central1/featurestores/lens_featurestore_d2/entityTypes/profiles

In [None]:
# TODO 
# 1. "recommend" label
# 2. read full dataset from bigquery
# 3. checkpoint max(block_timestamp) for incremental reads from BigQuery