## 1. Fetching Data from Supabase

In [2]:
from supabase import create_client, Client
import pandas as pd
import os as os
from dotenv import load_dotenv

load_dotenv()

# Initialize Supabase client.
# os.environ.get() returns None for an unset variable, so check both settings
# up front and fail with a message that names exactly what is missing.
url = os.environ.get("SUPABASE_PUBLIC_URL")
key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY")
missing_settings = [
    name
    for name, value in (("SUPABASE_PUBLIC_URL", url), ("SUPABASE_SERVICE_ROLE_KEY", key))
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(missing_settings)
        + ". Set them in the environment or in a .env file."
    )

# NOTE(review): this authenticates with the service-role key; confirm the notebook
# really needs that level of access rather than a more restricted key.
supabase: Client = create_client(url, key)

# Fetch data from each table (every column, no filter).
# NOTE(review): no pagination is used here; confirm the server does not cap the
# number of rows returned per request once the tables grow.
users_data = supabase.table("users").select("*").execute().data
user_location_data = supabase.table("user_location").select("*").execute().data
user_behavior_data = supabase.table("user_behavior").select("*").execute().data
user_app_usage_data = supabase.table("user_app_usage").select("*").execute().data

# Lookup tables
_app_names_data = supabase.table("_app_names").select("*").execute().data
_actions_data = supabase.table("_actions").select("*").execute().data

2023-11-19 20:05:14,118:INFO - HTTP Request: GET https://supabase.cloud-atlas.xyz/rest/v1/users?select=%2A "HTTP/1.1 200 OK"
2023-11-19 20:05:14,132:INFO - HTTP Request: GET https://supabase.cloud-atlas.xyz/rest/v1/user_location?select=%2A "HTTP/1.1 200 OK"
2023-11-19 20:05:14,142:INFO - HTTP Request: GET https://supabase.cloud-atlas.xyz/rest/v1/user_behavior?select=%2A "HTTP/1.1 200 OK"
2023-11-19 20:05:14,153:INFO - HTTP Request: GET https://supabase.cloud-atlas.xyz/rest/v1/user_app_usage?select=%2A "HTTP/1.1 200 OK"
2023-11-19 20:05:14,164:INFO - HTTP Request: GET https://supabase.cloud-atlas.xyz/rest/v1/_app_names?select=%2A "HTTP/1.1 200 OK"
2023-11-19 20:05:14,175:INFO - HTTP Request: GET https://supabase.cloud-atlas.xyz/rest/v1/_actions?select=%2A "HTTP/1.1 200 OK"


In [3]:
# Build one DataFrame per fetched table: the four user tables first,
# then the two lookup tables.
(
    df_users,
    df_user_location,
    df_user_behavior,
    df_user_app_usage,
    df__app_names,
    df__actions,
) = (
    pd.DataFrame(rows)
    for rows in (
        users_data,
        user_location_data,
        user_behavior_data,
        user_app_usage_data,
        _app_names_data,
        _actions_data,
    )
)

# Preview the first rows to verify the structure
df_user_location.head()

Unnamed: 0,id,created_at,user_id,location,start_time,end_time
0,1,2023-11-19T16:00:21.232935+00:00,f7427faa-c131-402a-9578-b742aaf3b5bd,cafe,2023-11-19T10:01:01+00:00,2023-11-19T12:02:02+00:00


## 2. Data Preprocessing

Derive features (age, duration) and drop unused columns

In [10]:
from datetime import datetime

def convert_string_to_date(df_original, dob_column, today=None):
    """
    Converts a date of birth column from string to datetime and calculates the age.

    Parameters
    ----------
    df_original : pandas.DataFrame
        Input frame. It is copied; the caller's frame is not modified.
    dob_column : str
        Name of the column holding the dates of birth.
    today : datetime-like, optional
        Reference date used for the age calculation (any object exposing
        ``year``, ``month`` and ``day``). Defaults to ``datetime.now()``, read
        once per call so every row is measured against the same date.

    Returns
    -------
    pandas.DataFrame
        A copy in which ``dob_column`` has been parsed with ``pd.to_datetime``
        and an ``'age'`` column (completed years) has been added. Rows with a
        missing date of birth get NaN.
    """
    df = df_original.copy()
    df[dob_column] = pd.to_datetime(df[dob_column])

    if today is None:
        today = datetime.now()

    dob = df[dob_column].dt
    # True where this year's birthday is still ahead of the reference date,
    # i.e. (today.month, today.day) < (dob.month, dob.day).
    birthday_pending = (dob.month > today.month) | (
        (dob.month == today.month) & (dob.day > today.day)
    )
    df['age'] = today.year - dob.year - birthday_pending.astype(int)
    return df


def calculate_duration_and_drop_end_time(df_original, start_time_col='start_time', end_time_col='end_time'):
    """
    Calculates the duration in seconds between start and end times, and drops the end time column.

    Parameters
    ----------
    df_original : pandas.DataFrame
        Input frame. It is copied; the caller's frame is not modified.
    start_time_col : str
        Name of the column holding the start timestamps.
    end_time_col : str
        Name of the column holding the end timestamps; removed from the result.

    Returns
    -------
    pandas.DataFrame
        A copy with a float ``'duration'`` column (end minus start, in seconds)
        appended and ``end_time_col`` removed. ``start_time_col`` is left as is.

    Notes
    -----
    Timestamps are converted to UTC before subtracting. Values without a
    timezone are interpreted as UTC (``pd.to_datetime(..., utc=True)``), and a
    missing timestamp yields a NaN duration. An empty frame yields an empty
    ``'duration'`` column.
    """
    def _as_utc(values):
        # Parse value by value so rows may use different timestamp layouts
        # (e.g. with and without fractional seconds), then coerce the result
        # to a single UTC datetime column.
        return pd.to_datetime(values.map(lambda v: pd.to_datetime(v, utc=True)), utc=True)

    df = df_original.copy()
    elapsed = _as_utc(df[end_time_col]) - _as_utc(df[start_time_col])
    df['duration'] = elapsed.dt.total_seconds()
    return df.drop(columns=[end_time_col])

# Derive the age feature for users
df_users_time = convert_string_to_date(df_users, 'date_of_birth')

# Derive the duration feature for each event table, then discard the row id
# and insertion timestamp, which are not needed afterwards
df_user_location_time = (
    calculate_duration_and_drop_end_time(df_user_location)
    .drop(columns=['id', 'created_at'])
)
df_user_behavior_time = (
    calculate_duration_and_drop_end_time(df_user_behavior)
    .drop(columns=['id', 'created_at'])
)
df_user_app_usage_time = (
    calculate_duration_and_drop_end_time(df_user_app_usage)
    .drop(columns=['id', 'created_at'])
)

print("Check the results")
df_user_location_time.head()

Check the results


Unnamed: 0,user_id,location,start_time,duration
0,f7427faa-c131-402a-9578-b742aaf3b5bd,cafe,2023-11-19T10:01:01+00:00,7261.0


Normalize data

In [25]:
def normalize_data(df_original, column, fixed_max=None):
    """
    Normalizes a specified column of the DataFrame into the range [0, 1].

    Parameters
    ----------
    df_original : pandas.DataFrame
        Input frame. It is copied; the caller's frame is not modified.
    column : str
        Name of the numeric column to normalize.
    fixed_max : number, optional
        Value that maps to 1.0. When omitted, the column's own maximum is used;
        if the column has no positive maximum (empty, all missing, or nothing
        above zero) the values are only clipped.

    Returns
    -------
    pandas.DataFrame
        A copy in which ``column`` has been divided by the scale and clipped
        to [0, 1].

    Raises
    ------
    ValueError
        If ``fixed_max`` is given but is not positive.
    """
    df = df_original.copy()

    if fixed_max is None:
        observed_max = df[column].max()
        # A NaN maximum (empty column) fails this comparison too, so the
        # fallback of 1 leaves the values unscaled.
        scale = observed_max if observed_max > 0 else 1
    elif fixed_max > 0:
        scale = fixed_max
    else:
        raise ValueError(f"fixed_max must be positive, got {fixed_max!r}")

    # Scale, then cap: values above the maximum become 1, negatives become 0
    df[column] = (df[column] / scale).clip(lower=0, upper=1)

    return df


# Age is scaled against a fixed ceiling of 130 years
df_users_normalized = normalize_data(df_users_time, column='age', fixed_max=130)

# Durations are scaled against one full day: 86400 seconds (24h)
df_user_location_normalized = normalize_data(
    df_user_location_time, column='duration', fixed_max=86400
)
df_user_behavior_normalized = normalize_data(
    df_user_behavior_time, column='duration', fixed_max=86400
)
df_user_app_usage_normalized = normalize_data(
    df_user_app_usage_time, column='duration', fixed_max=86400
)

print("Check the results")
df_user_app_usage_normalized.head()

Check the results


Unnamed: 0,app_name,start_time,user_id,acceptance,duration
0,2,2023-11-19T10:00:00+00:00,f7427faa-c131-402a-9578-b742aaf3b5bd,4,0.031204


Merge data

**TODO:** the cells from here on are unfinished and have not been run yet.

In [None]:
# App names: give every distinct name a 1-based integer id, then attach that id
# to the usage rows as 'app_name_id'.
# NOTE(review): the user_app_usage preview earlier in this notebook shows an
# integer in 'app_name' (2). If that column already stores ids rather than
# names, this name-keyed lookup will produce NaN - confirm against _app_names.
app_name_to_id = dict(
    (name, position + 1)
    for position, name in enumerate(df__app_names['app_name'].unique())
)
df_user_app_usage_normalized['app_name_id'] = (
    df_user_app_usage_normalized['app_name'].map(app_name_to_id)
)

# Actions: same scheme, attached to the behavior rows as 'action_id'.
# NOTE(review): presumably the same caveat applies to 'action' - verify.
action_to_id = dict(
    (action, position + 1)
    for position, action in enumerate(df__actions['action'].unique())
)
df_user_behavior_normalized['action_id'] = (
    df_user_behavior_normalized['action'].map(action_to_id)
)


## 3. TensorFlow Model

In [None]:
import tensorflow as tf

def build_transformer_model(num_unique_apps, num_unique_actions, embedding_dim, num_numerical_features):
    """
    Builds a binary classifier over an app id, an action id and numerical features.

    Despite the name, the network currently contains no transformer layers: the
    two ids are embedded, joined with the numerical features and passed through
    two Dense layers to a single sigmoid output.

    Parameters
    ----------
    num_unique_apps : int
        ``input_dim`` of the app embedding (must exceed the largest app id fed in).
    num_unique_actions : int
        ``input_dim`` of the action embedding (must exceed the largest action id fed in).
    embedding_dim : int
        Output size of both embeddings.
    num_numerical_features : int
        Width of the numerical input vector.

    Returns
    -------
    tf.keras.Model
        Uncompiled model with inputs ``[app_input, action_input, numerical_input]``
        and one sigmoid output.
    """
    # Embedding layers. An id input of shape (batch, 1) is embedded to
    # (batch, 1, embedding_dim); flatten it to (batch, embedding_dim) so it has
    # the same rank as the numerical input and can be concatenated with it.
    app_input = tf.keras.layers.Input(shape=(1,), name='app_input')
    app_embedding = tf.keras.layers.Embedding(input_dim=num_unique_apps, output_dim=embedding_dim)(app_input)
    app_vector = tf.keras.layers.Flatten()(app_embedding)

    action_input = tf.keras.layers.Input(shape=(1,), name='action_input')
    action_embedding = tf.keras.layers.Embedding(input_dim=num_unique_actions, output_dim=embedding_dim)(action_input)
    action_vector = tf.keras.layers.Flatten()(action_embedding)

    # Additional inputs (e.g., age, duration)
    numerical_input = tf.keras.layers.Input(shape=(num_numerical_features,), name='numerical_input')

    # Combine inputs along the feature axis
    combined = tf.keras.layers.concatenate([app_vector, action_vector, numerical_input])

    # Dense layers (placeholder for a future transformer block)
    x = tf.keras.layers.Dense(64, activation='relu')(combined)
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=[app_input, action_input, numerical_input], outputs=output)
    return model

# Example parameters
# The id mappings are 1-based, so the largest id equals len(mapping). An
# Embedding's input_dim must be (largest index + 1), hence the "+ 1" below;
# index 0 is simply left unused.
model = build_transformer_model(num_unique_apps=len(app_name_to_id) + 1,
                                num_unique_actions=len(action_to_id) + 1,
                                embedding_dim=32,
                                num_numerical_features=2) # Update based on actual number of numerical features


## 4. Making Predictions

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Prepare your input data as tensors or numpy arrays in the format the model expects
# Train the model
# model.fit(...)

# Evaluate the model
# TODO: X_test / y_test are not defined in the visible part of this notebook;
# build them before running this cell. Evaluating before model.fit(...) only
# measures the untrained weights.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Predict on new data
# TODO: preprocess new data similarly and assign it here. Until then the
# placeholder keeps the cell valid Python and the prediction step is skipped.
new_data_preprocessed = None
if new_data_preprocessed is not None:
    prediction = model.predict(new_data_preprocessed)
    block_user_decision = prediction > 0.5  # Adjust threshold based on your requirement
