## 1 Fetching Data from Supabase

In [None]:
import sys
from dotenv import load_dotenv
import os

# Make the project root (the parent of the current working directory) importable
# so the local `server` package can be resolved. The membership check keeps this
# cell idempotent: re-running it no longer piles up duplicate sys.path entries.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)
from server import functions_aggregated, functions_supabase, functions_basic, functions_model

# Load variables from a local .env file into the process environment.
# Presumably functions_supabase.auth() reads its credentials from there — confirm.
load_dotenv()

# Client object returned by the project's Supabase auth helper.
supabase = functions_supabase.auth()

# Fetch all raw tables in one call; the helper returns eight payloads in this order.
(
    _acceptance_data,
    _actions_data,
    _app_names_data,
    _location_data,
    _sex,
    _weekdays,
    user_app_usage_data,
    users_data,
) = functions_supabase.fetchTables(supabase)


In [None]:
# Convert each raw table payload into its own pandas DataFrame.
# The order of the returned frames mirrors the order of the arguments.
(
    df__acceptance,
    df__actions,
    df__app_names,
    df__location,
    df__sex,
    df__weekdays,
    df_user_app_usage,
    df_users,
) = functions_basic.toPandasDataframes(
    _acceptance_data,
    _actions_data,
    _app_names_data,
    _location_data,
    _sex,
    _weekdays,
    user_app_usage_data,
    users_data,
)

# Peek at the first rows of the usage table to check the conversion.
df_user_app_usage.head()

##  2 Data Preprocessing

## 2.1 Remove incomplete rows/entries

In [None]:
def remove_none_rows(df, column_name):
    """
    Drop every row whose value in ``column_name`` is missing.

    Args:
        df: DataFrame to filter. It is left untouched; a new frame is returned.
        column_name: Label of the column that must hold a value.

    Returns:
        A DataFrame with only the rows where ``column_name`` is neither
        ``None`` nor ``NaN``. The original index labels are preserved.
    """
    required_columns = [column_name]
    cleaned = df.dropna(subset=required_columns)
    return cleaned

# Discard usage records that have no value in 'app_usage_time'.
df_user_app_usage = remove_none_rows(
    df_user_app_usage,
    'app_usage_time',
)

# Show the first rows and the column dtypes of the filtered frame.
display(df_user_app_usage.head(), df_user_app_usage.dtypes)

## 2.2 Calculate/simplify data functions

### 2.2.1 Normalize and numericalize data

In [None]:
# Produce the normalised usage and user frames from the lookup tables and the
# two fact tables. The transformation itself lives in
# functions_aggregated.normalizeAndNumericalize.
(
    df_user_app_usage_normalized,
    df_users_normalized,
) = functions_aggregated.normalizeAndNumericalize(
    df__acceptance,
    df__actions,
    df__app_names,
    df__location,
    df__sex,
    df__weekdays,
    df_user_app_usage,
    df_users,
)

# Check the results. For debugging, `.dtypes` of either frame or
# df__acceptance['id'].nunique() can be displayed here as well.
display(df_user_app_usage_normalized.head())

display(df_users_normalized.head())


### 2.2.2 Merge data

In [None]:
# Combine the normalised usage records and user attributes into one frame.
merged_df = functions_aggregated.mergeUsersAndAppUsage(
    df_user_app_usage_normalized,
    df_users_normalized,
)

# Show the first rows and the column dtypes of the merged frame.
display(merged_df.head(), merged_df.dtypes)


## 3 TensorFlow Model

In [None]:
from tensorflow.keras.models import Model

# Every column of merged_df except the label 'should_be_blocked' is a model input.
feature_columns = [col for col in merged_df.columns if col != 'should_be_blocked']
display(feature_columns)

# NOTE(review): the meaning of the positional arguments 1000 and 64 is defined by
# functions_model.build_and_compile_model, which is not visible in this notebook —
# confirm against that function before tuning them.
model: Model = functions_model.build_and_compile_model(1000, 64, feature_columns, functions_model.SupervisedMLA.BINARY_CLASSIFICATION)
model.summary()

# Create the target directory first so saving does not fail on a fresh checkout.
os.makedirs('../model', exist_ok=True)
# NOTE: fit() has not been called in this notebook at this point, so the file
# written here holds the model as returned by build_and_compile_model.
model.save('../model/model.keras')

## 4 Training, Evaluation and Predictions

In [None]:
from sklearn.model_selection import train_test_split

# Splitting the data.
# A fixed seed makes the train/validation/test partition identical on every run;
# without it each execution shuffles the rows differently and results cannot be
# compared between runs.
SPLIT_SEED = 42

# First hold out 10% of all rows as the test set ...
train_val, test = train_test_split(merged_df, test_size=0.1, random_state=SPLIT_SEED)
# ... then use 20% of the remaining rows for validation.
train, val = train_test_split(train_val, test_size=0.2, random_state=SPLIT_SEED)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')
print("-------------------------------------")

# Turn a DataFrame into the (features, labels) pair passed to the model.
def prepare_data(df, feature_columns, label_column):
    """
    Split ``df`` into a dict of feature arrays and an array of labels.

    Args:
        df: DataFrame holding both the feature columns and the label column.
        feature_columns: Iterable of column names to use as model inputs.
            ``label_column`` is skipped if it appears in this iterable.
        label_column: Name of the column the model is trying to predict.

    Returns:
        A tuple ``(features, labels)`` where ``features`` maps each feature
        column name to that column's values and ``labels`` holds the values
        of ``label_column``.
    """
    features = {}
    for column in feature_columns:
        if column == label_column:
            continue
        features[column] = df[column].values

    labels = df[label_column].values
    return features, labels


# Build the (features, labels) pairs for each partition.
x_train, y_train = prepare_data(train, feature_columns, 'should_be_blocked')
x_val, y_val = prepare_data(val, feature_columns, 'should_be_blocked')
x_test, y_test = prepare_data(test, feature_columns, 'should_be_blocked')

# Train the model
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val))

# Persist the fitted model so the file on disk contains the trained weights.
model.save('../model/model.keras')

# Evaluate the model on the validation set.
# NOTE(review): unpacking into exactly two values assumes the model was compiled
# with a single metric and that this metric is accuracy — confirm in
# functions_model.build_and_compile_model.
val_loss, val_accuracy = model.evaluate(x_val, y_val)
print("-------------------------------------")
print(f'Validation Loss: {val_loss}')
print(f'Validation Accuracy: {val_accuracy}')
print("-------------------------------------")

# Evaluate on the held-out test set as well. The validation data was already
# passed to fit() above, so the test set is the only partition the model has not
# seen during training. return_dict=True keeps this independent of how many
# metrics the model was compiled with.
test_metrics = model.evaluate(x_test, y_test, return_dict=True)
for metric_name, metric_value in test_metrics.items():
    print(f'Test {metric_name}: {metric_value}')
print("-------------------------------------")

# Predicting new data
predictions = model.predict(x_test)

# The model was built as BINARY_CLASSIFICATION, so these are presumably per-row
# scores for 'should_be_blocked' rather than hard 0/1 labels (probabilities if
# the output layer is a sigmoid — TODO confirm before thresholding).
predicted_values = predictions.flatten()  # one value per test row
display(predicted_values)


In [None]:
import matplotlib.pyplot as plt

# Plot every metric recorded during training next to its validation counterpart.
# Iterating over the keys of history.history, instead of hard-coding 'mae',
# avoids a KeyError when the model was compiled with a different metric.
display_names = {
    'loss': ('Loss', 'Loss'),
    'mae': ('Mean Absolute Error', 'MAE'),
}

for metric_name, train_values in history.history.items():
    if metric_name.startswith('val_'):
        continue  # drawn together with its training counterpart below

    # Fall back to the raw metric name when no nicer label is known.
    long_name, short_name = display_names.get(metric_name, (metric_name, metric_name))

    plt.plot(train_values, label=f'Train {short_name}')
    val_values = history.history.get(f'val_{metric_name}')
    if val_values is not None:
        plt.plot(val_values, label=f'Validation {short_name}')

    plt.title(f'Model {long_name}')
    plt.ylabel(short_name)
    plt.xlabel('Epoch')
    plt.legend(loc='best')
    plt.show()