<a href="https://colab.research.google.com/github/LeechXDD/9417_Pro_Project/blob/main/feature_engineer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Feature Engineering: Predict Student Performance from Game Play

In [None]:
# Install TensorFlow, TensorFlow Addons and Keras into the notebook runtime.
# NOTE(review): the plain `tensorflow` / `tensorflow_addons` installs are
# followed by `--upgrade` installs of the same packages, so the first two
# commands look redundant, and none of the versions are pinned.
!pip install tensorflow_addons
!pip install tensorflow
!pip install tensorflow --upgrade
!pip install --upgrade tensorflow-addons
!pip install keras --upgrade

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
! pip install -q kaggle
from google.colab import files
files.upload()

In [None]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the competition archive into the current working directory.
!kaggle competitions download -c predict-student-performance-from-game-play
# NOTE(review): this only prints a listing of Kaggle datasets; nothing later in
# the visible notebook uses it — presumably a connectivity check, confirm.
! kaggle datasets list

In [None]:
! mkdir kaggleData
! unzip predict-student-performance-from-game-play.zip -d kaggleData

from google.colab import drive
drive.mount('/content/drive')

In [None]:
def get_minimal_dtype(df, allow_float16=True):
    """Downcast the columns of ``df`` in place to reduce memory usage.

    * Signed / unsigned integer columns are cast to the narrowest integer type
      of the same signedness whose range contains the column's min and max.
    * Float columns are cast to ``float16`` (if ``allow_float16``) or
      ``float32`` when their value range fits.
    * Object / string columns are converted to ``category``.
    * Everything else (bool, category, datetime, timedelta, extension dtypes,
      ...) is left untouched.

    Columns are never widened, and a column whose min/max is NaN (empty or
    all-missing) is left as it is.

    Args:
        df: DataFrame to shrink. It is modified in place.
        allow_float16: When True (default) floats may be stored as ``float16``.
            Only the value *range* is checked, so precision can be lost, and
            arithmetic on the result can overflow (e.g. squaring a value above
            ~256 gives ``inf``). Pass False to stop at ``float32``.

    Returns:
        The same ``df`` object, for call chaining.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # Candidate target types, narrowest first.
    candidates_by_kind = {
        'i': (np.int8, np.int16, np.int32),
        'u': (np.uint8, np.uint16, np.uint32),
        'f': (np.float16, np.float32) if allow_float16 else (np.float32,),
    }

    for col in df.columns:
        dtype = df[col].dtype

        # Text-like columns become categoricals.
        if pd.api.types.is_object_dtype(dtype) or (
            dtype.name != 'category' and pd.api.types.is_string_dtype(dtype)
        ):
            df[col] = df[col].astype('category')
            continue

        # Only plain numpy int / uint / float columns are downcast; bool
        # (kind 'b') is deliberately excluded so flags are not turned into floats.
        if not isinstance(dtype, np.dtype) or dtype.kind not in candidates_by_kind:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        for candidate in candidates_by_kind[dtype.kind]:
            if np.dtype(candidate).itemsize >= dtype.itemsize:
                # Nothing narrower than the current dtype is left to try.
                break
            limits = np.finfo(candidate) if dtype.kind == 'f' else np.iinfo(candidate)
            # Inclusive bounds: a value equal to the type's limit still fits.
            # Any comparison with NaN is False, so all-NaN columns are skipped.
            if limits.min <= c_min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break

    mem_usg = df.memory_usage().sum() / 1024**2
    print("Memory usage became: ",mem_usg," MB")

    return df

In [None]:
train_data = pd.read_csv('kaggleData/train.csv')
test_data = pd.read_csv('kaggleData/test.csv')

In [None]:
train_data = get_minimal_dtype(train_data)

In [None]:
# Read the training labels. Each `session_id` value is split on '_': the first
# piece is parsed as the integer session and the last piece, minus its leading
# character, as the integer question number.
labels = pd.read_csv('kaggleData/train_labels.csv')
labels['session'] = labels['session_id'].map(lambda sid: int(sid.split('_', 1)[0]))
labels['q'] = labels['session_id'].map(lambda sid: int(sid.rsplit('_', 1)[-1][1:]))
labels.head()

#### Data Cleaning
The columns with missing values are:

- page: This is only for notebook-related events. The missing values could indicate that the event is not related to the notebook. We could fill missing values with a placeholder like -1 to denote 'Not Applicable'.
- room_coor_x, room_coor_y, screen_coor_x, screen_coor_y: These are the coordinates of the click, and are only relevant for click events. Similar to 'page', we could fill missing values with a placeholder.
- hover_duration: This is only for hover events. We can use the same approach as for the coordinates.
- text, fqid, text_fqid: These columns contain information about the event and the room. Missing values could indicate that the event does not involve any text or specific interactions that would be recorded in these fields. We could replace missing values with a placeholder like 'None' or 'Not Applicable'.

In [None]:
# Find out columns with missing values
missing_values = train_data.isnull().sum()

# Fill missing values with the -1 "not applicable" placeholder.
# Assign the result back instead of calling `fillna(..., inplace=True)` on
# `train_data[col]`: that is a chained in-place update on a temporary object,
# which pandas warns about and which leaves `train_data` unchanged once
# Copy-on-Write is enabled.
for placeholder_col in ['page', 'room_coor_x', 'room_coor_y',
                        'screen_coor_x', 'screen_coor_y', 'hover_duration']:
    train_data[placeholder_col] = train_data[placeholder_col].fillna(-1)

### Exploratory Data Analysis

In [None]:
# Set plot style
sns.set_style("whitegrid")

# Create a function for easy plotting
def plot_count(train_data, column, title, color, rotation=0):
    """Draw a count plot of ``column`` with bars ordered by frequency.

    Args:
        train_data: DataFrame holding ``column``.
        column: Name of the column whose values are counted.
        title: Figure title.
        color: Single colour used for all bars.
        rotation: Rotation of the x tick labels, in degrees.
    """
    # value_counts() sorts descending, so the most common value comes first.
    bar_order = train_data[column].value_counts().index

    plt.figure(figsize=(12, 6))
    sns.countplot(x=column, data=train_data, order=bar_order, color=color)
    plt.title(title, size=16)
    plt.xticks(rotation=rotation)
    plt.show()

#### Distribution of the Event Names
The most common event in the dataset is 'navigate_click', followed by 'notification_click'. These events likely relate to key interactions within the game and could be influential in a model's ability to predict student performance.

In [None]:
# Plot the distribution of event names
plot_count(train_data, 'event_name', 'Distribution of Event Names', 'skyblue')

#### Distribution of Game Levels
The distribution of game levels shows that the majority of the events are happening in the middle levels of the game (around level 10). This could suggest that most users progress to these levels before stopping, or that these levels simply have more interactive events.

In [None]:
# Plot the distribution of levels
plot_count(train_data, 'level', 'Distribution of Game Levels', 'green')

#### Distribution of Level Groups
The level group distribution shows that the majority of events belong to the '5-12' level group. This is consistent with the distribution of game levels, as the majority of events occurred at these levels.

In [None]:
# Plot the distribution of level groups
plot_count(train_data, 'level_group', 'Distribution of Level Groups', 'red')

#### Elapsed Time Statistics
From the histogram of 'elapsed_time', we can observe that the distribution is heavily skewed to the right, with a few sessions having unusually high elapsed time values. These could potentially be outliers or errors in the data.

In [None]:
# Display statistics related to elapsed time.
# A bare expression is only rendered when it is the last line of a cell, so the
# summary has to be displayed explicitly here.
elapsed_time_stats = train_data['elapsed_time'].describe()
display(elapsed_time_stats)

# Plot the distribution of 'elapsed_time'
plt.figure(figsize=(10, 6))
sns.histplot(train_data['elapsed_time'], bins=100, color='purple')
plt.title('Distribution of Elapsed Time', size=16)
plt.xlabel('Elapsed Time (in milliseconds)', size=13)
plt.ylabel('Count', size=13)
plt.show()

The table below shows the subset of the data that falls in the top 1% of 'elapsed_time' values, which could be outliers.

In [None]:
# Check the values on the high end of 'elapsed_time'
high_elapsed_time = train_data[train_data['elapsed_time'] > train_data['elapsed_time'].quantile(0.99)]
high_elapsed_time

So I will set all 'elapsed_time' values above the 99th percentile to the 99th percentile value. This would limit the effect of extreme values without completely removing them from the dataset.

In [None]:
# Cap 'elapsed_time' at the 99th percentile
train_data['elapsed_time'] = train_data['elapsed_time'].clip(upper=train_data['elapsed_time'].quantile(0.99))

The maximum value is now significantly lower than before, while the other statistics (mean, standard deviation, etc.) remain similar. This means that the extreme high values have been limited, which should help to reduce their influence on the model.

In [None]:
# Verify the change
train_data['elapsed_time'].describe()

### Feature Engineering

In [None]:
CATEGORICAL = ['event_name', 'name', 'fqid', 'room_fqid', 'text_fqid']
NUMERICAL = [
    'elapsed_time', 'level', 'page',
    'room_coor_x', 'room_coor_y',
    'screen_coor_x', 'screen_coor_y',
    'hover_duration',
]

def feature_engineer(dataset_df):
    """Aggregate event rows into one feature row per (session_id, level_group).

    Produces, in this column order:
      * ``<col>_nunique`` for every column in CATEGORICAL,
      * the group mean of every column in NUMERICAL (kept under the raw name),
      * ``<col>_std`` for every column in NUMERICAL.

    Missing aggregates (for example the std of a single-row group) are replaced
    by -1. The result is indexed by ``session_id`` and keeps ``level_group`` as
    a regular column.
    """
    per_group = dataset_df.groupby(['session_id', 'level_group'])

    pieces = [per_group[col].agg('nunique').rename(col + '_nunique') for col in CATEGORICAL]
    pieces.extend(per_group[col].agg('mean') for col in NUMERICAL)
    pieces.extend(per_group[col].agg('std').rename(col + '_std') for col in NUMERICAL)

    features = pd.concat(pieces, axis=1).fillna(-1)
    return features.reset_index().set_index('session_id')



### Improved version of feature engineering function

In [None]:
CATEGORICAL = ['event_name', 'name','fqid', 'room_fqid', 'text_fqid']
NUMERICAL = ['elapsed_time','level','page','room_coor_x', 'room_coor_y',
        'screen_coor_x', 'screen_coor_y', 'hover_duration']
BINNING = ['elapsed_time', 'room_coor_x', 'room_coor_y', 'screen_coor_x', 'screen_coor_y', 'hover_duration']

# Define feature engineering function
def feature_engineer_ver2(dataset_df):
    """Aggregate event rows into one feature row per (session_id, level_group).

    Features, in column order:
      * ``<col>_nunique`` for each column in CATEGORICAL;
      * ``<col>_mean`` for each column in NUMERICAL, plus ``<col>_std`` and
        ``<col>_bin_count`` when the column is also in BINNING;
      * ``screen_coor_mean`` / ``screen_coor_std``: statistics of the distance
        ``sqrt(screen_coor_x**2 + screen_coor_y**2)``;
      * ``total_hover_duration_mean``: group mean of the per-session sum of
        ``hover_duration``.

    Missing aggregates are replaced by -1 and ``screen_coor_mean`` is truncated
    to int32. The result is indexed by ``session_id`` with ``level_group`` as a
    regular column.

    ``dataset_df`` is not modified: derived values are kept in local Series
    rather than written back as new columns. Numerical columns are promoted to
    float64 before any arithmetic, because inputs downcast to float16 overflow
    to ``inf`` as soon as a coordinate is squared or durations are summed.
    """
    group_keys = [dataset_df['session_id'], dataset_df['level_group']]
    # Columns needed again after the per-column loop.
    reused = {'screen_coor_x', 'screen_coor_y', 'hover_duration'}
    promoted = {}

    dfs = []
    for c in CATEGORICAL:
        dfs.append(dataset_df[c].groupby(group_keys).agg('nunique').rename(c + '_nunique'))

    for c in NUMERICAL:
        values = dataset_df[c].astype('float64')
        if c in reused:
            promoted[c] = values

        dfs.append(values.groupby(group_keys).agg('mean').rename(c + '_mean'))

        # Standard deviation and binning only for the columns listed in BINNING.
        if c in BINNING:
            dfs.append(values.groupby(group_keys).agg('std').rename(c + '_std'))

            # NOTE(review): counting the quartile labels per group yields the
            # number of non-missing values of `c` in the group, not a per-bin
            # count — confirm this is the intended feature.
            binned = pd.qcut(values, q=4, duplicates='drop')
            dfs.append(binned.groupby(group_keys).agg('count').rename(c + '_bin_count'))

    # Interaction between screen coordinates
    if 'screen_coor_x' in NUMERICAL and 'screen_coor_y' in NUMERICAL:
        # Compute Euclidean distance instead of product
        screen_coor = np.sqrt(promoted['screen_coor_x']**2 + promoted['screen_coor_y']**2)
        tmp = screen_coor.groupby(group_keys).agg(['mean', 'std'])
        tmp.columns = ['screen_coor_mean', 'screen_coor_std']
        dfs.append(tmp)

    # Aggregated features
    if 'hover_duration' in NUMERICAL:
        total_hover = promoted['hover_duration'].groupby(dataset_df['session_id']).transform('sum')
        dfs.append(total_hover.groupby(group_keys).agg('mean').rename('total_hover_duration_mean'))

    features = pd.concat(dfs, axis=1)
    features = features.fillna(-1)
    features = features.reset_index()
    features = features.set_index('session_id')

    # The column only exists when both screen coordinates are in NUMERICAL.
    if 'screen_coor_mean' in features.columns:
        features['screen_coor_mean'] = features['screen_coor_mean'].astype('int32')

    return features


In [None]:
CATEGORICAL = ['event_name', 'name','fqid', 'room_fqid', 'text_fqid']
NUMERICAL = ['elapsed_time','level','page','room_coor_x', 'room_coor_y',
        'screen_coor_x', 'screen_coor_y', 'hover_duration']
BINNING = ['elapsed_time', 'room_coor_x', 'room_coor_y', 'screen_coor_x', 'screen_coor_y', 'hover_duration']

from sklearn.preprocessing import PowerTransformer

def feature_engineer_ver3(dataset_df):
    """Aggregate event rows into one feature row per (session_id, level_group).

    On top of the ver2 feature set this adds, for every column in CATEGORICAL,
    per-group occurrence counts of its 10 most frequent values (columns named
    ``<col>_<value>``). Missing numerical values are replaced by the column
    median, and ``elapsed_time`` is Yeo-Johnson transformed before it is binned.

    Missing aggregates are replaced by -1. The result is indexed by
    ``session_id`` with ``level_group`` as a regular column.

    ``dataset_df`` is not modified: indicator columns, median-filled values,
    the transformed ``elapsed_time`` and the bin labels all live in local
    objects. Writing them back would overwrite the caller's ``elapsed_time``
    with transformed values and add dozens of helper columns. Numerical columns
    are promoted to float64 first so float16 inputs cannot overflow to ``inf``.
    """
    group_keys = [dataset_df['session_id'], dataset_df['level_group']]
    pt = PowerTransformer(method='yeo-johnson')
    # Columns needed again after the per-column loop.
    reused = {'screen_coor_x', 'screen_coor_y', 'hover_duration'}
    prepared = {}

    dfs = []
    for c in CATEGORICAL:
        column = dataset_df[c]
        dfs.append(column.groupby(group_keys).agg('nunique').rename(c + '_nunique'))

        # Per-group counts of the top N most common values.
        top_N = column.value_counts()[:10].index
        if len(top_N) > 0:
            indicators = pd.DataFrame(
                {f'{c}_{val}': (column == val).astype(int) for val in top_N},
                index=dataset_df.index,
            )
            dfs.append(indicators.groupby(group_keys).sum())

    for c in NUMERICAL:
        # Fill missing values with the column median (on a local copy).
        values = dataset_df[c].astype('float64')
        values = values.fillna(values.median())
        if c in reused:
            prepared[c] = values

        dfs.append(values.groupby(group_keys).agg('mean').rename(c + '_mean'))

        # Standard deviation and binning only for the columns listed in BINNING.
        if c in BINNING:
            dfs.append(values.groupby(group_keys).agg('std').rename(c + '_std'))

            to_bin = values
            if c == 'elapsed_time':
                # Normalize 'elapsed_time' for binning only; the mean and std
                # above are computed on the untransformed values.
                to_bin = pd.Series(pt.fit_transform(values.to_frame()).ravel(),
                                   index=values.index)

            # NOTE(review): counting the quartile labels per group yields the
            # number of non-missing values in the group, not a per-bin count —
            # confirm this is the intended feature.
            binned = pd.qcut(to_bin, q=4, duplicates='drop')
            dfs.append(binned.groupby(group_keys).agg('count').rename(c + '_bin_count'))

    # Interaction between screen coordinates
    if 'screen_coor_x' in NUMERICAL and 'screen_coor_y' in NUMERICAL:
        # Compute Euclidean distance instead of product
        screen_coor = np.sqrt(prepared['screen_coor_x']**2 + prepared['screen_coor_y']**2)
        tmp = screen_coor.groupby(group_keys).agg(['mean', 'std'])
        tmp.columns = ['screen_coor_mean', 'screen_coor_std']
        dfs.append(tmp)

    # Aggregated features
    if 'hover_duration' in NUMERICAL:
        total_hover = prepared['hover_duration'].groupby(dataset_df['session_id']).transform('sum')
        dfs.append(total_hover.groupby(group_keys).agg('mean').rename('total_hover_duration_mean'))

    features = pd.concat(dfs, axis=1)
    features = features.fillna(-1)
    features = features.reset_index()
    features = features.set_index('session_id')

    return features



In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer

CATEGORICAL = ['event_name', 'name','fqid', 'room_fqid', 'text_fqid']
NUMERICAL = ['elapsed_time','level','page','room_coor_x', 'room_coor_y',
             'screen_coor_x', 'screen_coor_y', 'hover_duration']

def feature_engineer_ver4(dataset_df):
    """Aggregate event rows into one feature row per (session_id, level_group).

    Features, in column order:
      * ``<col>_encoded_mean`` / ``_encoded_std`` of the label-encoded values of
        each column in CATEGORICAL (values are stringified before encoding);
      * ``<col>_sum`` / ``_mean`` / ``_std`` of each column in NUMERICAL after
        median imputation, plus ``<col>_binned_mean`` / ``_binned_std`` of its
        10-bin quantile discretisation;
      * ``screen_coor_sum`` / ``_mean`` / ``_std`` of the distance
        ``sqrt(screen_coor_x**2 + screen_coor_y**2)``;
      * ``total_hover_duration_mean`` / ``_std`` of the per-session sum of
        ``hover_duration``.

    Missing aggregates are replaced by -1 and ``page_sum`` is cast to int32.
    The result is indexed by ``session_id`` with ``level_group`` as a regular
    column.

    ``dataset_df`` is not modified: encoded, imputed and binned values are kept
    in local Series instead of being written back as new columns. Numerical
    columns are promoted to float64 first, because sums and squares of float16
    inputs overflow to ``inf``.
    """
    group_keys = [dataset_df['session_id'], dataset_df['level_group']]
    le = LabelEncoder()
    discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='quantile')
    # Columns needed again after the per-column loop.
    reused = {'screen_coor_x', 'screen_coor_y', 'hover_duration'}
    prepared = {}

    dfs = []
    for c in CATEGORICAL:
        # Label encoding for categorical features
        encoded = pd.Series(le.fit_transform(dataset_df[c].astype(str)),
                            index=dataset_df.index)
        tmp = encoded.groupby(group_keys).agg(['mean', 'std'])
        tmp.columns = [c + '_encoded_mean', c + '_encoded_std']
        dfs.append(tmp)

    for c in NUMERICAL:
        # Fill missing values with the column median (on a local copy).
        values = dataset_df[c].astype('float64')
        values = values.fillna(values.median())
        if c in reused:
            prepared[c] = values

        # Calculate sum, mean and std for numerical features
        tmp = values.groupby(group_keys).agg(['sum', 'mean', 'std'])
        tmp.columns = [c + '_sum', c + '_mean', c + '_std']
        dfs.append(tmp)

        # Apply binning to numerical features
        binned = pd.Series(discretizer.fit_transform(values.to_frame()).ravel(),
                           index=values.index)
        tmp = binned.groupby(group_keys).agg(['mean', 'std'])
        tmp.columns = [c + '_binned_mean', c + '_binned_std']
        dfs.append(tmp)

    # Interaction between screen coordinates
    if 'screen_coor_x' in NUMERICAL and 'screen_coor_y' in NUMERICAL:
        # Compute Euclidean distance instead of product
        screen_coor = np.sqrt(prepared['screen_coor_x']**2 + prepared['screen_coor_y']**2)
        tmp = screen_coor.groupby(group_keys).agg(['sum', 'mean', 'std'])
        tmp.columns = ['screen_coor_sum', 'screen_coor_mean', 'screen_coor_std']
        dfs.append(tmp)

    # Aggregated features
    if 'hover_duration' in NUMERICAL:
        total_hover = prepared['hover_duration'].groupby(dataset_df['session_id']).transform('sum')
        tmp = total_hover.groupby(group_keys).agg(['mean', 'std'])
        tmp.columns = ['total_hover_duration_mean', 'total_hover_duration_std']
        dfs.append(tmp)

    features = pd.concat(dfs, axis=1)
    features = features.fillna(-1)
    features = features.reset_index()
    features = features.set_index('session_id')

    # The column only exists when 'page' is listed in NUMERICAL.
    if 'page_sum' in features.columns:
        features['page_sum'] = features['page_sum'].astype('int32')

    return features


In [None]:
from sklearn.preprocessing import LabelEncoder

def feature_engineer_ver5(data):
    """Return a copy of the event-level frame with per-session features added.

    Unlike the other ``feature_engineer*`` functions this keeps one row per
    event (same index as ``data``) rather than one row per
    (session_id, level_group). ``data`` itself is not modified.

    Changes and additions in the returned frame:
      * the five categorical columns are replaced by integer codes (values are
        stringified, codes follow the sorted order of the distinct strings);
      * ``avg_elapsed_time``: mean ``elapsed_time`` over the whole session;
      * ``navigate_click_count``: number of 'navigate_click' events in the session;
      * ``max_hover_duration``: largest ``hover_duration`` in the session;
      * ``event_count_<event_name>``: per-session count of every event type;
      * ``time_since_last_event``: difference to the previous row's
        ``elapsed_time`` within the session (NaN on a session's first row);
      * ``fullscreen_event_count`` / ``music_event_count``: number of events in
        the session with ``fullscreen == 1`` / ``music == 1``;
      * ``unique_levels_count``: number of distinct ``level`` values in the session.

    Every per-session count is filled in on all rows of the session. All event
    counts are taken from the original ``event_name`` strings; comparing the
    encoded integers with 'navigate_click' can never match.
    """
    categorical_cols = ['event_name', 'name', 'fqid', 'room_fqid', 'text_fqid']
    session = data['session_id']
    by_session = data.groupby('session_id')

    def count_in_session(mask):
        # Number of True rows per session, broadcast back to every row.
        return mask.astype('int64').groupby(session).transform('sum')

    # Count of each event type, computed before the names are encoded.
    event_counts = pd.crosstab(session, data['event_name']).add_prefix('event_count_')

    first_columns = {
        # Encode categorical variables; astype(str) turns missing values into
        # an ordinary string so they receive a code like any other value.
        col: pd.factorize(data[col].astype(str), sort=True)[0]
        for col in categorical_cols
    }
    first_columns['avg_elapsed_time'] = by_session['elapsed_time'].transform('mean')
    first_columns['navigate_click_count'] = count_in_session(data['event_name'] == 'navigate_click')
    first_columns['max_hover_duration'] = by_session['hover_duration'].transform('max')

    later_columns = {
        # Time since the last event
        'time_since_last_event': by_session['elapsed_time'].diff(),
        # Number of events in fullscreen mode
        'fullscreen_event_count': count_in_session(data['fullscreen'] == 1),
        # Number of events with music on
        'music_event_count': count_in_session(data['music'] == 1),
        # Number of unique levels played
        'unique_levels_count': by_session['level'].transform('nunique'),
    }

    # assign() and join() both return new frames, so the caller's `data` is
    # left untouched.
    return (
        data.assign(**first_columns)
        .join(event_counts, on='session_id')
        .assign(**later_columns)
    )


### Test run of the feature-engineering functions

In [None]:
# Build the feature table consumed by the modelling cells below. Those cells
# need one row per (session_id, level_group), indexed by `session_id`, with a
# `level_group` column. `feature_engineer_ver5` returns event-level rows on the
# original row index, so its output cannot be used by them.
#sample = train_data.sample(frac=0.05)
#dataset_df = feature_engineer(train_data)
dataset_df = feature_engineer_ver2(train_data)
#dataset_df = feature_engineer_ver3(sample)
#dataset_df = feature_engineer_ver4(train_data)
#dataset_df = feature_engineer_ver5(train_data)  # event-level output, see note above

#Also, remember to apply the same transformations to your test data.
#test_data = feature_engineer_ver4(test_data)

In [None]:
# Replace `inf` values:
dataset_df = dataset_df.replace([np.inf, -np.inf], np.nan)

# Replace `NaN` values with column mean.
# `numeric_only=True` is required because the table also holds the non-numeric
# `level_group` column, for which a mean cannot be computed.
dataset_df = dataset_df.fillna(dataset_df.mean(numeric_only=True))


### Trial Run on Model

In [None]:
# Install TensorFlow Decision Forests (plus TensorFlow / Keras upgrades) for
# the trial model below.
# NOTE(review): `tensorflow` was already imported as `tf` in the imports cell
# near the top, so upgrading it here presumably only takes effect after a
# runtime restart — confirm; the installs are also unpinned.
!pip install tensorflow_addons
!pip install tensorflow_decision_forests
!pip install tensorflow
!pip install tensorflow --upgrade
!pip install keras --upgrade

import tensorflow as tf
import tensorflow_decision_forests as tfdf

In [None]:
def split_dataset(dataset, test_ratio=0.20):
    """Split ``dataset`` into train / validation parts by unique index value.

    The unique index values (user sessions, in order of first appearance) are
    cut so that the last ``test_ratio`` share goes to the validation part. All
    rows sharing an index value always end up on the same side.

    Args:
        dataset: DataFrame whose index identifies the user session.
        test_ratio: Fraction of unique index values placed in the second part.

    Returns:
        ``(train_part, validation_part)`` as two DataFrames.

    Raises:
        ValueError: if ``test_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio!r}")
    USER_LIST = dataset.index.unique()
    # Honour the argument; the ratio used to be hard-coded to 0.20 here.
    split = int(len(USER_LIST) * (1 - test_ratio))
    return dataset.loc[USER_LIST[:split]], dataset.loc[USER_LIST[split:]]

train_x, valid_x = split_dataset(dataset_df)
print("{} examples in training, {} examples in testing.".format(
    len(train_x), len(valid_x)))

In [None]:
tfdf.keras.get_all_models()

In [None]:

# Fetch the unique list of user sessions in the validation dataset. We assigned
# `session_id` as the index of our feature engineered dataset. Hence fetching
# the unique values in the index column will give us a list of users in the
# validation set.
VALID_USER_LIST = valid_x.index.unique()

# Create a dataframe for storing the predictions of each question for all users
# in the validation set.
# For this, the required size of the data frame is:
# (no: of users in validation set  x no of questions).
# We will initialize all the predicted values in the data frame to zero.
# The dataframe's index column is the user `session_id`s.
prediction_df = pd.DataFrame(data=np.zeros((len(VALID_USER_LIST),18)), index=VALID_USER_LIST)

# Create an empty dictionary to store the models created for each question.
models = {}

# Create an empty dictionary to store the evaluation score for each question.
evaluation_dict ={}


In [None]:

for q_no in range(1,19):

    # Select level group for the question based on the q_no.
    if q_no<=3: grp = '0-4'
    elif q_no<=13: grp = '5-12'
    else: grp = '13-22'
    print("### q_no", q_no, "grp", grp)


    # Filter the rows in the datasets based on the selected level group.
    # `.copy()` gives independent frames, so adding the label column below does
    # not write into a slice of `train_x` / `valid_x`.
    train_df = train_x.loc[train_x.level_group == grp].copy()
    train_users = train_df.index.values
    valid_df = valid_x.loc[valid_x.level_group == grp].copy()
    valid_users = valid_df.index.values

    # Select the labels for the related q_no (looked up once, used for both splits).
    question_labels = labels.loc[labels.q==q_no].set_index('session')
    train_labels = question_labels.loc[train_users]
    valid_labels = question_labels.loc[valid_users]


    # Add the label to the filtered datasets.
    train_df["correct"] = train_labels["correct"]
    valid_df["correct"] = valid_labels["correct"]

    # There's one more step required before we can train the model.
    # We need to convert the dataset from Pandas format (pd.DataFrame)
    # into TensorFlow Datasets format (tf.data.Dataset).
    # We are omitting `level_group`, since it is not needed for training anymore.
    train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df.loc[:, train_df.columns != 'level_group'], label="correct")
    valid_ds = tfdf.keras.pd_dataframe_to_tf_dataset(valid_df.loc[:, valid_df.columns != 'level_group'], label="correct")

    # Create the model with default settings. Despite the variable name `gbtm`,
    # this is a Random Forest model, not Gradient Boosted Trees.
    gbtm = tfdf.keras.RandomForestModel(verbose=0)
    gbtm.compile(metrics=["accuracy"])
    # Train the model.
    gbtm.fit(x=train_ds)

    # Store the model
    models[f'{grp}_{q_no}'] = gbtm

    # Evaluate the trained model on the validation dataset and store the
    # evaluation accuracy in the `evaluation_dict`.
    evaluation = gbtm.evaluate(x=valid_ds,return_dict=True)
    evaluation_dict[q_no] = evaluation["accuracy"]

    # Use the trained model to make predictions on the validation dataset and
    # store the predicted values in the `prediction_df` dataframe.
    predict = gbtm.predict(x=valid_ds)
    prediction_df.loc[valid_users, q_no-1] = predict.flatten()

In [None]:
for name, value in evaluation_dict.items():
  print(f"question {name}: accuracy {value:.4f}")

# Average over the questions that were actually evaluated rather than a
# hard-coded 18, so a partial run still reports a correct mean.
if evaluation_dict:
  print("\nAverage accuracy", sum(evaluation_dict.values())/len(evaluation_dict))

In [None]:
inspector = models['0-4_1'].make_inspector()

print(f"Available variable importances:")
for importance in inspector.variable_importances().keys():
  print("\t", importance)
inspector.variable_importances()["NUM_AS_ROOT"]

In [None]:
# F1 score  0.6319707 (feature_engineer)
# F1 score  0.64316726 (feature_engineer_ver2)
# F1 score  0.64058185 (feature_engineer_ver4)

true_df = pd.DataFrame(data=np.zeros((len(VALID_USER_LIST),18)), index=VALID_USER_LIST)
for i in range(18):
    # Get the true labels.
    tmp = labels.loc[labels.q == i+1].set_index('session').loc[VALID_USER_LIST]
    true_df[i] = tmp.correct.values

max_score = 0; best_threshold = 0

# The threshold sweep below (0.4 to 0.8, keeping the threshold with the
# highest `F1 score`) is currently disabled.
# The baseline uses a uniform threshold of 0.5.


# for threshold in np.arange(0.4,0.8,0.01):
#     metric = tf.keras.metrics.F1Score(average="macro",threshold=threshold)
#     y_true = tf.one_hot(true_df.values.reshape((-1)), depth=2)
#     y_pred = tf.one_hot((prediction_df.values.reshape((-1))>threshold).astype('int'), depth=2)
#     metric.update_state(y_true, y_pred)
#     f1_score = metric.result().numpy()
#     if f1_score > max_score:
#         max_score = f1_score
#         best_threshold = threshold
metric = tf.keras.metrics.F1Score(average="macro",threshold=0.5)
y_true = tf.one_hot(true_df.values.reshape((-1)), depth=2)
y_pred = tf.one_hot((prediction_df.values.reshape((-1))>0.5).astype('int'), depth=2)
metric.update_state(y_true, y_pred)
f1_score = metric.result().numpy()


print("threshold ", 0.5, "\tF1 score ", f1_score)