In [1]:
# Neural network BERT-powered text classification

# Dependencies
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy
import os
import toml
import re
import itertools
from text_cleaner import *
import operator
from collections import Counter
import pickle
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_'+metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, 'val_'+metric])

def extract_hashtags(post_text):
    HASH_RE = re.compile(r"\#\w+")
    out_list = re.findall(HASH_RE, post_text)
    return out_list

print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# Load the .csv with archetypes
arch_df = pd.read_csv('archetypes_pl.csv', index_col=0)

# Save the order of columns
trait_list = arch_df.columns.tolist()

# Show the table header and column list
print(trait_list)
arch_df.head()

['innocent', 'sage', 'explorer', 'outlaw', 'magician', 'hero', 'lover', 'jester', 'everyman', 'caregiver', 'ruler', 'creator', 'dominant', 'submissive', 'maximalist', 'minimalist', 'inspiring', 'systematic', 'discovering', 'conservative', 'verifying', 'overlooking', 'sharpening', 'harmonic', 'empathic', 'matter_of_fact', 'brave', 'protective', 'generous', 'thrifty', 'favourable', 'balanced', 'sensuality', 'intelligent', 'believe', 'egocentric', 'allocentric']


Unnamed: 0_level_0,innocent,sage,explorer,outlaw,magician,hero,lover,jester,everyman,caregiver,...,protective,generous,thrifty,favourable,balanced,sensuality,intelligent,believe,egocentric,allocentric
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
marek_grodzki,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,2.0,3.0,4.0,4.0,3.0,4.0,4.0,3.0,0.0,0.0
vege_style_life,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,4.0,4.0,3.0,3.0,3.0,2.0,0.0,3.0
oliwka__2007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,2.0,2.0,0.0,3.0,1.0,2.0,4.0,1.0,0.0,3.0
z_przestrzeni_serca,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,3.0,0.0,4.0,4.0,3.0,4.0,4.0,0.0,1.0
zaradne_warsztaty,3.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,3.0,4.0,...,3.0,4.0,0.0,2.0,2.0,4.0,2.0,3.0,1.0,3.0


In [3]:
# Table preprocessing - replace all NaN with 2 (Unrelated/Don't know class), replace 0-5 values with the ones in range -1.0 - 1.0
arch_df = arch_df.fillna(2.0)

# Remove duplicated annotations, to exclude conflicting entries
arch_df = arch_df[~arch_df.index.duplicated(keep='first')]

# Print the head of the dataset after modification
arch_df.head()

Unnamed: 0_level_0,innocent,sage,explorer,outlaw,magician,hero,lover,jester,everyman,caregiver,...,protective,generous,thrifty,favourable,balanced,sensuality,intelligent,believe,egocentric,allocentric
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
marek_grodzki,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,2.0,3.0,4.0,4.0,3.0,4.0,4.0,3.0,0.0,0.0
vege_style_life,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,4.0,4.0,3.0,3.0,3.0,2.0,0.0,3.0
oliwka__2007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,2.0,2.0,0.0,3.0,1.0,2.0,4.0,1.0,0.0,3.0
z_przestrzeni_serca,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,3.0,0.0,4.0,4.0,3.0,4.0,4.0,0.0,1.0
zaradne_warsztaty,3.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,3.0,4.0,...,3.0,4.0,0.0,2.0,2.0,4.0,2.0,3.0,1.0,3.0


In [4]:
# Divide tables into train/test dataset
train_arch_df, test_arch_df = train_test_split(arch_df, test_size=0.2, random_state=61)

# Save both .csv into train_test_instagram directory
train_arch_df.to_csv("train_test_instagram/train_df.csv")
test_arch_df.to_csv("train_test_instagram/test_df.csv")

In [5]:
# Create pandas-like dataset
BASE_DIR = "instagram_cleared"

def clean_post(src_text: str) -> str:
    # Extract posts and hashtags
    extracted_text = remove_stopwords(clean_up_text(src_text))
    extracted_hashtags = extract_hashtags(src_text)
    return extracted_text + extracted_hashtags

def generate_dataset(dataset_df: pd.DataFrame) -> pd.DataFrame:
    out_dataset = pd.DataFrame(columns=["text"] + list(dataset_df.columns))
    # Iterate over whole DataFrame
    for i, row in tqdm(dataset_df.iterrows()):
        trait_row = copy.deepcopy(row)
        profile_posts = []

        # Get all posts per profile
        profile_path = os.path.join(BASE_DIR, i)
        for file in os.listdir(profile_path):
            if not file.endswith(".toml"):
                with open(os.path.join(profile_path, file), "r") as post_f:
                    read_text = post_f.read()
                    profile_posts.extend(clean_post(read_text))
        trait_row["text"] = " ".join(profile_posts)
        out_dataset = out_dataset.append(trait_row)
    out_dataset = out_dataset.reset_index(drop=True)
    return out_dataset

# Initialize text dataset DataFrames
train_dataset = generate_dataset(train_arch_df)
test_dataset = generate_dataset(test_arch_df)

406it [00:17, 23.37it/s]
102it [00:04, 24.37it/s]


In [6]:
# Save generated datasets to .csv
train_dataset.to_csv("train_test_instagram/train_dataset.csv")
test_dataset.to_csv("train_test_instagram/test_dataset.csv")

In [7]:
# Load datasets
train_dataset = pd.read_csv("train_test_instagram/train_dataset.csv", index_col=0)
test_dataset = pd.read_csv("train_test_instagram/test_dataset.csv", index_col=0)

for i in [i for i in list(range(len(train_dataset.columns))) if i != 0]:
    train_dataset.iloc[:, i] = pd.to_numeric(train_dataset.iloc[:, i], downcast="integer")
    test_dataset.iloc[:, i] = pd.to_numeric(test_dataset.iloc[:, i], downcast="integer")
    
train_dataset.iloc[:, 0] = train_dataset.iloc[:, 0].astype(str)
test_dataset.iloc[:, 0] = test_dataset.iloc[:, 0].astype(str)

In [8]:
# Model creation and training
# Select a trait/text set
train_labels = copy.deepcopy(train_dataset)
train_labels = train_labels.drop(columns="text")

test_labels = copy.deepcopy(test_dataset)
test_labels = test_labels.drop(columns="text")

def format_labels(data_set):
    return (tf.keras.utils.to_categorical(np.array(data_set.pop("innocent")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("sage")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("explorer")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("outlaw")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("magician")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("hero")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("lover")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("jester")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("everyman")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("caregiver")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("ruler")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("creator")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("dominant")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("submissive")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("maximalist")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("minimalist")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("inspiring")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("systematic")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("discovering")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("conservative")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("verifying")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("overlooking")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("sharpening")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("harmonic")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("empathic")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("matter_of_fact")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("brave")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("protective")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("generous")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("thrifty")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("favourable")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("balanced")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("sensuality")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("intelligent")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("believe")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("egocentric")), num_classes=5),
    tf.keras.utils.to_categorical(np.array(data_set.pop("allocentric")), num_classes=5))

X_train = train_dataset.pop("text")
y_train = format_labels(train_dataset)
X_test = test_dataset.pop("text")
y_test = format_labels(test_dataset)

In [9]:
# Text encoder
VOCAB_CNT = 160000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(max_tokens=VOCAB_CNT)
encoder.adapt(np.array(X_train))

In [10]:
# Model - Functional API
def build_model(embedded_dim: int) -> tf.keras.Model:
    def encode_text(inputs):
        return encoder(inputs)
    
    # Define model base - Embedding and LSTMs
    inputs = tf.keras.Input(shape=(None, ), dtype=tf.string, name="text_input")
    encoded_input = encode_text(inputs)
    x = tf.keras.layers.Embedding(len(encoder.get_vocabulary()), embedded_dim, mask_zero=True)(encoded_input)
    x = tf.keras.layers.SpatialDropout1D(0.5)(x)
    x = tf.keras.layers.LSTM(embedded_dim)(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    
    # Output layers
    # Archetypes
    innocent_out = tf.keras.layers.Dense(5, name="innocent_out", activation="softmax")(x)
    sage_out = tf.keras.layers.Dense(5, name="sage_out", activation="softmax")(x)
    explorer_out = tf.keras.layers.Dense(5, name="explorer_out", activation="softmax")(x)
    outlaw_out = tf.keras.layers.Dense(5, name="outlaw_out", activation="softmax")(x)
    magician_out = tf.keras.layers.Dense(5, name="magician_out", activation="softmax")(x)
    hero_out = tf.keras.layers.Dense(5, name="hero_out", activation="softmax")(x)
    lover_out = tf.keras.layers.Dense(5, name="lover_out", activation="softmax")(x)
    jester_out = tf.keras.layers.Dense(5, name="jester_out", activation="softmax")(x)
    everyman_out = tf.keras.layers.Dense(5, name="everyman_out", activation="softmax")(x)
    caregiver_out = tf.keras.layers.Dense(5, name="caregiver_out", activation="softmax")(x)
    ruler_out = tf.keras.layers.Dense(5, name="ruler_out", activation="softmax")(x)
    creator_out = tf.keras.layers.Dense(5, name="creator_out", activation="softmax")(x)
    # Traits
    dominant_out = tf.keras.layers.Dense(5, name="dominant_out", activation="softmax")(x)
    submissive_out = tf.keras.layers.Dense(5, name="submissive_out", activation="softmax")(x)
    maximalist_out = tf.keras.layers.Dense(5, name="maximalist_out", activation="softmax")(x)
    minimalist_out = tf.keras.layers.Dense(5, name="minimalist_out", activation="softmax")(x)
    inspiring_out = tf.keras.layers.Dense(5, name="inspiring_out", activation="softmax")(x)
    systematic_out = tf.keras.layers.Dense(5, name="systematic_out", activation="softmax")(x)
    discovering_out = tf.keras.layers.Dense(5, name="discovering_out", activation="softmax")(x)
    conservative_out = tf.keras.layers.Dense(5, name="conservative_out", activation="softmax")(x)
    verifying_out = tf.keras.layers.Dense(5, name="verifying_out", activation="softmax")(x)
    overlooking_out = tf.keras.layers.Dense(5, name="overlooking_out", activation="softmax")(x)
    sharpening_out = tf.keras.layers.Dense(5, name="sharpening_out", activation="softmax")(x)
    harmonic_out = tf.keras.layers.Dense(5, name="harmonic_out", activation="softmax")(x)
    empathic_out = tf.keras.layers.Dense(5, name="empathic_out", activation="softmax")(x)
    matter_of_fact_out = tf.keras.layers.Dense(5, name="matter_of_fact_out", activation="softmax")(x)
    brave_out = tf.keras.layers.Dense(5, name="brave_out", activation="softmax")(x)
    protective_out = tf.keras.layers.Dense(5, name="protective_out", activation="softmax")(x)
    generous_out = tf.keras.layers.Dense(5, name="generous_out", activation="softmax")(x)
    thrifty_out = tf.keras.layers.Dense(5, name="thrifty_out", activation="softmax")(x)
    favourable_out = tf.keras.layers.Dense(5, name="favourable_out", activation="softmax")(x)
    balanced_out = tf.keras.layers.Dense(5, name="balanced_out", activation="softmax")(x)
    sensuality_out = tf.keras.layers.Dense(5, name="sensuality_out", activation="softmax")(x)
    intelligent_out = tf.keras.layers.Dense(5, name="intelligent_out", activation="softmax")(x)
    believe_out = tf.keras.layers.Dense(5, name="believe_out", activation="softmax")(x)
    egocentric_out = tf.keras.layers.Dense(5, name="egocentric_out", activation="softmax")(x)
    allocentric_out = tf.keras.layers.Dense(5, name="allocentric_out", activation="softmax")(x)
    
    # Define full model
    model = tf.keras.Model(
        inputs=inputs,
        outputs=[
            innocent_out,
            sage_out,
            explorer_out,
            outlaw_out,
            magician_out,
            hero_out,
            lover_out,
            jester_out,
            everyman_out,
            caregiver_out,
            ruler_out,
            creator_out,
            dominant_out,
            submissive_out,
            maximalist_out,
            minimalist_out,
            inspiring_out,
            systematic_out,
            discovering_out,
            conservative_out,
            verifying_out,
            overlooking_out,
            sharpening_out,
            harmonic_out,
            empathic_out,
            matter_of_fact_out,
            brave_out,
            protective_out,
            generous_out,
            thrifty_out,
            favourable_out,
            balanced_out,
            sensuality_out,
            intelligent_out,
            believe_out,
            egocentric_out,
            allocentric_out
        ])
    
    return model

In [13]:
# Training callbacks
model = build_model(embedded_dim=100)

# Generate output list
output_list = "val_innocent_out_accuracy,val_sage_out_accuracy,val_explorer_out_accuracy,val_outlaw_out_accuracy,val_magician_out_accuracy,val_hero_out_accuracy,val_lover_out_accuracy,val_jester_out_accuracy,val_everyman_out_accuracy,val_caregiver_out_accuracy,val_ruler_out_accuracy,val_creator_out_accuracy,val_dominant_out_accuracy,val_submissive_out_accuracy,val_maximalist_out_accuracy,val_minimalist_out_accuracy,val_inspiring_out_accuracy,val_systematic_out_accuracy,val_discovering_out_accuracy,val_conservative_out_accuracy,val_verifying_out_accuracy,val_overlooking_out_accuracy,val_sharpening_out_accuracy,val_harmonic_out_accuracy,val_empathic_out_accuracy,val_matter_of_fact_out_accuracy,val_brave_out_accuracy,val_protective_out_accuracy,val_generous_out_accuracy,val_thrifty_out_accuracy,val_favourable_out_accuracy,val_balanced_out_accuracy,val_sensuality_out_accuracy,val_intelligent_out_accuracy,val_believe_out_accuracy,val_egocentric_out_accuracy,val_allocentric_out_accuracy".split(",")


# Plot graph
tf.keras.utils.plot_model(model, "my_model.png", show_shapes=True)

# Compile the model
# Compile the model
model.compile(loss={
                    "innocent_out": tf.keras.losses.CategoricalCrossentropy(),
                    "sage_out": tf.keras.losses.CategoricalCrossentropy(),
                    "explorer_out": tf.keras.losses.CategoricalCrossentropy(),
                    "outlaw_out": tf.keras.losses.CategoricalCrossentropy(),
                    "magician_out": tf.keras.losses.CategoricalCrossentropy(),
                    "hero_out": tf.keras.losses.CategoricalCrossentropy(),
                    "lover_out": tf.keras.losses.CategoricalCrossentropy(),
                    "jester_out": tf.keras.losses.CategoricalCrossentropy(),
                    "everyman_out": tf.keras.losses.CategoricalCrossentropy(),
                    "caregiver_out": tf.keras.losses.CategoricalCrossentropy(),
                    "ruler_out": tf.keras.losses.CategoricalCrossentropy(),
                    "creator_out": tf.keras.losses.CategoricalCrossentropy(),
                    "dominant_out": tf.keras.losses.CategoricalCrossentropy(),
                    "submissive_out": tf.keras.losses.CategoricalCrossentropy(),
                    "maximalist_out": tf.keras.losses.CategoricalCrossentropy(),
                    "minimalist_out": tf.keras.losses.CategoricalCrossentropy(),
                    "inspiring_out": tf.keras.losses.CategoricalCrossentropy(),
                    "systematic_out": tf.keras.losses.CategoricalCrossentropy(),
                    "discovering_out": tf.keras.losses.CategoricalCrossentropy(),
                    "conservative_out": tf.keras.losses.CategoricalCrossentropy(),
                    "verifying_out": tf.keras.losses.CategoricalCrossentropy(),
                    "overlooking_out": tf.keras.losses.CategoricalCrossentropy(),
                    "sharpening_out": tf.keras.losses.CategoricalCrossentropy(),
                    "harmonic_out": tf.keras.losses.CategoricalCrossentropy(),
                    "empathic_out": tf.keras.losses.CategoricalCrossentropy(),
                    "matter_of_fact_out": tf.keras.losses.CategoricalCrossentropy(),
                    "brave_out": tf.keras.losses.CategoricalCrossentropy(),
                    "protective_out": tf.keras.losses.CategoricalCrossentropy(),
                    "generous_out": tf.keras.losses.CategoricalCrossentropy(),
                    "thrifty_out": tf.keras.losses.CategoricalCrossentropy(),
                    "favourable_out": tf.keras.losses.CategoricalCrossentropy(),
                    "balanced_out": tf.keras.losses.CategoricalCrossentropy(),
                    "sensuality_out": tf.keras.losses.CategoricalCrossentropy(),
                    "intelligent_out": tf.keras.losses.CategoricalCrossentropy(),
                    "believe_out": tf.keras.losses.CategoricalCrossentropy(),
                    "egocentric_out": tf.keras.losses.CategoricalCrossentropy(),
                    "allocentric_out": tf.keras.losses.CategoricalCrossentropy()
                },
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-4, rho=0.9, momentum=0.1, epsilon=1e-7, centered=False),
              loss_weights={
                    "innocent_out": 1.0,
                    "sage_out": 1.0,
                    "explorer_out": 1.0,
                    "outlaw_out": 1.0,
                    "magician_out": 1.0,
                    "hero_out": 1.0,
                    "lover_out": 1.0,
                    "jester_out": 1.0,
                    "everyman_out": 1.0,
                    "caregiver_out": 1.0,
                    "ruler_out": 1.0,
                    "creator_out": 1.0,
                    "dominant_out": 1.0,
                    "submissive_out": 1.0,
                    "maximalist_out": 1.0,
                    "minimalist_out": 1.0,
                    "inspiring_out": 1.0,
                    "systematic_out": 1.0,
                    "discovering_out": 1.0,
                    "conservative_out": 1.0,
                    "verifying_out": 1.0,
                    "overlooking_out": 1.0,
                    "sharpening_out": 1.0,
                    "harmonic_out": 1.0,
                    "empathic_out": 1.0,
                    "matter_of_fact_out": 1.0,
                    "brave_out": 1.0,
                    "protective_out": 1.0,
                    "generous_out": 1.0,
                    "thrifty_out": 1.0,
                    "favourable_out": 1.0,
                    "balanced_out": 1.0,
                    "sensuality_out": 1.0,
                    "intelligent_out": 1.0,
                    "believe_out": 1.0,
                    "egocentric_out": 1.0,
                    "allocentric_out": 1.0
                },
              metrics={
                    "innocent_out": "accuracy",
                    "sage_out": "accuracy",
                    "explorer_out": "accuracy",
                    "outlaw_out": "accuracy",
                    "magician_out": "accuracy",
                    "hero_out": "accuracy",
                    "lover_out": "accuracy",
                    "jester_out": "accuracy",
                    "everyman_out": "accuracy",
                    "caregiver_out": "accuracy",
                    "ruler_out": "accuracy",
                    "creator_out": "accuracy",
                    "dominant_out": "accuracy",
                    "submissive_out": "accuracy",
                    "maximalist_out": "accuracy",
                    "minimalist_out": "accuracy",
                    "inspiring_out": "accuracy",
                    "systematic_out": "accuracy",
                    "discovering_out": "accuracy",
                    "conservative_out": "accuracy",
                    "verifying_out": "accuracy",
                    "overlooking_out": "accuracy",
                    "sharpening_out": "accuracy",
                    "harmonic_out": "accuracy",
                    "empathic_out": "accuracy",
                    "matter_of_fact_out": "accuracy",
                    "brave_out": "accuracy",
                    "protective_out": "accuracy",
                    "generous_out": "accuracy",
                    "thrifty_out": "accuracy",
                    "favourable_out": "accuracy",
                    "balanced_out": "accuracy",
                    "sensuality_out": "accuracy",
                    "intelligent_out": "accuracy",
                    "believe_out": "accuracy",
                    "egocentric_out": "accuracy",
                    "allocentric_out": "accuracy"
                })

early_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_y_accuracy', min_delta=0, patience=10, verbose=1,
    mode='auto', baseline=None, restore_best_weights=False
)

model_path = os.path.dirname("train_test_instagram/best-model-{acc_int:03d}.h5")

def save_aggregated_model(epoch, logs):
    relevantAcc = output_list
    accuracies = [logs[k] for k in relevantAcc]
    avg_acc = sum(accuracies) / len(relevantAcc)
    current_best = logs.get("val_y_accuracy")
    if current_best is None:
        current_best = 0.0
    print(f"Average Accuracies: {avg_acc}")
    logs["val_y_accuracy"] = avg_acc
    acc_int = round(avg_acc*100)
    if np.less(current_best, avg_acc):
        print(f"Old best accuracy: {current_best}, new best: {avg_acc}")
        model.save(filepath=model_path)

model_save_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=save_aggregated_model)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs_single_model")
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_y_accuracy', factor=0.2,
                              patience=5, min_lr=1e-7)
with tf.device("/GPU:0"):
    history = model.fit(X_train, y_train, epochs=100,
                        batch_size=10,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[model_save_callback,
                                   tensorboard_callback,
                                   reduce_lr,
                                   early_callback])

Epoch 1/100
Average Accuracies: 0.340752526312261




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 2/100
Average Accuracies: 0.39745628712950526




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 3/100
Average Accuracies: 0.41653418943688675




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 4/100
Average Accuracies: 0.4382618001183948




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 5/100
Average Accuracies: 0.4443561297816199




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 6/100
Average Accuracies: 0.452040284469321




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 7/100
Average Accuracies: 0.4485956635829565




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 8/100
Average Accuracies: 0.45786964289239934




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 9/100
Average Accuracies: 0.4583995845672247




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 10/100
Average Accuracies: 0.4586645554046373




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 11/100
Average Accuracies: 0.45786964289239934




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 12/100
Average Accuracies: 0.45839958496995875




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 13/100
Average Accuracies: 0.4607843225066726




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 14/100
Average Accuracies: 0.4639639725556245




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 15/100
Average Accuracies: 0.4639639725556245




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 16/100
Average Accuracies: 0.4636990017182118




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 17/100
Average Accuracies: 0.4636990017182118




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 18/100
Average Accuracies: 0.45998940999443466




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 19/100
Average Accuracies: 0.4636990017182118




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 20/100
Average Accuracies: 0.4607843225066726




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 21/100
Average Accuracies: 0.4626391183685612




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 22/100
Average Accuracies: 0.46290408920597387




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 23/100
Average Accuracies: 0.4626391183685612




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 24/100
Average Accuracies: 0.4626391183685612




INFO:tensorflow:Assets written to: train_test_instagram/assets


INFO:tensorflow:Assets written to: train_test_instagram/assets


Epoch 00024: early stopping


In [16]:
# Evaluate results
test_results = model.evaluate(X_test, y_test)

print('Average accuracy on test set:', test_results[-1])

Average accuracy on test set: 0.46078431606292725


In [17]:
print(model.predict(np.array([" ".join(clean_post("""Czy można jeść słodycze podczas odchudzania?🍭⁣⁣
⁣⁣
Cześć!:)⁣⁣
⁣⁣
Często można zauważyć podejście u osób, które się odchudzają, że odrzucają oni w 100% cukier albo propagują oni, że jednym słusznym podejściem jest eliminacja wszystkich przyjemności. 🍩⁣⁣
⁣⁣
Z drugiej strony, czasami widzimy osoby, które propagują, że można spożywać wszystko, tylko wystarczy liczyć kalorię i to schudniemy.🍫 ⁣⁣
⁣⁣
📍 Natomiast dzisiaj, dzielę się z Tobą moim sposobem, który u mnie działa oraz u moich podopiecznych. Podchodzę do tego w taki sposób, że większość mojego jadłospisu, jest bazowanie na zdrowych produktach, natomiast, jeżeli, uzupełnimy wszystko, co potrzebne jest dla naszego ciała do prawidłowego funkcjonowania, to wtedy wdrażam, te produkty rekreacyjne, które sprawiają mi przyjemność. ⁣⁣
⁣⁣
🤓 Uważam, że jest to ważne, żeby mieć satysfakcję z naszej diety, bo to pozwoli mi, wytrwać na niej o wiele dłużej, niż jakbym miał jakieś restrykcje.⁣⁣
⁣⁣
Oczywiście, jest to schemat, który nie pasuję dla każdego.⁣⁣
⁣⁣
I zawsze, gdy ktoś pyta się mnie, czy można jeść słodycze, zadaję pewne pytania.⁣⁣
⁣⁣
➡️ Czy mieści się to w ujemnym bilansie kalorycznym?⁣⁣
➡️ Czy nawet jak się mieści, spełniasz całe zapotrzebowanie na witaminy, błonnik czy składniki mineralne?⁣⁣
➡️ Czy po zjedzeniu tej słodkości, nie sięgniesz po więcej i przez to przekroczysz swoje zapotrzebowanie?⁣⁣
⁣⁣
⁣⁣
🎥 Nagrałem o tym materiał, w którym, właśnie szerzej omawiam ten temat, także zachęcam sprawdzić.⁣⁣
Link znajdziesz w bio!⁣⁣
⁣⁣
Pozdrawiam i życzę miłego dnia! ❤️⁣⁣
⁣⁣
PS. Jestem ciekaw, czy jeżeli, mógł/a byś wybrać jakiś produkt, to byłby na słodko, czy słono?:)🍕🍦⁣⁣
⁣⁣
#śniadanie #zdrowejedzenie #fit #kawa #goodmorning #wiemcojem #jedzenie #sniadanie #yummy #dieta #poland #zdrowo #coffee #dziendobry #delicious #morning #sniadaniemistrzow #instagood #foodie #warsaw #odchudzanie #owsianka #breakfasttime #eggs #czystamicha #tasty #obiad #healthybreakfast #redukcja
"""
))])))

[array([[0.08234137, 0.09427559, 0.27826136, 0.45477778, 0.09034394]],
      dtype=float32), array([[0.08405096, 0.13923657, 0.38638055, 0.2821158 , 0.10821613]],
      dtype=float32), array([[0.13480206, 0.16894606, 0.3724491 , 0.21610977, 0.10769295]],
      dtype=float32), array([[0.17977378, 0.23488882, 0.42693394, 0.10077747, 0.05762597]],
      dtype=float32), array([[0.06854977, 0.16648166, 0.45472577, 0.25227427, 0.05796852]],
      dtype=float32), array([[0.14867997, 0.21711433, 0.45418668, 0.13493356, 0.0450854 ]],
      dtype=float32), array([[0.15380305, 0.13802584, 0.35841367, 0.26325595, 0.08650151]],
      dtype=float32), array([[0.09846382, 0.21008976, 0.4233267 , 0.19262315, 0.07549662]],
      dtype=float32), array([[0.04653741, 0.08956078, 0.27165723, 0.42724183, 0.16500278]],
      dtype=float32), array([[0.066985  , 0.14898935, 0.3753875 , 0.2777323 , 0.13090576]],
      dtype=float32), array([[0.14030604, 0.16317062, 0.43497688, 0.16985701, 0.09168945]],
      dty

In [19]:
test_model = tf.keras.models.load_model("train_test_instagram/")
new_test_results = test_model.evaluate(X_test, y_test)
print('Average accuracy on test set - loaded model:', new_test_results[-1])


Average accuracy on test set - loaded model: 0.46078431606292725
