In [1]:
# Associate words with archetypes/character traits as intermediate layer
# and with influencer as the "last" layer

# Dependencies
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy
import os
import toml
import re
import itertools
from text_cleaner import *
import operator
from collections import Counter
import pickle

def extract_hashtags(post_text):
    """Return every hashtag found in ``post_text``, in order of appearance.

    A hashtag is a ``#`` immediately followed by one or more word characters;
    the leading ``#`` is kept in each returned string. An empty list is
    returned when the text contains none.
    """
    hashtag_pattern = re.compile(r"\#\w+")
    return hashtag_pattern.findall(post_text)

## AGDS

In [2]:
# Load the training annotations; the first CSV column is used as the row index
arch_df = pd.read_csv('archetypes_pl.csv', index_col=0)

# Remember the column (trait) names in the order they appear in the file
trait_list = arch_df.columns.tolist()

# Print the trait names, then display the first rows of the table
print(trait_list)
arch_df.head()

['innocent', 'sage', 'explorer', 'outlaw', 'magician', 'hero', 'lover', 'jester', 'everyman', 'caregiver', 'ruler', 'creator', 'dominant', 'submissive', 'maximalist', 'minimalist', 'inspiring', 'systematic', 'discovering', 'conservative', 'verifying', 'overlooking', 'sharpening', 'harmonic', 'empathic', 'matter_of_fact', 'brave', 'protective', 'generous', 'thrifty', 'favourable', 'balanced', 'sensuality', 'intelligent', 'believe', 'egocentric', 'allocentric']


Unnamed: 0_level_0,innocent,sage,explorer,outlaw,magician,hero,lover,jester,everyman,caregiver,...,protective,generous,thrifty,favourable,balanced,sensuality,intelligent,believe,egocentric,allocentric
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
marek_grodzki,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,2.0,3.0,4.0,4.0,3.0,4.0,4.0,3.0,0.0,0.0
vege_style_life,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,4.0,4.0,3.0,3.0,3.0,2.0,0.0,3.0
oliwka__2007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,2.0,2.0,0.0,3.0,1.0,2.0,4.0,1.0,0.0,3.0
z_przestrzeni_serca,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,3.0,0.0,4.0,4.0,3.0,4.0,4.0,0.0,1.0
zaradne_warsztaty,3.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,3.0,4.0,...,3.0,4.0,0.0,2.0,2.0,4.0,2.0,3.0,1.0,3.0


In [3]:
# Table preprocessing - every missing annotation (NaN) becomes 2, which the
# original note calls the "Unrelated/Don't know" class. The values are not
# rescaled to another range in this cell.
arch_df = arch_df.fillna(2)

# Keep only the first row for each index label, so that conflicting duplicate
# annotations are excluded
is_repeated_label = arch_df.index.duplicated(keep='first')
arch_df = arch_df[~is_repeated_label]

# Display the first rows after the modification
arch_df.head()

Unnamed: 0_level_0,innocent,sage,explorer,outlaw,magician,hero,lover,jester,everyman,caregiver,...,protective,generous,thrifty,favourable,balanced,sensuality,intelligent,believe,egocentric,allocentric
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
marek_grodzki,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,2.0,3.0,4.0,4.0,3.0,4.0,4.0,3.0,0.0,0.0
vege_style_life,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,4.0,4.0,3.0,3.0,3.0,2.0,0.0,3.0
oliwka__2007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,2.0,2.0,0.0,3.0,1.0,2.0,4.0,1.0,0.0,3.0
z_przestrzeni_serca,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,3.0,0.0,4.0,4.0,3.0,4.0,4.0,0.0,1.0
zaradne_warsztaty,3.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,3.0,4.0,...,3.0,4.0,0.0,2.0,2.0,4.0,2.0,3.0,1.0,3.0


In [4]:
# Collect the cleaned posts and the hashtags of every annotated profile.
# NOTE(review): nothing in this cell removes users whose directory is missing
# or empty - os.listdir raises for a missing directory, and available_arch_df
# is a copy that this cell never modifies.
available_arch_df = copy.deepcopy(arch_df)
posts = []

BASE_DIR = "instagram_cleared"

# One pass over every annotated profile (the index label is the directory name)
for i, row in tqdm(arch_df.iterrows()):
    profile_posts = []
    profile_hashtags = []

    # Every file of the profile directory except the .toml ones is read as a post
    profile_path = os.path.join(BASE_DIR, i)
    for file in os.listdir(profile_path):
        if file.endswith(".toml"):
            continue
        with open(os.path.join(profile_path, file), "r") as post_f:
            read_text = post_f.read()
        profile_posts.append(remove_stopwords(clean_up_text(read_text)))
        # Hashtags of all posts go straight into one flat per-profile list
        profile_hashtags.extend(extract_hashtags(read_text))

    # One entry per influencer: the cleaned posts followed by the hashtag list
    posts.append(profile_posts + [profile_hashtags])

508it [00:11, 43.83it/s]


In [5]:
# Map usernames to their row position in arch_df
users = list(arch_df.index.values)
# enumerate yields every position in a single pass instead of one list.index()
# scan per user; the index labels are unique here because duplicated labels
# were dropped from arch_df beforehand
user_indices = {username: position for position, username in enumerate(users)}

In [6]:
# Load the pickled word/trait table.
# NOTE(review): pickle.load can execute arbitrary code while loading - only
# point this at a file from a trusted source.
with open("softmax_word_trait_array.pickle", "rb") as f:
    word_df = pickle.load(f)

# Word map - the table's column labels in order, used to lay out the
# word-count vectors that are multiplied with the table
word_map = word_df.columns.tolist()

In [7]:
from scipy.special import softmax

def get_trait_dot_product(post_text: str, word_map: list, word_dataframe: pd.DataFrame) -> pd.Series:
    """Predict one class position per trait for a piece of text.

    The text is cleaned, the occurrences of its known words are written into a
    vector laid out like ``word_map``, and ``word_dataframe`` is multiplied
    with that vector. For every trait of the module-level ``trait_list`` the
    position of the highest softmax score in ``word_dot.loc[trait]`` is kept.

    Args:
        post_text: Raw text to score.
        word_map: Vocabulary; entry ``k`` corresponds to column ``k`` of
            ``word_dataframe``.
        word_dataframe: Score table whose columns follow ``word_map``.
            NOTE(review): its rows are presumably keyed by (trait, class) so
            that ``.loc[trait]`` yields one score per class - confirm against
            the pickled table.

    Returns:
        An integer Series indexed by trait name with the arg-max position
        for each trait.
    """
    # Tokens of the cleaned text plus the hashtags taken from the raw text
    filtered_post = remove_stopwords(clean_up_text(post_text))
    filtered_post += extract_hashtags(post_text)

    # word -> column position, built once per call so that each lookup is O(1);
    # setdefault keeps the first position of a repeated word, like list.index()
    word_position = {}
    for position, word in enumerate(word_map):
        word_position.setdefault(word, position)

    # Occurrence count of every known word; unknown words are ignored
    post_vector = np.zeros(len(word_map))
    for word, freq in Counter(filtered_post).items():
        position = word_position.get(word)
        if position is not None:
            post_vector[position] = freq

    # Calculate dot product for a given text
    word_dot = word_dataframe.dot(post_vector)

    # Build the result in one go: Series.append no longer exists in pandas 2.x
    # and grew the Series one element at a time
    predictions = [
        int(np.argmax(softmax(np.asarray(word_dot.loc[trait]))))
        for trait in trait_list
    ]
    return pd.Series(predictions, index=trait_list, dtype=int)

In [8]:
# Read the influencer table from file: the first line holds the column names
# and the first column is used as the row index
new_arch_df = pd.read_csv("influencer_recalc.csv", header=0, index_col=0)

In [9]:
# Trait <-> influencer relation expressed as a matrix product
def get_influencer_dot_product(trait_output: list, influencer_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Multiply the influencer table with a trait vector.

    Thin wrapper around ``DataFrame.dot``: ``influencer_dataframe`` is the
    left operand and ``trait_output`` the right one. For a one-dimensional
    ``trait_output`` the result has one value per influencer row.
    """
    product = influencer_dataframe.dot(trait_output)
    return product

# Method for calculating the similarity
def calculate_similarity(post_text: str,
                         word_map: list,
                         word_dataframe: pd.DataFrame,
                         influencer_dataframe: pd.DataFrame) -> pd.Series:
    """Euclidean distance between a text's trait vector and every influencer.

    Despite the function name the returned values are distances: a smaller
    value means the influencer row is closer to the text's trait vector.

    Args:
        post_text: Raw text to compare against the influencers.
        word_map: Vocabulary passed on to ``get_trait_dot_product``.
        word_dataframe: Score table passed on to ``get_trait_dot_product``.
        influencer_dataframe: One row per influencer, one column per trait.

    Returns:
        A float Series indexed like ``influencer_dataframe`` holding the L2
        norm of (influencer row - trait vector).
    """
    # Trait vector of the text (one predicted class position per trait)
    post_result = get_trait_dot_product(post_text, word_map, word_dataframe)

    # A normalised trait/influencer dot product used to be computed here but
    # never reached the result, so only the distance computation is kept.

    # Subtract the trait vector from every row at once; the column labels are
    # aligned with the vector's index, as the row-by-row subtraction did
    differences = influencer_dataframe.sub(post_result, axis="columns")
    distances = np.linalg.norm(differences.to_numpy(dtype=float), axis=1)

    return pd.Series(distances, index=influencer_dataframe.index, dtype=float)

In [10]:
# Trait accuracy - round the results
def natural_round(x: float) -> int:
    """Round ``x`` to the nearest integer, with halves going up.

    The floor of ``x`` is taken first; one is added when the remaining
    fraction is at least 0.5 (so 2.5 -> 3 and -1.5 -> -1).
    """
    out = int(x // 1)
    return out + 1 if (x - out) >= 0.5 else out

def accuracy_per_trait(input_vector: pd.Series, annotated_vector: pd.Series) -> np.ndarray:
    """Mark, position by position, whether a prediction matches its annotation.

    Args:
        input_vector: Predicted values, one per trait; each one is rounded
            with ``natural_round`` before the comparison.
        annotated_vector: Annotated values in the same order as
            ``input_vector``.

    Returns:
        An integer array with one entry per element of ``input_vector``:
        1 where the rounded prediction equals the annotation, 0 elsewhere.
    """
    # Compare by position: plain arrays avoid integer lookups on a
    # label-indexed Series
    predicted = np.asarray(input_vector)
    expected = np.asarray(annotated_vector)

    hits = [
        int(natural_round(predicted[k]) == expected[k])
        for k in range(len(predicted))
    ]
    # The builtin int replaces the np.int alias, which NumPy no longer provides
    return np.array(hits, dtype=int)

In [11]:
pbar = tqdm(arch_df.iterrows())
accuracy = 0  # NOTE(review): never read in this cell

# Out accuracy vector - per trait, the number of users predicted correctly.
# Sized from trait_list; the builtin int replaces the removed np.int alias.
total_accuracy = np.zeros(len(trait_list), dtype=int)

# iterrows() walks arch_df in the order in which `posts` was filled, so the
# running position replaces a users.index() scan on every iteration
for position, (idx, row) in enumerate(pbar):
    # Flatten the user's token lists into one space-separated string
    user_text = list(itertools.chain.from_iterable(posts[position]))
    user_text = " ".join(user_text)
    sim_output = get_trait_dot_product(user_text, word_map, word_df)
    user_accuracy = accuracy_per_trait(sim_output, row)
    total_accuracy += user_accuracy
    # Running mean over the users processed so far
    pbar.set_description(f"Average accuracy: {round(np.mean(np.divide(total_accuracy, position + 1))*100, 2)}")

  out_vec = pd.Series()
Average accuracy: 49.56: : 508it [13:31,  1.60s/it]


In [12]:
# Per-trait share of correctly predicted users, and the mean over all traits
scaled_accuracy = total_accuracy / len(arch_df)
avg_accuracy = scaled_accuracy.mean()

print("--- ACCURACY ON TRAINING DATASET ---")

print(f"Average accuracy: {round(avg_accuracy*100, 2)}%")
print("Accuracy per trait:")
# One line per trait, in the column order of the annotation table
for i, trait_name in enumerate(trait_list):
    print(f"{trait_name}: {round(scaled_accuracy[i] * 100, 2)}%")

--- ACCURACY ON TRAINING DATASET ---
Average accuracy: 49.56%
Accuracy per trait:
innocent: 47.05%
sage: 44.49%
explorer: 43.7%
outlaw: 43.11%
magician: 37.2%
hero: 49.8%
lover: 57.48%
jester: 49.41%
everyman: 27.17%
caregiver: 46.26%
ruler: 48.23%
creator: 40.55%
dominant: 53.54%
submissive: 58.86%
maximalist: 42.13%
minimalist: 54.33%
inspiring: 42.32%
systematic: 54.92%
discovering: 54.92%
conservative: 46.65%
verifying: 42.91%
overlooking: 44.09%
sharpening: 68.9%
harmonic: 51.18%
empathic: 49.02%
matter_of_fact: 54.92%
brave: 68.5%
protective: 61.61%
generous: 34.45%
thrifty: 66.93%
favourable: 69.09%
balanced: 40.75%
sensuality: 48.23%
intelligent: 36.22%
believe: 56.69%
egocentric: 51.18%
allocentric: 47.05%


## AGDS - accuracy on test dataset

In [13]:
# Load the .csv with the test-set archetype annotations
arch_df = pd.read_csv('test_archetypes_pl.csv', index_col=0)

# Save the order of columns
trait_list = arch_df.columns.tolist()

# Print the column list. The .head() calls in this cell are not displayed,
# because neither is the cell's last expression.
print(trait_list)
arch_df.head()

# Table preprocessing - replace all NaN with 2 (Unrelated/Don't know class);
# the values are not rescaled to another range here
arch_df = arch_df.fillna(2)

# Remove duplicated annotations, to exclude conflicting entries
arch_df = arch_df[~arch_df.index.duplicated(keep='first')]

arch_df.head()

# NOTE(review): nothing below removes users whose directory is missing or
# empty - os.listdir raises for a missing directory, and available_arch_df is
# a copy that this cell never modifies.
available_arch_df = copy.deepcopy(arch_df)
posts = []

BASE_DIR = "instagram_cleared"

# Iterate over whole DataFrame (the index label is the profile directory name)
for i, row in tqdm(arch_df.iterrows()):
    profile_posts = []
    profile_hashtags = []

    # Get all posts per profile - every file except the .toml ones
    profile_path = os.path.join(BASE_DIR, i)
    for file in os.listdir(profile_path):
        if not file.endswith(".toml"):
            with open(os.path.join(profile_path, file), "r") as post_f:
                read_text = post_f.read()
                profile_posts.append(remove_stopwords(clean_up_text(read_text)))
                profile_hashtags.append(extract_hashtags(read_text))

    # Merge lists - a single list for a single influencer: the cleaned posts
    # followed by one flat list with all hashtags
    profile_hashtags = list(itertools.chain.from_iterable(profile_hashtags))
    posts.append(list(itertools.chain.from_iterable([profile_posts, [profile_hashtags]])))

# Map usernames to indices; enumerate avoids one list.index() scan per user
# (labels are unique after the de-duplication above)
users = list(arch_df.index.values)
user_indices = {username: position for position, username in enumerate(users)}

pbar = tqdm(arch_df.iterrows())
accuracy = 0  # NOTE(review): never read in this cell

# Out accuracy vector - per trait, the number of users predicted correctly.
# Sized from trait_list; the builtin int replaces the removed np.int alias.
test_total_accuracy = np.zeros(len(trait_list), dtype=int)

# iterrows() walks arch_df in the order in which `posts` was filled above, so
# the running position replaces a users.index() scan on every iteration
for position, (idx, row) in enumerate(pbar):
    user_text = list(itertools.chain.from_iterable(posts[position]))
    user_text = " ".join(user_text)
    sim_output = get_trait_dot_product(user_text, word_map, word_df)
    user_accuracy = accuracy_per_trait(sim_output, row)
    test_total_accuracy += user_accuracy
    # Running mean over the users processed so far
    pbar.set_description(f"Average test dataset accuracy: {round(np.mean(np.divide(test_total_accuracy, position + 1))*100, 2)}")

# Show total accuracy
scaled_test_accuracy = np.divide(test_total_accuracy, len(arch_df))
avg_test_accuracy = np.mean(scaled_test_accuracy)

print("--- ACCURACY ON TESTING DATASET ---")

print(f"Average test dataset accuracy: {round(avg_test_accuracy*100, 2)}%")
print("Accuracy per trait:")
for i in range(len(trait_list)):
    print(f"{trait_list[i]}: {round(scaled_test_accuracy[i] * 100, 2)}%")

4it [00:00, 35.61it/s]

['innocent', 'sage', 'explorer', 'outlaw', 'magician', 'hero', 'lover', 'jester', 'everyman', 'caregiver', 'ruler', 'creator', 'dominant', 'submissive', 'maximalist', 'minimalist', 'inspiring', 'systematic', 'discovering', 'conservative', 'verifying', 'overlooking', 'sharpening', 'harmonic', 'empathic', 'matter_of_fact', 'brave', 'protective', 'generous', 'thrifty', 'favourable', 'balanced', 'sensuality', 'intelligent', 'believe', 'egocentric', 'allocentric']


177it [00:04, 42.56it/s]
  out_vec = pd.Series()
Average test dataset accuracy: 17.12: : 177it [05:06,  1.73s/it]

--- ACCURACY ON TESTING DATASET ---
Average test dataset accuracy: 17.12%
Accuracy per trait:
innocent: 19.77%
sage: 18.64%
explorer: 12.43%
outlaw: 11.3%
magician: 13.56%
hero: 29.38%
lover: 22.03%
jester: 27.68%
everyman: 13.56%
caregiver: 12.43%
ruler: 23.16%
creator: 14.12%
dominant: 7.91%
submissive: 9.6%
maximalist: 9.04%
minimalist: 6.78%
inspiring: 12.43%
systematic: 18.64%
discovering: 26.55%
conservative: 15.25%
verifying: 7.91%
overlooking: 5.08%
sharpening: 22.03%
harmonic: 17.51%
empathic: 24.86%
matter_of_fact: 19.77%
brave: 44.07%
protective: 12.99%
generous: 16.38%
thrifty: 30.51%
favourable: 21.47%
balanced: 11.3%
sensuality: 15.82%
intelligent: 11.86%
believe: 22.6%
egocentric: 14.12%
allocentric: 10.73%





## AI - Recurrent Neural Network (LSTM) - accuracy on training dataset

In [14]:
# AI model comparison
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
    """Draw a metric and its ``val_`` counterpart on the current figure.

    Both series are read from ``history.history``: first ``metric`` itself,
    then the entry stored under ``'val_' + metric``. The x axis is labelled
    "Epochs", the y axis carries the metric name, and the legend names both
    curves.
    """
    validation_key = 'val_'+metric
    plt.plot(history.history[metric])
    plt.plot(history.history[validation_key], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, validation_key])
    
# Text preprocessing used while building the datasets
def clean_post(src_text: str) -> str:
    """Return the cleaned tokens of a post followed by its hashtags.

    The text goes through ``clean_up_text`` and ``remove_stopwords``; the
    hashtags are taken from the unmodified text and concatenated after it.
    NOTE(review): callers extend a list with the result and join it with
    spaces, so the value is presumably a list of tokens rather than the
    annotated ``str`` - confirm against ``text_cleaner``.
    """
    return remove_stopwords(clean_up_text(src_text)) + extract_hashtags(src_text)

def generate_dataset(dataset_df: pd.DataFrame, base_dir: str = None) -> pd.DataFrame:
    """Build a table with one row per profile: its joined text plus its traits.

    For every row of ``dataset_df`` the directory named after the row's index
    label is listed, every file except the ``.toml`` ones is read and passed
    through ``clean_post``, and the resulting tokens of all files are joined
    with single spaces.

    Args:
        dataset_df: Annotation table indexed by profile (directory) name.
        base_dir: Directory that contains the profile directories. When left
            as ``None`` the module-level ``BASE_DIR`` is used.

    Returns:
        A DataFrame with a fresh 0..n-1 index, a ``"text"`` column and then
        the columns of ``dataset_df`` in their original order.

    Raises:
        OSError: from ``os.listdir`` / ``open`` when a profile directory or
            file cannot be read.
    """
    root_dir = BASE_DIR if base_dir is None else base_dir
    column_order = ["text"] + list(dataset_df.columns)

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas 2.x and copied the whole
    # frame on every call
    records = []

    # Iterate over whole DataFrame
    for i, row in tqdm(dataset_df.iterrows()):
        profile_posts = []

        # Get all posts per profile
        profile_path = os.path.join(root_dir, i)
        for file in os.listdir(profile_path):
            if not file.endswith(".toml"):
                with open(os.path.join(profile_path, file), "r") as post_f:
                    read_text = post_f.read()
                    profile_posts.extend(clean_post(read_text))

        # Trait values of the profile plus its concatenated text
        record = row.to_dict()
        record["text"] = " ".join(profile_posts)
        records.append(record)

    return pd.DataFrame(records, columns=column_order)

def format_labels(data_set):
    """Pop every trait column from ``data_set`` and one-hot encode it.

    The columns are removed from ``data_set`` (it is modified in place) in the
    fixed order listed below, and each one is passed to
    ``tf.keras.utils.to_categorical`` with ``num_classes=5``.

    Returns:
        A tuple with one encoded array per trait, in the listed order.
    """
    trait_order = (
        "innocent", "sage", "explorer", "outlaw", "magician", "hero",
        "lover", "jester", "everyman", "caregiver", "ruler", "creator",
        "dominant", "submissive", "maximalist", "minimalist", "inspiring",
        "systematic", "discovering", "conservative", "verifying",
        "overlooking", "sharpening", "harmonic", "empathic",
        "matter_of_fact", "brave", "protective", "generous", "thrifty",
        "favourable", "balanced", "sensuality", "intelligent", "believe",
        "egocentric", "allocentric",
    )
    return tuple(
        tf.keras.utils.to_categorical(np.array(data_set.pop(trait_name)), num_classes=5)
        for trait_name in trait_order
    )

In [15]:
# Data preparation for the neural-network evaluation on the training set

# Load the .csv with the training archetype annotations
arch_df = pd.read_csv('archetypes_pl.csv', index_col=0)

# Save the order of columns
trait_list = arch_df.columns.tolist()

# Print the column list (this .head() is not displayed - it is not the cell's last expression)
print(trait_list)
arch_df.head()

# Table preprocessing - replace all NaN with 2 (Unrelated/Don't know class); the values are not rescaled here
arch_df = arch_df.fillna(2.0)

# Remove duplicated annotations, to exclude conflicting entries
arch_df = arch_df[~arch_df.index.duplicated(keep='first')]

# Not displayed either, for the same reason as above
arch_df.head()

# Directory that holds one sub-directory per profile (read by generate_dataset)
BASE_DIR = "instagram_cleared"

# Build the text + traits table for the training profiles
train_dataset = generate_dataset(arch_df)

# Create X,y sets - pop() removes the columns from train_dataset as they are extracted
X_train = train_dataset.pop("text")
y_train = format_labels(train_dataset)

4it [00:00, 32.26it/s]

['innocent', 'sage', 'explorer', 'outlaw', 'magician', 'hero', 'lover', 'jester', 'everyman', 'caregiver', 'ruler', 'creator', 'dominant', 'submissive', 'maximalist', 'minimalist', 'inspiring', 'systematic', 'discovering', 'conservative', 'verifying', 'overlooking', 'sharpening', 'harmonic', 'empathic', 'matter_of_fact', 'brave', 'protective', 'generous', 'thrifty', 'favourable', 'balanced', 'sensuality', 'intelligent', 'believe', 'egocentric', 'allocentric']


508it [00:14, 34.06it/s]


In [16]:
# Restore the saved model and evaluate it on the training set
test_model = tf.keras.models.load_model("train_test_instagram/")
new_train_results = test_model.evaluate(X_train, y_train)

print("--- ACCURACY ON TRAINING DATASET ---")

# NOTE(review): the last 37 entries are presumably the per-trait accuracy
# metrics - confirm against test_model.metrics_names
train_accuracy_values = new_train_results[-37:]
print('Average accuracy on training set - loaded model:', np.mean(train_accuracy_values))

--- ACCURACY ON TRAINING DATASET ---
Average accuracy on training set - loaded model: 0.444988293422235


## AI - Recurrent Neural Network (LSTM) - accuracy on test dataset

In [17]:
# Load the .csv with the test-set archetype annotations
arch_df = pd.read_csv('test_archetypes_pl.csv', index_col=0)

# Save the order of columns
trait_list = arch_df.columns.tolist()

# Print the column list (this .head() is not displayed - it is not the cell's last expression)
print(trait_list)
arch_df.head()

# Table preprocessing - replace all NaN with 2 (Unrelated/Don't know class); the values are not rescaled here
arch_df = arch_df.fillna(2.0)

# Remove duplicated annotations, to exclude conflicting entries
arch_df = arch_df[~arch_df.index.duplicated(keep='first')]

# Not displayed either, for the same reason as above
arch_df.head()

# Directory that holds one sub-directory per profile (read by generate_dataset)
BASE_DIR = "instagram_cleared"

# Build the text + traits table for the test profiles
test_dataset = generate_dataset(arch_df)

# Create X,y sets - pop() removes the columns from test_dataset as they are extracted
X_test = test_dataset.pop("text")
y_test = format_labels(test_dataset)

0it [00:00, ?it/s]

['innocent', 'sage', 'explorer', 'outlaw', 'magician', 'hero', 'lover', 'jester', 'everyman', 'caregiver', 'ruler', 'creator', 'dominant', 'submissive', 'maximalist', 'minimalist', 'inspiring', 'systematic', 'discovering', 'conservative', 'verifying', 'overlooking', 'sharpening', 'harmonic', 'empathic', 'matter_of_fact', 'brave', 'protective', 'generous', 'thrifty', 'favourable', 'balanced', 'sensuality', 'intelligent', 'believe', 'egocentric', 'allocentric']


177it [00:05, 33.68it/s]


In [18]:
# Evaluate the already loaded model on the test set
new_test_results = test_model.evaluate(X_test, y_test)

print("--- ACCURACY ON TEST DATASET ---")

# NOTE(review): the last 37 entries are presumably the per-trait accuracy
# metrics - confirm against test_model.metrics_names
test_accuracy_values = new_test_results[-37:]
print('Average accuracy on test dataset - loaded model:', np.mean(test_accuracy_values))

--- ACCURACY ON TEST DATASET ---
Average accuracy on test dataset - loaded model: 0.5167201066742072
