In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, LSTM, GRU, Input, concatenate
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix

2025-05-02 17:16:09.518781: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746206169.778416      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746206169.863536      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
df = pd.read_csv("/kaggle/input/mypersonality/mypersonality_with_sentiment.csv", encoding='ISO-8859-1')

df['TRANSITIVITY'] = df['TRANSITIVITY'].fillna('mean')

In [3]:
import re
def clean_text(text):
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)  # Remove URLs
    text = re.sub(r"[^a-zA-Z]", " ", text)  # Remove special characters and numbers
    text = re.sub(r"\s+", " ", text)  # Remove extra whitespaces
    return text.lower().strip()

In [4]:
df['cleaned_STATUS'] = df['STATUS'].apply(clean_text)

In [5]:
# Step 3: Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['cleaned_STATUS'])

# Convert text to sequences of integers
sequences = tokenizer.texts_to_sequences(df['cleaned_STATUS'])

In [6]:
# Step 4: Padding (ensure uniform length)
max_len = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')

In [7]:
label_encoder = LabelEncoder()
y = df[['cEXT', 'cNEU', 'cAGR', 'cCON', 'cOPN']]
y_encoded = y.apply(label_encoder.fit_transform)
y = y_encoded

y

Unnamed: 0,cEXT,cNEU,cAGR,cCON,cOPN
0,0,1,0,0,1
1,0,1,0,0,1
2,0,1,0,0,1
3,0,1,0,0,1
4,0,1,0,0,1
...,...,...,...,...,...
9912,0,0,1,0,1
9913,1,1,1,1,1
9914,1,1,1,1,1
9915,0,1,0,0,1


In [8]:
tf_vectorizer = TfidfVectorizer(use_idf=False, norm=None)
tf_embeddings = tf_vectorizer.fit_transform(df['cleaned_STATUS']).toarray()
df['tf_embedding'] = list(tf_embeddings)
print(df)

                               #AUTHID  \
0     b7b7764cfa1c523e4e93ab2a79a946c4   
1     b7b7764cfa1c523e4e93ab2a79a946c4   
2     b7b7764cfa1c523e4e93ab2a79a946c4   
3     b7b7764cfa1c523e4e93ab2a79a946c4   
4     b7b7764cfa1c523e4e93ab2a79a946c4   
...                                ...   
9912  deb899e426c1a5c66c24eeb0d7df6257   
9913  ea28a927cb6663480ea33ca917c3c8ba   
9914  ea28a927cb6663480ea33ca917c3c8ba   
9915  5532642937eb3497a43e15dbb23a9d2d   
9916  a286b7286b1247d4a7851709e9f31e1e   

                                                 STATUS  sEXT  sNEU  sAGR  \
0                           likes the sound of thunder.  2.65  3.00  3.15   
1     is so sleepy it's not even funny that's she ca...  2.65  3.00  3.15   
2     is sore and wants the knot of muscles at the b...  2.65  3.00  3.15   
3            likes how the day sounds in this new song.  2.65  3.00  3.15   
4                                           is home. <3  2.65  3.00  3.15   
...                              

In [9]:
X = np.array(df['tf_embedding'].tolist())

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
#Logistic Regression
# Train individual base classifiers
num_traits = y.shape[1]
base_predictions_LR = np.zeros((X_test.shape[0], num_traits))
for i, trait in enumerate(y_train.columns):
    print(f"\nTraining Logistic Regression for {trait}...")

    # Logistic Regression Model
    lr_model = LogisticRegression(max_iter=500, random_state=42)

    # Access the i-th column of y_train using .iloc
    lr_model.fit(X_train, y_train.iloc[:, i])

    # Store probability predictions
    base_predictions_LR[:, i] = lr_model.predict_proba(X_test)[:, 1]

    # Predict class labels
    y_pred = lr_model.predict(X_test)

    # Print performance metrics
    print(f"LR Accuracy for {trait}: {accuracy_score(y_test.iloc[:, i], y_pred):.4f}")
    print(f"Classification Report for Trait {trait}:\n", classification_report(y_test.iloc[:, i], y_pred))
    print(confusion_matrix(y_test.iloc[:, i],y_pred))


Training Logistic Regression for cEXT...
LR Accuracy for cEXT: 0.5963
Classification Report for Trait cEXT:
               precision    recall  f1-score   support

           0       0.62      0.74      0.68      1131
           1       0.54      0.41      0.47       853

    accuracy                           0.60      1984
   macro avg       0.58      0.57      0.57      1984
weighted avg       0.59      0.60      0.59      1984

[[834 297]
 [504 349]]

Training Logistic Regression for cNEU...
LR Accuracy for cNEU: 0.6351
Classification Report for Trait cNEU:
               precision    recall  f1-score   support

           0       0.67      0.82      0.74      1255
           1       0.51      0.31      0.39       729

    accuracy                           0.64      1984
   macro avg       0.59      0.57      0.56      1984
weighted avg       0.61      0.64      0.61      1984

[[1031  224]
 [ 500  229]]

Training Logistic Regression for cAGR...
LR Accuracy for cAGR: 0.5902
Class

In [12]:
tf_vectorizer = TfidfVectorizer(norm='l2')
tfidf_embeddings = tf_vectorizer.fit_transform(df['cleaned_STATUS']).toarray()
df['tfidf_embedding'] = list(tfidf_embeddings)
print(df)

                               #AUTHID  \
0     b7b7764cfa1c523e4e93ab2a79a946c4   
1     b7b7764cfa1c523e4e93ab2a79a946c4   
2     b7b7764cfa1c523e4e93ab2a79a946c4   
3     b7b7764cfa1c523e4e93ab2a79a946c4   
4     b7b7764cfa1c523e4e93ab2a79a946c4   
...                                ...   
9912  deb899e426c1a5c66c24eeb0d7df6257   
9913  ea28a927cb6663480ea33ca917c3c8ba   
9914  ea28a927cb6663480ea33ca917c3c8ba   
9915  5532642937eb3497a43e15dbb23a9d2d   
9916  a286b7286b1247d4a7851709e9f31e1e   

                                                 STATUS  sEXT  sNEU  sAGR  \
0                           likes the sound of thunder.  2.65  3.00  3.15   
1     is so sleepy it's not even funny that's she ca...  2.65  3.00  3.15   
2     is sore and wants the knot of muscles at the b...  2.65  3.00  3.15   
3            likes how the day sounds in this new song.  2.65  3.00  3.15   
4                                           is home. <3  2.65  3.00  3.15   
...                              

In [13]:
X = np.array(df['tfidf_embedding'].tolist())
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

num_traits = y.shape[1]
base_predictions_SVM = np.zeros((X_test.shape[0], num_traits))
# Train individual base classifiers
for i, trait in enumerate(y.columns):
    print(f"\nTraining Support Vector Machine for {trait}...")

    # SVC Model
    svm_model = SVC(probability = True,max_iter=500, random_state=42)

    # Access the i-th column of y_train using .iloc
    svm_model.fit(X_train, y_train.iloc[:, i])

    # Store probability predictions
    base_predictions_SVM[:, i] =svm_model.predict_proba(X_test)[:, 1]

    # Predict class labels
    y_pred = svm_model.predict(X_test)

    # Print performance metrics
    print(f"SVM Accuracy for {trait}: {accuracy_score(y_test.iloc[:, i], y_pred):.4f}")
    print(f"Classification Report for Trait {trait}:\n", classification_report(y_test.iloc[:, i], y_pred))




Training Support Vector Machine for cEXT...




SVM Accuracy for cEXT: 0.5307
Classification Report for Trait cEXT:
               precision    recall  f1-score   support

           0       0.60      0.54      0.57      1131
           1       0.46      0.52      0.49       853

    accuracy                           0.53      1984
   macro avg       0.53      0.53      0.53      1984
weighted avg       0.54      0.53      0.53      1984


Training Support Vector Machine for cNEU...




SVM Accuracy for cNEU: 0.5494
Classification Report for Trait cNEU:
               precision    recall  f1-score   support

           0       0.66      0.58      0.62      1255
           1       0.41      0.49      0.44       729

    accuracy                           0.55      1984
   macro avg       0.53      0.54      0.53      1984
weighted avg       0.57      0.55      0.56      1984


Training Support Vector Machine for cAGR...




SVM Accuracy for cAGR: 0.5297
Classification Report for Trait cAGR:
               precision    recall  f1-score   support

           0       0.50      0.51      0.50       927
           1       0.56      0.55      0.55      1057

    accuracy                           0.53      1984
   macro avg       0.53      0.53      0.53      1984
weighted avg       0.53      0.53      0.53      1984


Training Support Vector Machine for cCON...




SVM Accuracy for cCON: 0.5307
Classification Report for Trait cCON:
               precision    recall  f1-score   support

           0       0.56      0.57      0.56      1053
           1       0.50      0.49      0.50       931

    accuracy                           0.53      1984
   macro avg       0.53      0.53      0.53      1984
weighted avg       0.53      0.53      0.53      1984


Training Support Vector Machine for cOPN...




SVM Accuracy for cOPN: 0.6421
Classification Report for Trait cOPN:
               precision    recall  f1-score   support

           0       0.29      0.34      0.31       483
           1       0.78      0.74      0.76      1501

    accuracy                           0.64      1984
   macro avg       0.54      0.54      0.54      1984
weighted avg       0.66      0.64      0.65      1984



In [15]:
!pip install gensim

Collecting scipy<1.14.0,>=1.7.0 (from gensim)
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.6/38.6 MB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.15.2
    Uninstalling scipy-1.15.2:
      Successfully uninstalled scipy-1.15.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tsfresh 0.21.0 requires scipy>=1.14.0; python_version >= "3.10", but you have scipy 1.13.1 which is incompatible.
nilearn 0.11.1 req

In [16]:
!pip install --upgrade numpy
!pip install --upgrade gensim

Collecting numpy
  Downloading numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m70.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mkl-umath 0.1.1 requires numpy<1.27.0,>=1.26.4, but you have numpy 2.2.5 which is incompatible.
mkl-random 1.2.4 requires numpy<1.27.0,>=1.26.4, but you have

In [17]:
import gensim
import gensim.downloader as api
from sklearn.feature_extraction.text import TfidfVectorizer

print("Downloading GloVe model...")
glove_model = api.load("glove-wiki-gigaword-300")

def get_glove_embedding(text):
    words = text.split()
    embedding = np.zeros(300)
    valid_words = 0
    for word in words:
        if word in glove_model:
            embedding += glove_model[word]
            valid_words += 1
    return embedding / valid_words if valid_words > 0 else embedding

df['glove_embedding'] = df['cleaned_STATUS'].apply(get_glove_embedding)

Downloading GloVe model...


In [18]:
from tqdm import tqdm
from transformers import BertTokenizer,TFBertModel
import tensorflow as tf
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = TFBertModel.from_pretrained('bert-base-uncased')

def get_bert_embedding_tf(text):
    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding='max_length', max_length=128)
    outputs = model(inputs)
    cls_embedding = outputs.last_hidden_state[:, 0, :]  # shape: (1, 768)
    return cls_embedding.numpy().squeeze()

bert_embeddings = []

for text in tqdm(df['cleaned_STATUS'], desc="Generating BERT embeddings"):
    emb = get_bert_embedding_tf(text)
    bert_embeddings.append(emb)

# Save embeddings to the DataFrame
df['bert_embedding'] = bert_embeddings


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

2025-05-02 18:13:00.100206: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of 

In [19]:
df['bert_embedding']

0       [-0.58995575, 0.35185087, -0.44570354, -0.1526...
1       [-0.021560851, 0.1460592, 0.25796595, 0.053007...
2       [-0.2930482, -0.10041052, 0.053389184, -0.4097...
3       [0.04045186, 0.14642319, 0.049569365, 0.068141...
4       [-0.49706703, -0.061802343, 0.12012787, -0.027...
                              ...                        
9912    [0.19637783, 0.22456835, -0.44969934, 0.019096...
9913    [-0.36582077, 0.10346095, 0.31438214, -0.19925...
9914    [-0.23742399, 0.23591363, 0.11827278, -0.06432...
9915    [-0.05553739, 0.37499806, -0.05417125, 0.09915...
9916    [0.015811851, -0.071456686, 0.28310397, -0.050...
Name: bert_embedding, Length: 9917, dtype: object

In [20]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Prepare Inputs
X = np.array(df['bert_embedding'].tolist())  # shape: (samples, 768)

embedding_dim = X.shape[1]  # 768
num_traits = y.shape[1]

# Reshape for LSTM: (samples, time_steps=1, features=768)
X = X.reshape(-1, 1, embedding_dim)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

base_predictions_BiLSTM = np.zeros((X_test.shape[0], num_traits))

# Define model
def build_bilstm(input_shape):
    model = Sequential([
        Bidirectional(LSTM(64, return_sequences=False), input_shape=input_shape),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train one model per trait
for i, trait in enumerate(y.columns):
    print(f"\n Training BiLSTM for {trait}...")

    model = build_bilstm((1, embedding_dim))  # input_shape = (timesteps, features)

    model.fit(
        X_train,
        y_train[trait],
        epochs=5,
        batch_size=32,
        validation_split=0.1,
        verbose=1
    )

    # Predict and evaluate
    proba = model.predict(X_test)
    preds = (proba > 0.5).astype(int)
    base_predictions_BiLSTM[:, i] = proba[:, 0]

    print(f"\n BiLSTM Accuracy for {trait}: {accuracy_score(y_test[trait], preds):.4f}")
    print(f" Classification Report:\n{classification_report(y_test[trait], preds)}")
    print(f" Confusion Matrix:\n{confusion_matrix(y_test[trait], preds)}")



 Training BiLSTM for cEXT...


  super().__init__(**kwargs)


Epoch 1/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 0.5652 - loss: 0.6868 - val_accuracy: 0.5705 - val_loss: 0.6807
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5740 - loss: 0.6768 - val_accuracy: 0.5705 - val_loss: 0.6791
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5888 - loss: 0.6703 - val_accuracy: 0.5718 - val_loss: 0.6765
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5886 - loss: 0.6689 - val_accuracy: 0.5718 - val_loss: 0.7023
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6045 - loss: 0.6608 - val_accuracy: 0.5781 - val_loss: 0.6755
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step

 BiLSTM Accuracy for cEXT: 0.5771
 Classification Report:
              precision    recall  f1-score   support

       

  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - accuracy: 0.6236 - loss: 0.6647 - val_accuracy: 0.6310 - val_loss: 0.6597
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.6185 - loss: 0.6601 - val_accuracy: 0.6335 - val_loss: 0.6715
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6328 - loss: 0.6443 - val_accuracy: 0.6146 - val_loss: 0.6641
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6552 - loss: 0.6270 - val_accuracy: 0.6096 - val_loss: 0.6681
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6518 - loss: 0.6207 - val_accuracy: 0.6134 - val_loss: 0.6713
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step

 BiLSTM Accuracy for cNEU: 0.6346
 Classification Report:
              precision    recall  f1-score   support

           0    

  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.5244 - loss: 0.6950 - val_accuracy: 0.5756 - val_loss: 0.6793
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5513 - loss: 0.6877 - val_accuracy: 0.5453 - val_loss: 0.6785
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5568 - loss: 0.6819 - val_accuracy: 0.5668 - val_loss: 0.6759
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5699 - loss: 0.6708 - val_accuracy: 0.5957 - val_loss: 0.6684
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5966 - loss: 0.6610 - val_accuracy: 0.5932 - val_loss: 0.6734
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

 BiLSTM Accuracy for cAGR: 0.5645
 Classification Report:
              precision    recall  f1-score   support

           0      

  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.5377 - loss: 0.6926 - val_accuracy: 0.5693 - val_loss: 0.6833
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5901 - loss: 0.6754 - val_accuracy: 0.5353 - val_loss: 0.6867
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5920 - loss: 0.6691 - val_accuracy: 0.5869 - val_loss: 0.6910
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6196 - loss: 0.6509 - val_accuracy: 0.5504 - val_loss: 0.6834
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6232 - loss: 0.6439 - val_accuracy: 0.5642 - val_loss: 0.6754
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step

 BiLSTM Accuracy for cCON: 0.5917
 Classification Report:
              precision    recall  f1-score   support

           0      

  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.7423 - loss: 0.5790 - val_accuracy: 0.7418 - val_loss: 0.5540
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7413 - loss: 0.5654 - val_accuracy: 0.7418 - val_loss: 0.5642
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7379 - loss: 0.5623 - val_accuracy: 0.7406 - val_loss: 0.5633
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7490 - loss: 0.5367 - val_accuracy: 0.7431 - val_loss: 0.5492
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7525 - loss: 0.5208 - val_accuracy: 0.7406 - val_loss: 0.5531
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step

 BiLSTM Accuracy for cOPN: 0.7626
 Classification Report:
              precision    recall  f1-score   support

           0     

In [21]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Convert BERT embeddings column to NumPy array
X = np.array(df['bert_embedding'].tolist())  # shape: (samples, 768)


# Reshape for Conv1D input: (samples, timesteps, features)
X = X.reshape(-1, 768, 1)

# Train/Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# CNN model
def build_cnn(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=5, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
        Conv1D(filters=128, kernel_size=3, activation='relu'),
        GlobalMaxPooling1D(),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Store predictions
base_predictions_CNN = np.zeros((X_test.shape[0], y.shape[1]))

# Train model per trait
for i, trait in enumerate(y.columns):
    print(f"\nTraining CNN for trait: {trait}")

    model = build_cnn((768, 1))  # input shape = (timesteps, features)

    # Fit model — use .values.reshape(-1, 1) to ensure correct shape
    model.fit(
        X_train,
        y_train[trait].values.reshape(-1, 1),
        epochs=5,
        batch_size=32,
        validation_split=0.1,
        verbose=1
    )

    # Predict on test set
    proba = model.predict(X_test)
    preds = (proba > 0.5).astype(int)

    base_predictions_CNN[:, i] = proba[:, 0]

    print(f"\nAccuracy for {trait}: {accuracy_score(y_test[trait], preds):.4f}")
    print(f"Classification Report:\n{classification_report(y_test[trait], preds)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y_test[trait], preds)}")



Training CNN for trait: cEXT
Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 41ms/step - accuracy: 0.5452 - loss: 0.6952 - val_accuracy: 0.5705 - val_loss: 0.6871
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.5877 - loss: 0.6819 - val_accuracy: 0.5705 - val_loss: 0.6879
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 42ms/step - accuracy: 0.5654 - loss: 0.6853 - val_accuracy: 0.5705 - val_loss: 0.6839
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 45ms/step - accuracy: 0.5698 - loss: 0.6843 - val_accuracy: 0.5705 - val_loss: 0.6836
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 42ms/step - accuracy: 0.5770 - loss: 0.6829 - val_accuracy: 0.5705 - val_loss: 0.6834
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step

Accuracy for cEXT: 0.5701
Classification Report:
              precision    recall  f1-score   support

           0      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/step - accuracy: 0.5994 - loss: 0.6834 - val_accuracy: 0.6297 - val_loss: 0.6629
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 46ms/step - accuracy: 0.6106 - loss: 0.6724 - val_accuracy: 0.6297 - val_loss: 0.6613
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - accuracy: 0.6189 - loss: 0.6675 - val_accuracy: 0.6297 - val_loss: 0.6595
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 42ms/step - accuracy: 0.6144 - loss: 0.6692 - val_accuracy: 0.6297 - val_loss: 0.6599
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 46ms/step - accuracy: 0.6252 - loss: 0.6668 - val_accuracy: 0.6297 - val_loss: 0.6595
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step

Accuracy for cNEU: 0.6326
Classification Report:
              precision    recall  f1-score   support

           0      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 42ms/step - accuracy: 0.5142 - loss: 0.7025 - val_accuracy: 0.5630 - val_loss: 0.6887
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 40ms/step - accuracy: 0.5176 - loss: 0.6937 - val_accuracy: 0.5630 - val_loss: 0.6917
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 42ms/step - accuracy: 0.4960 - loss: 0.6949 - val_accuracy: 0.5630 - val_loss: 0.6887
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 45ms/step - accuracy: 0.5278 - loss: 0.6922 - val_accuracy: 0.5630 - val_loss: 0.6883
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 40ms/step - accuracy: 0.5219 - loss: 0.6924 - val_accuracy: 0.5630 - val_loss: 0.6868
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step

Accuracy for cAGR: 0.5328
Classification Report:
              precision    recall  f1-score   support

           0       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 41ms/step - accuracy: 0.5190 - loss: 0.7046 - val_accuracy: 0.5567 - val_loss: 0.6893
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 45ms/step - accuracy: 0.5226 - loss: 0.6932 - val_accuracy: 0.5567 - val_loss: 0.6885
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - accuracy: 0.5439 - loss: 0.6900 - val_accuracy: 0.5567 - val_loss: 0.6890
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - accuracy: 0.5447 - loss: 0.6902 - val_accuracy: 0.5567 - val_loss: 0.6888
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - accuracy: 0.5478 - loss: 0.6889 - val_accuracy: 0.5567 - val_loss: 0.6887
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step

Accuracy for cCON: 0.5307
Classification Report:
              precision    recall  f1-score   support

           0       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 46ms/step - accuracy: 0.7251 - loss: 0.6084 - val_accuracy: 0.7418 - val_loss: 0.5888
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - accuracy: 0.7404 - loss: 0.5831 - val_accuracy: 0.7418 - val_loss: 0.5778
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - accuracy: 0.7427 - loss: 0.5752 - val_accuracy: 0.7418 - val_loss: 0.5771
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 48ms/step - accuracy: 0.7419 - loss: 0.5790 - val_accuracy: 0.7418 - val_loss: 0.5720
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 45ms/step - accuracy: 0.7367 - loss: 0.5805 - val_accuracy: 0.7418 - val_loss: 0.5717
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step

Accuracy for cOPN: 0.7566
Classification Report:
              precision    recall  f1-score   support

           0      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Bidirectional, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Assuming df['glove_embedding'] is a list of fixed-size vectors
X = np.array(df['glove_embedding'].tolist())  # shape: (samples, embedding_dim)

base_predictions_BIGRU = np.zeros((X_test.shape[0], num_traits))  # holdout size = 20%


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define model builder (without Embedding layer, since you're using GloVe vectors)
def build_bigru(input_shape):
    model = Sequential([
        Bidirectional(GRU(64, return_sequences=False), input_shape=input_shape),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Loop over each personality trait
for i, trait in enumerate(y.columns):
    print(f"\nTraining BiGRU for {trait}...")

    model = build_bigru((X_train.shape[1], 1))  # input_shape: (timesteps, features)
    
    # Reshape X for RNN input: (samples, timesteps, features) => here we use (samples, embedding_dim, 1)
    model.fit(X_train.reshape(-1, X_train.shape[1], 1), y_train.iloc[:, i],
              epochs=5, batch_size=32, validation_split=0.1, verbose=1)

    proba = model.predict(X_test.reshape(-1, X_test.shape[1], 1))
    preds = (proba > 0.5).astype(int)

    base_predictions_BIGRU[:, i] = proba[:, 0]

    print(f"BiGRU Accuracy for {trait}: {accuracy_score(y_test.iloc[:, i], preds):.4f}")
    print(f"Classification Report for Trait {trait}:")
    print(classification_report(y_test.iloc[:, i], preds))
    print(confusion_matrix(y_test.iloc[:, i], preds))



Training BiGRU for cEXT...
Epoch 1/5


  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 174ms/step - accuracy: 0.5809 - loss: 0.6841 - val_accuracy: 0.5705 - val_loss: 0.6832
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 170ms/step - accuracy: 0.5849 - loss: 0.6797 - val_accuracy: 0.5705 - val_loss: 0.6831
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 172ms/step - accuracy: 0.5767 - loss: 0.6815 - val_accuracy: 0.5705 - val_loss: 0.6843
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 171ms/step - accuracy: 0.5738 - loss: 0.6827 - val_accuracy: 0.5705 - val_loss: 0.6831
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 169ms/step - accuracy: 0.5760 - loss: 0.6817 - val_accuracy: 0.5705 - val_loss: 0.6835
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step
BiGRU Accuracy for cEXT: 0.5701
Classification Report for Trait cEXT:
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 182ms/step - accuracy: 0.6208 - loss: 0.6682 - val_accuracy: 0.6297 - val_loss: 0.6595
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 182ms/step - accuracy: 0.6180 - loss: 0.6646 - val_accuracy: 0.6297 - val_loss: 0.6620
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 173ms/step - accuracy: 0.6270 - loss: 0.6604 - val_accuracy: 0.6297 - val_loss: 0.6605
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 177ms/step - accuracy: 0.6221 - loss: 0.6609 - val_accuracy: 0.6297 - val_loss: 0.6592
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 176ms/step - accuracy: 0.6234 - loss: 0.6598 - val_accuracy: 0.6322 - val_loss: 0.6614
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step
BiGRU Accuracy for cNEU: 0.6326
Classification Report for Trait cNEU:
              precision    recall  f1-score   

  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 177ms/step - accuracy: 0.5108 - loss: 0.6929 - val_accuracy: 0.5680 - val_loss: 0.6873
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 174ms/step - accuracy: 0.5252 - loss: 0.6918 - val_accuracy: 0.5542 - val_loss: 0.6910
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 192ms/step - accuracy: 0.5256 - loss: 0.6916 - val_accuracy: 0.5542 - val_loss: 0.6876
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 173ms/step - accuracy: 0.5303 - loss: 0.6914 - val_accuracy: 0.5605 - val_loss: 0.6864
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 170ms/step - accuracy: 0.5345 - loss: 0.6903 - val_accuracy: 0.5642 - val_loss: 0.6848
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step
BiGRU Accuracy for cAGR: 0.5398
Classification Report for Trait cAGR:
              precision    recall  f1-score   

  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 171ms/step - accuracy: 0.5359 - loss: 0.6913 - val_accuracy: 0.5567 - val_loss: 0.6873
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 170ms/step - accuracy: 0.5314 - loss: 0.6914 - val_accuracy: 0.5567 - val_loss: 0.6872
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 169ms/step - accuracy: 0.5423 - loss: 0.6893 - val_accuracy: 0.5529 - val_loss: 0.6894
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 169ms/step - accuracy: 0.5371 - loss: 0.6903 - val_accuracy: 0.5592 - val_loss: 0.6886
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 167ms/step - accuracy: 0.5419 - loss: 0.6889 - val_accuracy: 0.5579 - val_loss: 0.6881
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step
BiGRU Accuracy for cCON: 0.5328
Classification Report for Trait cCON:
              precision    recall  f1-score   

  super().__init__(**kwargs)


[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 173ms/step - accuracy: 0.7328 - loss: 0.6026 - val_accuracy: 0.7418 - val_loss: 0.5716
Epoch 2/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 179ms/step - accuracy: 0.7398 - loss: 0.5750 - val_accuracy: 0.7418 - val_loss: 0.5720
Epoch 3/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 173ms/step - accuracy: 0.7391 - loss: 0.5744 - val_accuracy: 0.7418 - val_loss: 0.5714
Epoch 4/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 168ms/step - accuracy: 0.7400 - loss: 0.5752 - val_accuracy: 0.7418 - val_loss: 0.5719
Epoch 5/5
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 170ms/step - accuracy: 0.7351 - loss: 0.5786 - val_accuracy: 0.7418 - val_loss: 0.5732
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step
BiGRU Accuracy for cOPN: 0.7566
Classification Report for Trait cOPN:
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# Shape: (n_samples, 5) each → concatenate across axis=1
meta_features = np.concatenate([
    base_predictions_LR,
    base_predictions_SVM,
    base_predictions_BIGRU,
    base_predictions_BiLSTM,
    base_predictions_CNN
], axis=1)  # shape: (n_samples, 15)

print(meta_features.shape)  # (samples, 15)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score, classification_report

def build_meta_dnn(input_dim, output_dim):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_dim,)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(output_dim, activation='sigmoid')  # for multilabel classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# input_dim = 25 (from 5 models × 5 traits), output_dim = 5 (5 traits)
meta_model = build_meta_dnn(input_dim=25, output_dim=5)

# Train the meta-classifier
meta_model.fit(meta_features, y_test, epochs=10, batch_size=32, validation_split=0.1)

# Predict
meta_preds_proba = meta_model.predict(meta_features)
meta_preds = (meta_preds_proba > 0.5).astype(int)

# Evaluate per trait
for i, trait in enumerate(y.columns):
    print(f"\n Meta-Classifier Accuracy for {trait}: {accuracy_score(y_test.iloc[:, i], meta_preds[:, i]):.4f}")
    print(classification_report(y_test.iloc[:, i], meta_preds[:, i]))



(1984, 25)
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.1071 - loss: 0.6865 - val_accuracy: 0.0503 - val_loss: 0.6569
Epoch 2/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0458 - loss: 0.6598 - val_accuracy: 0.0503 - val_loss: 0.6524
Epoch 3/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0493 - loss: 0.6509 - val_accuracy: 0.0503 - val_loss: 0.6503
Epoch 4/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0430 - loss: 0.6540 - val_accuracy: 0.0503 - val_loss: 0.6447
Epoch 5/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0465 - loss: 0.6494 - val_accuracy: 0.0503 - val_loss: 0.6423
Epoch 6/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0448 - loss: 0.6476 - val_accuracy: 0.0503 - val_loss: 0.6405
Epoch 7/10
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━