### Imports and version checks
Load core libraries and confirm their versions for reproducibility.

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)

# Print the version of each core library, in import order (TensorFlow, NumPy,
# pandas, scikit-learn), so the environment behind the results is recorded.
print(tf.__version__)
print(np.__version__)
print(pd.__version__)
print(sklearn.__version__)


2.20.0
2.3.2
2.3.2
1.7.1


## Reading Data

In [2]:
# Load the personality CSV from the relative data/ directory into a DataFrame.
# NOTE(review): "datasert" looks like a typo for "dataset" — confirm the file
# name on disk before renaming anything.
df = pd.read_csv("data/personality_datasert.csv")

#### Preview raw data
Inspect the first few rows of the imported dataset to confirm structure and field names.

In [3]:
# Show the first rows of the frame as loaded (DataFrame.head defaults to 5 rows).
df.head()

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,No,4.0,6.0,No,13.0,5.0,Extrovert
1,9.0,Yes,0.0,0.0,Yes,0.0,3.0,Introvert
2,9.0,Yes,1.0,2.0,Yes,5.0,2.0,Introvert
3,0.0,No,6.0,7.0,No,14.0,8.0,Extrovert
4,3.0,No,9.0,4.0,No,8.0,5.0,Extrovert


    - Time_spent_Alone: Hours spent alone daily (0–11).
    - Stage_fear: Presence of stage fright (Yes/No).
    - Social_event_attendance: Frequency of social events (0–10).
    - Going_outside: Frequency of going outside (0–7).
    - Drained_after_socializing: Feeling drained after socializing (Yes/No).
    - Friends_circle_size: Number of close friends (0–15).
    - Post_frequency: Social media post frequency (0–10).
    - Personality: Target variable (Extrovert/Introvert).


In [4]:
# Encode the binary categorical columns as integers, one column at a time.
# An explicit per-column mapping (instead of a frame-wide `replace`) keeps the
# substitution away from unrelated columns and avoids the pandas FutureWarning
# about silent downcasting in `DataFrame.replace`.
BINARY_ENCODINGS = {
    "Stage_fear": {"Yes": 1, "No": 0},
    "Drained_after_socializing": {"Yes": 1, "No": 0},
    "Personality": {"Introvert": 0, "Extrovert": 1},
}

# Values that are not keys of a mapping (missing values, or 0/1 left by an
# earlier run of this cell) pass through unchanged, so re-running is harmless.
df = df.assign(**{
    column: df[column].map(lambda value: codes.get(value, value))
    for column, codes in BINARY_ENCODINGS.items()
})

  df = df.replace({


#### Check encoded values
Verify that categorical replacements were applied correctly.

In [5]:
# Show the first rows again to check that the Yes/No and personality labels
# now appear as 0/1.
df.head()

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,0,4.0,6.0,0,13.0,5.0,1
1,9.0,1,0.0,0.0,1,0.0,3.0,0
2,9.0,1,1.0,2.0,1,5.0,2.0,0
3,0.0,0,6.0,7.0,0,14.0,8.0,1
4,3.0,0,9.0,4.0,0,8.0,5.0,1


#### Create train/validation split
Hold out 20% of the data for validation to monitor generalization.

In [6]:
# Randomly draw 80% of the rows for training; the fixed seed makes the draw
# repeatable.
train_df = df.sample(frac=0.8, random_state=42).copy()
# Every row whose index label was not drawn above becomes the validation set.
held_out_rows = ~df.index.isin(train_df.index)
val_df = df.loc[held_out_rows].copy()

In [7]:
# Min-max scale every column except the target, using statistics computed on
# the training split only so nothing from validation leaks into the scaler.
feature_cols = train_df.columns[train_df.columns != "Personality"].tolist()

train_features = train_df[feature_cols]
min_val = train_features.min(axis=0)
max_val = train_features.max(axis=0)
# A constant column has zero range; swap in a tiny value so the division
# below never divides by zero.
value_range = (max_val - min_val).replace(0, 1e-9)

# Training features land in [0, 1]; validation features reuse the same
# min/range and may fall slightly outside that interval.
train_df[feature_cols] = train_features.sub(min_val).div(value_range)
val_df[feature_cols] = val_df[feature_cols].sub(min_val).div(value_range)

#### Normalize feature ranges
Scale each feature to the 0–1 range using min–max normalization to stabilize training.

In [8]:
# Split each frame into the feature matrix (all columns but the target)
# and the label vector (the "Personality" column).
y_train = train_df['Personality']
y_val = val_df['Personality']
X_train = train_df.drop(columns='Personality')
X_val = val_df.drop(columns='Personality')

# The model definition needs the number of input features, so keep it as a
# one-element list and display it as the cell output.
input_shape = [len(X_train.columns)]

input_shape

[7]

#### Split features and labels
Separate predictors from the target and record the model input shape.

## Creating a model

In [9]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)

# Small fully connected binary classifier: two 64-unit ReLU hidden layers
# followed by a single sigmoid unit that outputs the probability of class 1.
# The input shape is declared with an explicit Input object; passing
# `input_shape=` to the first Dense layer makes Keras emit a UserWarning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=tuple(input_shape)),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

#### Inspect model architecture
Summarize the Keras model to confirm layer shapes and parameter counts.

In [11]:
# Adam is a dependable default optimizer. Binary cross-entropy is the loss
# that matches a single sigmoid output, and accuracy is tracked alongside it
# as an easy-to-read metric.
compile_options = dict(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

#### Compile the model
Configure optimizer, loss, and metrics for binary classification.

## Training the model

In [12]:
# Train for 50 epochs with mini-batches of 32 rows per gradient update,
# evaluating on the validation split at the end of every epoch.
# `losses` receives the value returned by `fit`; its `.history` mapping is
# read later for the per-epoch accuracy.
losses = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
)

Epoch 1/50
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8935 - loss: 0.4117 - val_accuracy: 0.9517 - val_loss: 0.2202
Epoch 2/50
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9297 - loss: 0.2715 - val_accuracy: 0.9517 - val_loss: 0.2108
Epoch 3/50
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9297 - loss: 0.2680 - val_accuracy: 0.9517 - val_loss: 0.2098
Epoch 4/50
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9297 - loss: 0.2661 - val_accuracy: 0.9517 - val_loss: 0.2056
Epoch 5/50
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9297 - loss: 0.2638 - val_accuracy: 0.9517 - val_loss: 0.2038
Epoch 6/50
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9297 - loss: 0.2622 - val_accuracy: 0.9517 - val_loss: 0.2014
Epoch 7/50
[1m73/73[0m [32m━━━━━━━━━━

In [13]:
# Score the first three validation rows, then threshold the sigmoid outputs
# at 0.5 to turn them into hard 0/1 class labels.
first_rows = X_val.iloc[:3]
pred_samples = model.predict(first_rows)
pred_classes = (pred_samples > 0.5).astype(int)
pred_classes

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


array([[0],
       [1],
       [0]])

#### Quick prediction sample
Generate class labels for a few validation samples to sanity-check the trained model.

In [14]:
# True labels for the same first three validation rows, for side-by-side
# comparison with the predicted classes.
y_val.iloc[0:3]

1     0
4     1
11    0
Name: Personality, dtype: int64

#### Compare with true labels
Review ground-truth labels for the sampled validation rows.

## Accuracy

In [15]:
# Predict probabilities for the whole validation set and binarise at 0.5.
predictions = model.predict(X_val)
pred_labels = (predictions > 0.5).astype(int)

# Accuracy = share of rows whose thresholded prediction equals the true label.
matches = pred_labels.ravel() == y_val.to_numpy()
accuracy = matches.mean()

print("Validation Accuracy:", accuracy)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Validation Accuracy: 0.9517241379310345


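#### Extended evaluation metrics
First compare the final-epoch training and validation accuracy as a quick over-/underfitting check, then compute the confusion matrix, precision, recall, F1, and ROC AUC on the validation split.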

In [16]:
# Accuracy values recorded for the last training epoch.
fit_history = losses.history
train_acc, val_acc = fit_history['accuracy'][-1], fit_history['val_accuracy'][-1]

print(f"Final Training Accuracy: {train_acc:.4f}")
print(f"Final Validation Accuracy: {val_acc:.4f}")

# Rough heuristic: a train/validation gap above 5 points hints at
# overfitting; both accuracies below 60% hint at underfitting.
overfitting = train_acc > val_acc + 0.05
underfitting = train_acc < 0.6 and val_acc < 0.6
if overfitting:
    verdict = "Warning: Possible overfitting"
elif underfitting:
    verdict = "Warning: Possible underfitting"
else:
    verdict = "Model seems well-fitted"
print(verdict)


Final Training Accuracy: 0.9302
Final Validation Accuracy: 0.9517
Model seems well-fitted


In [18]:
# Flatten everything to 1-D arrays so scikit-learn receives matching shapes:
# true labels, predicted probabilities, and thresholded predictions.
true_labels = y_val.to_numpy()
probabilities = predictions.ravel()
pred_labels = pred_labels.ravel()

# Threshold-based metrics use the hard labels; ROC AUC is computed from the
# raw probabilities.
cm = confusion_matrix(true_labels, pred_labels)
precision = precision_score(true_labels, pred_labels)
recall = recall_score(true_labels, pred_labels)
f1 = f1_score(true_labels, pred_labels)
roc_auc = roc_auc_score(true_labels, probabilities)

# target_names follows the label encoding: 0 -> Introvert, 1 -> Extrovert.
report = classification_report(
    true_labels,
    pred_labels,
    target_names=["Introvert", "Extrovert"],
)

print("Confusion matrix:\n", cm)
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1 score: {f1:.3f}")
print(f"ROC AUC: {roc_auc:.3f}")
print("\nClassification report:\n", report)

Confusion matrix:
 [[251  11]
 [ 17 301]]
Precision: 0.965
Recall: 0.947
F1 score: 0.956
ROC AUC: 0.974

Classification report:
               precision    recall  f1-score   support

   Introvert       0.94      0.96      0.95       262
   Extrovert       0.96      0.95      0.96       318

    accuracy                           0.95       580
   macro avg       0.95      0.95      0.95       580
weighted avg       0.95      0.95      0.95       580

