In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import InputLayer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate
from  scikeras.wrappers import KerasClassifier
import keras_tuner as kt

In [50]:
def load_data():
    train_data = pd.read_csv('train.csv')
    return train_data
train_data = load_data()
target_data = train_data["Personality"].copy()
training_data = train_data.drop(columns=["Personality","id"])

training_data['Socializing_effect'] = training_data['Social_event_attendance'] + training_data['Going_outside'] 
training_data['probability_of_having_friends'] = training_data['Socializing_effect'] / (training_data['Socializing_effect'].max() + 1e-5)
training_data['prob_of_going_outside'] = training_data['Going_outside'] / (training_data['Going_outside'].max() + 1e-5)
training_data['online_presence'] = training_data['Post_frequency'] * training_data['Friends_circle_size']

num_attributes = training_data.select_dtypes(include=[np.number]).columns.tolist()
cat_attributes = training_data.select_dtypes(exclude=[np.number]).columns.tolist()


corr_matrix = training_data.select_dtypes(include=[np.number]).corr()
training_data.head()

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Socializing_effect,probability_of_having_friends,prob_of_going_outside,online_presence
0,0.0,No,6.0,4.0,No,15.0,5.0,10.0,0.588235,0.571428,75.0
1,1.0,No,7.0,3.0,No,10.0,8.0,10.0,0.588235,0.428571,80.0
2,6.0,Yes,1.0,0.0,,3.0,0.0,1.0,0.058823,0.0,0.0
3,3.0,No,7.0,3.0,No,11.0,5.0,10.0,0.588235,0.428571,55.0
4,1.0,No,4.0,4.0,No,13.0,,8.0,0.470588,0.571428,


In [None]:
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean',missing_values=np.nan)),
    ('scaler', StandardScaler()),])

cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent', missing_values=np.nan)),
    ('one_hot_encoding', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

pre_processor = ColumnTransformer(transformers=[
    ('num', num_pipeline, num_attributes),
    ('cat', cat_pipeline, cat_attributes),
])

le = LabelEncoder()
X_processed = pre_processor.fit_transform(training_data)
y_processed = le.fit_transform(target_data)

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [54]:
metrics = [
    tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
    tf.keras.metrics.Precision(name="precision"),
    tf.keras.metrics.Recall(name="recall") ]


def build_model(hp):
    n_layers = hp.Int("no of layers", 1, 3, default=2)
    n_neurons = hp.Int("no of neurons", 32, 512, step=32, default=128)
    learning_rate = hp.Float("learning rate", 1e-4, 1e-2, sampling='log', default=1e-3)
    optimizer = hp.Choice("optimizer", ['adam', 'sgd'], default='adam')
    
    if optimizer == 'adam':
        opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    else:
        opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        
    model = tf.keras.Sequential()
    model.add(InputLayer(shape=(X_processed.shape[1],)))
    for _ in range(n_layers):
        model.add(Dense(n_neurons, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=metrics)
    
    return model 


In [56]:

def create_model(input_shape):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=input_shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
    return model

nn_clf = KerasClassifier(model=create_model,model__input_shape=(X_processed.shape[1],),epochs=10,
                         batch_size=32,verbose=1,random_state=42)

nn_clf.fit(X_processed, y_processed)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-07-07 23:37:10.979682: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-07-07 23:37:10.980811: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, othe

[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 1.0000 - loss: 0.1781 - precision: 0.9245 - recall: 0.9274
Epoch 2/10
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 1.0000 - loss: 0.1698 - precision: 0.9362 - recall: 0.9164
Epoch 3/10
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 1.0000 - loss: 0.1956 - precision: 0.9360 - recall: 0.9037
Epoch 4/10
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 1.0000 - loss: 0.2326 - precision: 0.9339 - recall: 0.8868
Epoch 5/10
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 1.0000 - loss: 0.3227 - precision: 0.9245 - recall: 0.8700
Epoch 6/10
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 1.0000 - loss: 0.3334 - precision: 0.9284 - recall: 0.8819
Epoch 7/10
[1m579/579[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

AttributeError: 'super' object has no attribute '__sklearn_tags__'

AttributeError: 'super' object has no attribute '__sklearn_tags__'

KerasClassifier(
	model=<function create_model at 0x378520ee0>
	build_fn=None
	warm_start=False
	random_state=42
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=32
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=10
	model__input_shape=(13,)
	class_weight=None
)

In [None]:
tuner = kt.RandomSearch(build_model, max_trials=5, objective='val_accuracy', overwrite=True, project_name='personality_tuning', seed=42)
tuner.search(X_train, y_train_encoded, epochs=10, validation_data=(X_val, y_val_encoded), batch_size=32)
top_parameters = tuner.get_best_hyperparameters()[0]
print("Best hyperparameters:", top_parameters.values)