In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score



In [2]:
# Fixed seed for the decision tree. scikit-learn randomly permutes the features
# at every split, so without a seed equally good splits can be chosen
# differently from run to run, which changes the hyper-parameters selected by
# the grid search and every score reported further down.
RANDOM_STATE = 42

# Define the Decision Tree classifier
classifier = DecisionTreeClassifier(random_state=RANDOM_STATE)

# Define the parameters for grid search (2 * 5 * 3 * 3 = 90 combinations)
parameters = {
    'criterion': ['gini', 'entropy'],  # Criterion for splitting
    'max_depth': [None, 10, 20, 30, 40],  # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split a node
    'min_samples_leaf': [1, 2, 4]  # Minimum number of samples required at each leaf node
}

# Initialize GridSearchCV: 5-fold cross-validation, candidates ranked by accuracy
classifier_dt = GridSearchCV(classifier, param_grid=parameters, scoring='accuracy', cv=5)

# Load the training and test data
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Encode the 'Gender' column in both training and test data.
# The encoder is fitted on the training data only and then reused for the test
# data, so both sets share the same label -> integer mapping.
# NOTE: this lookup happens before the rename below, so both CSV headers must
# already contain a column named exactly 'Gender'.
label_encoder = LabelEncoder()
train_data['Gender'] = label_encoder.fit_transform(train_data['Gender'])
test_data['Gender'] = label_encoder.transform(test_data['Gender'])

# Define column names
column_names = ['Gender', 'Age', 'openness', 'neuroticism', 'conscientiousness', 'agreeableness', 'extraversion', 'Personality (class label)']

# Assign column names to both datasets.
# This is a positional rename: it assumes each CSV has exactly these eight
# columns in this order (TODO confirm against the files).
train_data.columns = column_names
test_data.columns = column_names



In [3]:
# Separate the predictor columns from the class label for each dataset
target_column = 'Personality (class label)'

x_train = train_data.drop(columns=target_column)
y_train = train_data[target_column]

x_test = test_data.drop(columns=target_column)
y_test = test_data[target_column]



In [4]:
# Run the grid search on the training data, then predict the test labels.
# fit() returns the fitted search object, so the prediction can be chained.
y_pred = classifier_dt.fit(x_train, y_train).predict(x_test)


In [5]:

# Calculate accuracy
# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy happens to
# be symmetric, so the value is the same either way, but the documented order
# keeps this call in line with the other metric calls on these predictions.
score = accuracy_score(y_test, y_pred)
print("Accuracy:", score)


Accuracy: 0.24761904761904763


In [6]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Compute support-weighted precision, recall and F1 in one pass over the
# scorer functions, keeping each result under its own name.
precision, recall, f1 = (
    scorer(y_test, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Report the three metrics
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Precision: 0.3471224235838357
Recall: 0.24761904761904763
F1 Score: 0.2681666134153557
