In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score



In [2]:
# Canonical column names shared by both datasets; the last entry is the target.
column_names = ['Gender', 'Age', 'openness', 'neuroticism', 'conscientiousness', 'agreeableness', 'extraversion', 'Personality (class label)']

# Load the training and test data
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Normalise the headers BEFORE touching any column by name, so every later step
# relies on the canonical names above rather than on each CSV's raw header text.
# The assignment is positional -- assumes both files list their columns in this
# order (TODO confirm against the raw CSVs).
train_data.columns = column_names
test_data.columns = column_names

# Encode the 'Gender' column: fit the mapping on the training data only, then
# reuse that same mapping for the test data so both share identical codes.
label_encoder = LabelEncoder()
train_data['Gender'] = label_encoder.fit_transform(train_data['Gender'])
test_data['Gender'] = label_encoder.transform(test_data['Gender'])

# Define the KNN classifier
classifier = KNeighborsClassifier()

# Hyper-parameter grid explored by the search
parameters = {
    'n_neighbors': [3, 5, 7, 9],  # Number of neighbors to consider
    'weights': ['uniform', 'distance'],  # Weighting scheme for neighbors
    'metric': ['euclidean', 'manhattan']  # Distance metric
}

# Grid search over the parameters above, scored by accuracy with 5-fold CV
classifier_knn = GridSearchCV(classifier, param_grid=parameters, scoring='accuracy', cv=5)



In [3]:
# Name of the column holding the class label; every other column is a feature.
target_column = 'Personality (class label)'

# Training split: target series and feature frame
y_train = train_data[target_column]
x_train = train_data.drop(columns=target_column)

# Test split, built the same way
y_test = test_data[target_column]
x_test = test_data.drop(columns=target_column)

# Run the grid search on the training split, then predict labels for the test features
y_pred = classifier_knn.fit(x_train, y_train).predict(x_test)



In [4]:
# Calculate accuracy of the predictions against the test labels.
# accuracy_score expects (y_true, y_pred), so the ground truth goes first.
# Accuracy is symmetric, so the printed value is the same in either order;
# the fix keeps the call correct and consistent for the reader.
score = accuracy_score(y_test, y_pred)
print("Accuracy:", score)


Accuracy: 0.2634920634920635


In [5]:
from sklearn.metrics import f1_score, precision_score, recall_score

# All three metrics share the same multi-class averaging mode.
averaging = {'average': 'weighted'}

# Weighted precision of the predictions against the test labels
precision = precision_score(y_test, y_pred, **averaging)
print("Precision:", precision)

# Weighted recall
recall = recall_score(y_test, y_pred, **averaging)
print("Recall:", recall)

# Weighted F1 score
f1 = f1_score(y_test, y_pred, **averaging)
print("F1 Score:", f1)


Precision: 0.3376037429639673
Recall: 0.2634920634920635
F1 Score: 0.27967510951482727
