In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder



In [2]:
# Base estimator. The seed is fixed so that the forest, the grid-search
# winner and every metric computed later are identical on each
# Restart Kernel -> Run All.
classifier = RandomForestClassifier(random_state=42)

# Hyper-parameter grid: 3 * 4 * 3 * 3 = 108 candidate combinations
parameters = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Exhaustive search scored by accuracy with 5-fold cross-validation.
# 108 candidates x 5 folds is a lot of fits, so spread them over all
# available cores (n_jobs=-1); this does not change the selected model.
classifier_rf = GridSearchCV(
    classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)

# Load the training and test data
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Encode the 'Gender' column as integers. The encoder is fitted on the
# training data only and then reused on the test data, so both share the
# same mapping; transform() raises if the test set contains a value that
# was not present in the training set.
label_encoder = LabelEncoder()
train_data['Gender'] = label_encoder.fit_transform(train_data['Gender'])
test_data['Gender'] = label_encoder.transform(test_data['Gender'])

# Canonical column names, applied positionally to both frames so that
# later cells can address the same labels in either dataset.
column_names = ['Gender', 'Age', 'openness', 'neuroticism', 'conscientiousness', 'agreeableness', 'extraversion', 'Personality (class label)']

# Assigning .columns raises if a frame does not have exactly 8 columns
train_data.columns = column_names
test_data.columns = column_names



In [3]:
# Separate the predictor columns from the label column in each dataset
target_col = 'Personality (class label)'

x_train, y_train = train_data.drop(columns=target_col), train_data[target_col]
x_test, y_test = test_data.drop(columns=target_col), test_data[target_col]



In [4]:
# Run the grid search on the training set, then label the test set.
# fit() returns the fitted searcher itself, so predict() can be chained.
y_pred = classifier_rf.fit(x_train, y_train).predict(x_test)


In [5]:
# Fraction of test rows whose predicted label matches the true label.
# Arguments follow scikit-learn's (y_true, y_pred) order.
score = accuracy_score(y_test, y_pred)
print("Accuracy:", score)


Accuracy: 0.3238095238095238


In [6]:
# Precision, recall and F1 on the test set, each computed with
# average='weighted'. The metric functions are imported in the notebook's
# top import cell.
weighted = {'average': 'weighted'}

precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Precision: 0.3896534531041568
Recall: 0.3238095238095238
F1 Score: 0.3442811561289491
