Personality Prediction System through CV Analysis

In [1]:
!pip install nltk scikit-learn pandas numpy




In [2]:
import nltk
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Download required NLTK data
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [3]:
# Simulated dataset for demonstration
data = {
    'CV_Text': [
        "Experienced data analyst with a knack for deriving insights from data...",
        "Creative designer with a strong sense of color theory and aesthetics...",
        "Organized project manager with a record of meeting deadlines...",
        "Customer service representative with a passion for helping people...",
    ],
    'Openness': [1, 1, 0, 0],
    'Conscientiousness': [1, 1, 1, 0],
    'Extraversion': [0, 1, 1, 1],
    'Agreeableness': [0, 1, 0, 1],
    'Emotional_Stability': [1, 0, 1, 1]
}
df = pd.DataFrame(data)


In [4]:
# Function to preprocess text
def preprocess_text(text):
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text.lower())
    filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
    return " ".join(filtered_words)

# Apply preprocessing to each CV text
df['Processed_Text'] = df['CV_Text'].apply(preprocess_text)


In [5]:
# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=500)
X = vectorizer.fit_transform(df['Processed_Text']).toarray()

# Target traits
y_openness = df['Openness']
y_conscientiousness = df['Conscientiousness']
y_extraversion = df['Extraversion']
y_agreeableness = df['Agreeableness']
y_emotional_stability = df['Emotional_Stability']


In [6]:
# Function to train and evaluate model for each trait
def train_and_evaluate(X, y, trait_name):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Results for {trait_name}:")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}\n")
    return model

# Train models for each personality trait
models = {}
models['Openness'] = train_and_evaluate(X, y_openness, "Openness")
models['Conscientiousness'] = train_and_evaluate(X, y_conscientiousness, "Conscientiousness")
models['Extraversion'] = train_and_evaluate(X, y_extraversion, "Extraversion")
models['Agreeableness'] = train_and_evaluate(X, y_agreeableness, "Agreeableness")
models['Emotional_Stability'] = train_and_evaluate(X, y_emotional_stability, "Emotional Stability")


Results for Openness:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0

Accuracy: 0.00



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results for Conscientiousness:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

Accuracy: 1.00

Results for Extraversion:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

Accuracy: 1.00

Results for Agreeableness:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       1.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0

Accuracy: 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results for Emotional Stability:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0

Accuracy: 0.00



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
# Sample new CV text
new_cv = "A detail-oriented software developer with a passion for solving complex problems..."

# Preprocess and vectorize
processed_cv = preprocess_text(new_cv)
vectorized_cv = vectorizer.transform([processed_cv]).toarray()

# Predict personality traits
predictions = {trait: model.predict(vectorized_cv)[0] for trait, model in models.items()}
print("Personality Predictions for the new CV:")
print(predictions)


Personality Predictions for the new CV:
{'Openness': 0, 'Conscientiousness': 1, 'Extraversion': 1, 'Agreeableness': 0, 'Emotional_Stability': 1}
