In [1]:
import numpy as np

from autohire.utils import parse_pdf, parse_resume_df
from autohire.bow import BagOfWords
from autohire.encoder import LabelEncoder
from autohire.model import BayesianMulticlassModel
from autohire.explainer import BayesianModelExplainer


if __name__ == "__main__":
    x_train, y_train = parse_resume_df()
    bag_of_words = BagOfWords(x_train)
    label_encoder = LabelEncoder(y_train)

    x_train = bag_of_words.get_counts(x_train)
    y_train = label_encoder.encode(y_train)
    model = BayesianMulticlassModel(len(label_encoder), len(bag_of_words))
    model.fit(x_train=x_train, y_train=y_train)

    x_test_input = parse_pdf("data/resumes/computers_2.pdf")
    x_test = bag_of_words.get_counts(x_test_input)
    result = model.predict(x_train[0])
    result = label_encoder.decode(result)

    for job in result[:5]:
        print(job)

    explainable_model = BayesianModelExplainer(label_encoder, bag_of_words)
    explainable_model.fit(x_train=x_train, y_train=y_train)

    print(
        """
ANALYSIS OF TRAINED PRIOR
-------------------------"""
    )
    explainable_model.explain()

    print(
        """
ANALYSIS OF TRAINED EVIDENCE
----------------------------"""
    )
    explainable_model.explain(x_test_input)


Data Science
Civil Engineer
Mechanical Engineer
Operations Manager
Health and fitness

ANALYSIS OF TRAINED PRIOR
-------------------------
Operations Manager: ges korea shipments hpm qatar monitored prepaid marshalling honeywell fat
DevOps Engineer: workload allocate eclerx biztalk birla wwf solid ada annet knoxed
PMO: dollars ratios pivots impacting collate saints particularly transitioned paced konkani
Hadoop: affect zookeeper hat hdp vaccines uses heights hiveql handy hpe
ETL Developer: mart niche advertisement aggregator agnel athena rejection tftput prabhakar tftpconnection
Business Analyst: proteus installations concerned zambia bau configurations hiral observation infonet plastics
Java Developer: resrent pjlce newspapers validations deduplication december debug debtor sierra ordering
Web Designing: golchha nettech movie nitka diifernt iv diagnosis cityspaceindia shooping incorporate
Data Science: yyyy addresses xii adjunct ymcaust advocator aeronautics accelerating accelerator v