In [None]:
# --- Configuration ---------------------------------------------------------
# Location of the pickled per-job models; the two parts are joined with
# os.path.join when the models are loaded.
MODEL_DIR = '../models'
MODEL_FILE = 'ensemble_models.pkl'

# NOTE(review): the three lists below are not referenced by the cells visible
# in this notebook; presumably they name survey columns / role labels used
# during training -- confirm before removing.
ROLE_COLS = ['DevType']
TECH_COLS = [
    'LanguageHaveWorkedWith',
    'DatabaseHaveWorkedWith',
    'WebframeHaveWorkedWith',
    'MiscTechHaveWorkedWith',
    'ToolsTechHaveWorkedWith',
]

EXCLUDE_ROLES = [
    'Other (please specify):',
    'Student',
    'Designer',
    'Educator',
    'Marketing or sales professional',
    'Engineering manager',
    'Senior Executive (C-Suite, VP, etc.)',
    'Product manager',
    'Engineer, site reliability',
]

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px

import pickle
import os

import warnings
# The prediction cell passes a plain list (no column names) to models that
# expose `feature_names_in_`; silence only the warning whose message starts
# with this text (presumably raised by scikit-learn on each such call).
warnings.filterwarnings("ignore", message="X does not have valid feature names")

### Load model

In [None]:
# Load the pickled models. `models_dict` is iterated later as
# (job, model) pairs, each model exposing `feature_names_in_` and
# `predict_proba`.
# SECURITY: pickle.load can execute arbitrary code while deserializing --
# only load model files that come from a trusted source.
model_path = os.path.join(MODEL_DIR, MODEL_FILE)
# Use a context manager so the file handle is closed even if unpickling fails.
with open(model_path, 'rb') as model_file:
    models_dict = pickle.load(model_file)
models_dict

## Predict job

In [None]:
# Skills to score against every job model. Each entry must match one of the
# models' feature names exactly (checked in the validation cells below).
skills = [
    'Scala',
    'Julia',
]

### Validate and preprocess

In [None]:
# Every model must expose the same feature names in the same order, because a
# single encoded skill vector is later fed to all of them.
models_features = [list(estimator.feature_names_in_)
                   for estimator in models_dict.values()]
identical = not any(names != models_features[0] for names in models_features)
if not identical:
    raise Exception("Features are expected to be the same for all models")

In [None]:
# Job titles are the keys of the models dictionary.
jobs = [*models_dict]

# The feature list is read from the first model.
first_model = models_dict[jobs[0]]
features = pd.Series(first_model.feature_names_in_)

# One-hot encode the request: 1 where the feature name is one of `skills`,
# 0 elsewhere, in the same order as `features`.
ohe_skills = features.isin(skills).astype(int)

In [None]:
# Validate skills and features
# Every requested skill must be a known model feature; an unknown skill would
# otherwise be dropped silently by the one-hot encoding.
skills_series = pd.Series(skills)
skills_in_features = skills_series.isin(features)
if not skills_in_features.all():
    # Report the offending skill *names*. Taking `.index` of the boolean mask
    # would only list their integer positions within `skills`.
    missing_features = skills_series[~skills_in_features].tolist()
    error_message = "Those skills are not a part of model: " + str(missing_features)
    raise Exception(error_message)

### Predict and plot

In [None]:
# Predict
# Build the single-row input once and score it with every job model.
# `[0][1]` takes the first (only) row and the second column of the returned
# probabilities (presumably the positive class -- confirm against training).
skills_row = [ohe_skills]
predictions = pd.Series(
    {job: model.predict_proba(skills_row)[0][1]
     for job, model in models_dict.items()}
).sort_values(ascending=False)

In [None]:
# Horizontal bar chart of the per-job scores, re-sorted ascending for plotting.
ranked_predictions = predictions.sort_values()
fig = px.bar(ranked_predictions, orientation='h')

# Hide the x axis, keep the y axis with a blank title, and drop the legend.
fig.update_xaxes(visible=False, showticklabels=False)
fig.update_yaxes(title='', visible=True)
fig.update_layout(showlegend=False)
fig.show()