# Recommender System
---

## 1. Current Work Scope

### Import Libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
import keras
import pathlib

### Filter for Skills and Languages using Customized Jaccard Index

In [2]:
def score_skill(skill, skillFilter):
    """Score how well an applicant's skills cover the requested skills.

    This is the notebook's "customized Jaccard index": the applicant's skills
    are first restricted to those that appear in the filter, so the result is
    the number of requested skills the applicant has divided by the number of
    distinct requested skills. Matching is case-insensitive and extra
    applicant skills are not penalised.

    Args:
        skill: iterable of the applicant's skill names (strings).
        skillFilter: iterable of the requested skill names (strings).

    Returns:
        float in [0, 1], rounded to two decimals. An empty ``skillFilter``
        imposes no requirement and therefore scores 1.0 (the previous code
        raised ZeroDivisionError in that case).
    """
    owned = {name.upper() for name in skill}
    wanted = {name.upper() for name in skillFilter}

    # Nothing requested: every applicant trivially satisfies the filter.
    if not wanted:
        return 1.0

    matched = owned & wanted
    return round(len(matched) / len(wanted), 2)

In [3]:
def score_lang(lang, langFilter):
    """Score how well an applicant's languages cover the requested languages.

    Uses the same "customized Jaccard index" as the skill score: the number of
    requested languages the applicant speaks divided by the number of distinct
    requested languages. Matching is case-insensitive and additional applicant
    languages are not penalised.

    Args:
        lang: iterable of the applicant's language names (strings).
        langFilter: iterable of the requested language names (strings).

    Returns:
        float in [0, 1], rounded to two decimals. An empty ``langFilter``
        imposes no requirement and therefore scores 1.0 (the previous code
        raised ZeroDivisionError in that case).
    """
    spoken = {name.upper() for name in lang}
    wanted = {name.upper() for name in langFilter}

    # Nothing requested: every applicant trivially satisfies the filter.
    if not wanted:
        return 1.0

    matched = spoken & wanted
    return round(len(matched) / len(wanted), 2)

### Filter for Unfixed Age and Salary using Fuzzy Index

In [4]:
def score_age(age, ageFilter, tolerance):
    """Fuzzy membership of ``age`` in the requested age range.

    Args:
        age: the applicant's age.
        ageFilter: two-element sequence ``[min_age, max_age]``.
        tolerance: how far outside the range an age may fall and still
            receive a partial score.

    Returns:
        1 inside the range, a value rounded to two decimals that decreases
        linearly with the distance from the nearest bound while that distance
        is within ``tolerance``, and 0 otherwise.
    """
    youngest, oldest = ageFilter[0], ageFilter[1]
    span = tolerance + 1

    if youngest <= age <= oldest:
        return 1

    # Above the range but still within tolerance of the upper bound.
    if oldest <= age <= oldest + tolerance:
        return round((span - age + oldest) / span, 2)

    # Below the range but still within tolerance of the lower bound.
    if youngest - tolerance <= age <= youngest:
        return round((span - youngest + age) / span, 2)

    return 0

In [5]:
def score_salary(salary, salaryFilter, tol):
    """Fuzzy score of an applicant's salary against the offered range.

    Args:
        salary: the applicant's salary figure.
        salaryFilter: two-element sequence ``[low, high]``.
        tol: how far above ``high`` a salary may be and still receive a
            partial score.

    Returns:
        1 when the salary is inside the range or at/below its lower bound;
        a value rounded to two decimals that decreases linearly (and is
        clamped at 0) once the salary exceeds the upper bound; 0 when the
        salary cannot be ordered against the bounds at all (e.g. NaN).
        The previous code left the result unassigned in that last case and
        raised UnboundLocalError.
    """
    low, high = salaryFilter[0], salaryFilter[1]

    if low <= salary <= high:
        return 1

    if salary >= high:
        index = round((tol + 1 + high - salary) / (tol + 1), 2)
        # max() also turns a rounded -0.0 into a plain 0.
        return max(0, index)

    # A salary at or below the lower bound is scored as a full match.
    if salary <= low:
        return 1

    # No comparison succeeded: score as "no match", like score_age's fallback.
    return 0

### Demo for Giving Person Score/Index

In [6]:
# One sample applicant to sanity-check the four scoring functions.
person = {
    "skill": ["C", "C++", "Java", "Python"],
    "lang": ["Indonesian", "English", "Mandarin"],
    "age": 22,
    "salary": 1.5,
}

# Recruiter-side criteria the applicant is scored against.
skillFilter = ["C", "C++", "Java"]
langFilter = ["English", "Mandarin", "Javanese"]
ageFilter, tolerance = [23, 27], 5
salaryFilter, tol = [2, 5], 1

# Scores are printed in the order: skill, language, age, salary.
demo_scores = (
    score_skill(person["skill"], skillFilter),
    score_lang(person["lang"], langFilter),
    score_age(person["age"], ageFilter, tolerance),
    score_salary(person["salary"], salaryFilter, tol),
)
for demo_score in demo_scores:
    print(demo_score)

1.0
0.67
0.83
1


### Initializing Data for #1 Model Training and Testing

In [7]:
# Hand-written applicant records. Each record holds a list of skill names
# ("SKILL"), a list of language names ("LANG"), an "AGE" and a "SALARY" figure;
# all names are already upper-case. The salary unit is not stated in this
# notebook -- TODO confirm.
# NOTE(review): several records appear more than once (for example the
# JAVA/XML applicant is listed twice in a row and the MOBILE/KOTLIN applicant
# appears twice), so a random train/test split can place identical applicants
# on both sides.
data_appl = [
    {"SKILL": ["C", "C++", "JAVA", "PYTHON"], "LANG": ["ENGLISH"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["C", "C++", "JAVA", "PYTHON", "TENSORFLOW"], "LANG": ["ENGLISH"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["C", "C++", "JAVA", "PYTHON"], "LANG": ["ENGLISH", "MANDARIN"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["C", "C++", "JAVA", "PYTHON"], "LANG": ["ENGLISH", "MANDARIN", "JAVANESE"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["C", "PYTHON"], "LANG": ["ENGLISH"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["C", "JAVA", "PYTHON"], "LANG": ["ENGLISH"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["PYTHON"], "LANG": ["ENGLISH"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["R", "PYTHON", "TABLEAU"], "LANG": ["INDONESIAN"], "AGE": 21, "SALARY": 4.5}, 
    {"SKILL": ["C", "R", "JAVA"], "LANG": ["INDONESIAN", "ENGLISH"], "AGE": 32, "SALARY": 7.5}, 
    {"SKILL": ["NODE.JS", "NESTJS", "REACT"], "LANG": ["ENGLISH", "MANDARIN"], "AGE": 18, "SALARY": 2}, 
    {"SKILL": ["ASP.NET", "C#", "REACT"], "LANG": ["MANDARIN", "INDONESIAN"], "AGE": 22, "SALARY": 4}, 
    {"SKILL": ["ANGULAR", ".NET", "APACHE"], "LANG": ["INDONESIAN", "ENGLISH", "FRENCH"], "AGE": 25, "SALARY": 6}, 
    {"SKILL": ["BASH", "LINUX"], "LANG": ["INDONESIAN", "ENGLISH"], "AGE": 27, "SALARY": 5.5}, 
    {"SKILL": ['PYTHON', "TENSORFLOW"], "LANG": ["FRENCH", "INDONESIAN"], "AGE": 39, "SALARY": 10}, 
    {"SKILL": ["TELECOMMUNICATIONS", "MULTIMEDIA"], "LANG": ["JAPANESE", "FRENCH"], "AGE": 51, "SALARY": 20}, 
    {"SKILL": ["BIOCHEMICAL ENGINEERING", "BIOMEDICAL ENGINEERING"], "LANG": ["INDONESIAN", "MANDARIN", "JAPANESE"], "AGE": 45, "SALARY": 16}, 
    {"SKILL": ["BLOCKCHAIN"], "LANG": ["INDONESIAN", "ENGLISH"], "AGE": 22, "SALARY": 3.5}, 
    {"SKILL": ["BUSINESS", "BUSINESS DASHBOARDS"], "LANG": ["ENGLISH", "FRENCH"], "AGE": 29, "SALARY": 8}, 
    {"SKILL": ["MARKETING", "MARKETING AUTOMATION"], "LANG": ["INDONESIAN", "MANDARIN"], "AGE": 33, "SALARY": 10}, 
    {"SKILL": ["CIVIL ENGINEERING"], "LANG": ["INDONESIAN", "FRENCH"], "AGE": 37, "SALARY": 9.5}, 
    {"SKILL": ["CODEIGNITER", "PHP", "LARAVEL"], "LANG": ["INDONESIAN"], "AGE": 19, "SALARY": 3}, 
    {"SKILL": ["COMPUTER GRAPHICS", "GRAPHICS"], "LANG": ["ENGLISH"], "AGE": 20, "SALARY": 3.5}, 
    {"SKILL": ["PROGRAMMING", "C++"], "LANG": ["FRENCH", "INDONESIAN"], "AGE": 24, "SALARY": 6}, 
    {"SKILL": ["AGILE", "ALGORITHM"], "LANG": ["INDONESIAN", "ENGLISH", "FRENCH", "MANDARIN", "JAPANESE"], "AGE": 43, "SALARY": 17}, 
    {"SKILL": ["DART", "FLUTTER"], "LANG": ["MANDARIN", "ENGLISH"], "AGE": 38, "SALARY": 15}, 
    {"SKILL": ["DEBUGGING", "DEADLOCK"], "LANG": ["MANDARIN"], "AGE": 37, "SALARY": 12.5}, 
    {"SKILL": ["DOCKER", "EXPRESSJS"], "LANG": ["INDONESIAN"], "AGE": 17, "SALARY": 1.5}, 
    {"SKILL": ["FIGMA", "USER INTERFACE"], "LANG": ["INDONESIAN", "MANDARIN"], "AGE": 29, "SALARY": 6.5}, 
    {"SKILL": ["VIRTUALBOX", "OPERATING SYSTEM"], "LANG": ["FRENCH", "INDONESIAN", "MANDARIN"], "AGE": 30, "SALARY": 8.5}, 
    {"SKILL": ["OPENCV", "OPENGL"], "LANG": ["MANDARIN"], "AGE": 40, "SALARY": 10.5}, 
    {"SKILL": ["PERL", "RUBY"], "LANG": ["JAPANESE"], "AGE": 32, "SALARY": 8.5}, 
    {"SKILL": ["RELATIONAL DATABASE", "MYSQL"], "LANG": ["INDONESIAN", "FRENCH"], "AGE": 42, "SALARY": 14.5}, 
    {"SKILL": ["MOBILE", "KOTLIN"], "LANG": ["INDONESIAN", "MANDARIN"], "AGE": 39, "SALARY": 10.5}, 
    {"SKILL": ["CLOUD HOSTING", "KUBERNETES"], "LANG": ["MANDARIN", "JAPANESE"], "AGE": 18, "SALARY": 3}, 
    {"SKILL": ["C", "C++", "UNITY"], "LANG": ["JAPANESE", "FRENCH"], "AGE": 22, "SALARY": 4.5}, 
    {"SKILL": ["JAVA", "XML"], "LANG": ["INDONESIAN", "ENGLISH"], "AGE": 46, "SALARY": 16.5},
    {"SKILL": ["JAVA", "XML"], "LANG": ["INDONESIAN", "ENGLISH"], "AGE": 46, "SALARY": 16.5},
    {"SKILL": ["MOBILE", "KOTLIN"], "LANG": ["INDONESIAN", "MANDARIN"], "AGE": 39, "SALARY": 10.5},
    {"SKILL": ["MARKETING", "TABLEAU"], "LANG": ["INDONESIAN"], "AGE": 21, "SALARY": 4.5},
    {"SKILL": ["BASH", "LINUX"], "LANG": ["INDONESIAN", "ENGLISH"], "AGE": 27, "SALARY": 5.5},
    {"SKILL": ["CODEIGNITER", "PHP", "LARAVEL"], "LANG": ["INDONESIAN"], "AGE": 19, "SALARY": 3},
    {"SKILL": ["OPENCV", "OPENGL"], "LANG": ["MANDARIN"], "AGE": 40, "SALARY": 10.5},
    {"SKILL": ["CIVIL ENGINEERING"], "LANG": ["INDONESIAN", "FRENCH"], "AGE": 37, "SALARY": 9.5},
    {"SKILL": ["CLOUD HOSTING", "KUBERNETES"], "LANG": ["MANDARIN", "JAPANESE"], "AGE": 18, "SALARY": 3},
    {"SKILL": ["BUSINESS", "BUSINESS DASHBOARDS"], "LANG": ["ENGLISH", "FRENCH"], "AGE": 29, "SALARY": 8},
    {"SKILL": ["MACHINE LEARNING", "TENSORFLOW"], "LANG": ["FRENCH", "INDONESIAN"], "AGE": 39, "SALARY": 10},
    {"SKILL": ["AGILE", "ALGORITHM"], "LANG": ["INDONESIAN", "ENGLISH", "FRENCH", "MANDARIN", "JAPANESE"], "AGE": 43, "SALARY": 17},
    {"SKILL": ["DEBUGGING", "DEADLOCK"], "LANG": ["MANDARIN"], "AGE": 37, "SALARY": 12.5},
    {"SKILL": ["DOCKER", "EXPRESSJS"], "LANG": ["INDONESIAN"], "AGE": 17, "SALARY": 1.5},
    {"SKILL": ["MARKETING", "MARKETING AUTOMATION"], "LANG": ["INDONESIAN", "MANDARIN"], "AGE": 33, "SALARY": 10},
    {"SKILL": ["RELATIONAL DATABASE", "MYSQL"], "LANG": ["INDONESIAN", "FRENCH"], "AGE": 42, "SALARY": 14.5},
    {"SKILL": ["TELECOMMUNICATIONS", "MULTIMEDIA"], "LANG": ["JAPANESE", "FRENCH"], "AGE": 51, "SALARY": 20},
    {"SKILL": ["BLOCKCHAIN"], "LANG": ["INDONESIAN", "ENGLISH"], "AGE": 22, "SALARY": 3.5},
    {"SKILL": ["ANGULAR", ".NET", "APACHE"], "LANG": ["INDONESIAN", "ENGLISH", "FRENCH"], "AGE": 25, "SALARY": 6},
    {"SKILL": ["NODE.JS", "NESTJS", "REACT"], "LANG": ["ENGLISH", "MANDARIN"], "AGE": 18, "SALARY": 2},
    {"SKILL": ["ASP.NET", "C#", "REACT"], "LANG": ["MANDARIN", "INDONESIAN"], "AGE": 22, "SALARY": 4}
]

# Hand-written recruiter filters. Each filter supplies the arguments of the
# four scoring functions: "ageFilter" ([min, max]) with its "tolerance",
# "skillFilter" and "langFilter" (lists of names, written in mixed case; the
# skill/language scorers upper-case both sides before comparing), and
# "salaryFilter" ([low, high]) with its "tol".
# NOTE(review): two filters ask for "Telecommunication" (singular) while the
# applicant data uses "TELECOMMUNICATIONS", so that skill can never match under
# the exact-name comparison -- confirm whether this is intended.
# NOTE(review): the ageFilter [22, 26] / ["Java", "C++", "Python"] filter is
# listed twice with identical values.
data_filter = [
    {"ageFilter": [23, 27], "tolerance": 5, "skillFilter": ["C", "C++", "Java"], "langFilter": ["English", "Mandarin", "Javanese"], "salaryFilter": [3, 12], "tol": 1},
    {"ageFilter": [23, 27], "tolerance": 5, "skillFilter": ["C", "C++", "Java"], "langFilter": ["English", "Mandarin"], "salaryFilter": [3, 12], "tol": 1},
    {"ageFilter": [23, 27], "tolerance": 5, "skillFilter": ["C", "Java"], "langFilter": ["English", "Mandarin", "Javanese"], "salaryFilter": [3, 12], "tol": 1},
    {"ageFilter": [23, 27], "tolerance": 5, "skillFilter": ["Java"], "langFilter": ["Javanese"], "salaryFilter": [3, 12], "tol": 1},
    {"ageFilter": [19, 34], "tolerance": 3, "skillFilter": ["R", "XML", "Matlab", "Python"], "langFilter": ["Indonesian", "English", "Japanese"], "salaryFilter": [8.5, 15.2], "tol": 2.3},
    {"ageFilter": [16, 51], "tolerance": 6, "skillFilter": ["PHP", "C++", "Java", "Python"], "langFilter": ["French", "English"], "salaryFilter": [0.0, 7.8], "tol": 0.8},
    {"ageFilter": [28, 45], "tolerance": 7, "skillFilter": ["Python", "Linux", "Machine Learning", "Tensorflow"], "langFilter": ["Indonesian", "English", "French"], "salaryFilter": [4.3, 19.5], "tol": 2.5},
    {"ageFilter": [18, 32], "tolerance": 2, "skillFilter": ["Kotlin", "Dart", "ASP.NET", "C#", "Angular"], "langFilter": ["Indonesian", "French", "Mandarin"], "salaryFilter": [1.8, 11.7], "tol": 1.1},
    {"ageFilter": [31, 39], "tolerance": 4, "skillFilter": ["Java", "Bash", "Linux", "C#", "ASP.NET"], "langFilter": ["Indonesian", "English"], "salaryFilter": [6.9, 14.3], "tol": 1.7},
    {"ageFilter": [23, 42], "tolerance": 6, "skillFilter": ["Java", "Matlab", "Tensorflow", "Machine Learning", "Python"], "langFilter": ["French", "English", "Mandarin"], "salaryFilter": [2.7, 13.5], "tol": 1.5},
    {"ageFilter": [22, 29], "tolerance": 1, "skillFilter": ["Java", "C++", "C#", "Python"], "langFilter": ["Indonesian"], "salaryFilter": [3.5, 10.8], "tol": 0.6},
    {"ageFilter": [25, 35], "tolerance": 4, "skillFilter": ["Python", "Kubernetes", "Cloud Hosting"], "langFilter": ["English", "French"], "salaryFilter": [5.2, 16.4], "tol": 2.1},
    {"ageFilter": [20, 30], "tolerance": 3, "skillFilter": ["C", "C++", "Java"], "langFilter": ["Mandarin"], "salaryFilter": [2.0, 8.7], "tol": 0.9},
    {"ageFilter": [28, 45], "tolerance": 6, "skillFilter": ["Python", "Machine Learning", "Tensorflow", "Kotlin"], "langFilter": ["English"], "salaryFilter": [6.8, 14.1], "tol": 1.3},
    {"ageFilter": [24, 31], "tolerance": 2, "skillFilter": ["PHP", "CodeIgniter", "Laravel", "Bash", "Linux"], "langFilter": ["French", "Japanese"], "salaryFilter": [1.4, 9.9], "tol": 1.0},
    {"ageFilter": [30, 40], "tolerance": 5, "skillFilter": ["Java", "C#", "Angular", "React"], "langFilter": ["English", "Japanese"], "salaryFilter": [4.9, 13.7], "tol": 2.4},
    {"ageFilter": [22, 26], "tolerance": 4, "skillFilter": ["Java", "C++", "Python"], "langFilter": ["Indonesian"], "salaryFilter": [3.2, 10.5], "tol": 0.7},
    {"ageFilter": [25, 35], "tolerance": 3, "skillFilter": ["Python", "Machine Learning", "Tensorflow", "NestJS"], "langFilter": ["English", "Mandarin"], "salaryFilter": [6.0, 15.8], "tol": 1.8},
    {"ageFilter": [28, 40], "tolerance": 6, "skillFilter": ["C", "Java", "C#", "React"], "langFilter": ["French", "Mandarin"], "salaryFilter": [2.5, 11.9], "tol": 1.2},
    {"ageFilter": [19, 25], "tolerance": 2, "skillFilter": ["Python", "Matlab", "Tableau"], "langFilter": ["English"], "salaryFilter": [1.2, 9.6], "tol": 0.5},
    {"ageFilter": [30, 45], "tolerance": 5, "skillFilter": ["Java", "Cloud Hosting", "Kubernetes"], "langFilter": ["English", "Japanese"], "salaryFilter": [5.7, 14.6], "tol": 2.7},
    {"ageFilter": [23, 27], "tolerance": 4, "skillFilter": ["C++", "Unity", "Linux"], "langFilter": ["French"], "salaryFilter": [3.6, 10.3], "tol": 0.8},
    {"ageFilter": [26, 33], "tolerance": 3, "skillFilter": ["Python", "Tensorflow", "Machine Learning"], "langFilter": ["Mandarin", "Japanese"], "salaryFilter": [6.3, 15.4], "tol": 1.5},
    {"ageFilter": [22, 28], "tolerance": 6, "skillFilter": ["C++", "Java", "XML", "Agile"], "langFilter": ["English", "Mandarin"], "salaryFilter": [2.3, 9.7], "tol": 1.0},
    {"ageFilter": [27, 35], "tolerance": 5, "skillFilter": ["Python", "Java", "Machine Learning"], "langFilter": ["Indonesian", "English"], "salaryFilter": [5.1, 13.2], "tol": 2.3},
    {"ageFilter": [24, 32], "tolerance": 4, "skillFilter": ["Java", "React", "Angular"], "langFilter": ["Mandarin"], "salaryFilter": [4.5, 12.8], "tol": 1.7},
    {"ageFilter": [29, 40], "tolerance": 7, "skillFilter": ["Python", "Linux", "Telecommunication"], "langFilter": ["English", "French"], "salaryFilter": [3.9, 11.5], "tol": 1.4},
    {"ageFilter": [22, 26], "tolerance": 4, "skillFilter": ["Java", "C++", "Python"], "langFilter": ["Indonesian"], "salaryFilter": [3.2, 10.5], "tol": 0.7},
    {"ageFilter": [40, 50], "tolerance": 6, "skillFilter": ["Biochemical Engineering", "Python", "R", "Machine Learning"], "langFilter": ["English", "French"], "salaryFilter": [8.9, 17.6], "tol": 2.3},
    {"ageFilter": [36, 45], "tolerance": 5, "skillFilter": ["Biomedical Engineering", "Python", "Matlab", "Telecommunication"], "langFilter": ["English", "Mandarin"], "salaryFilter": [7.1, 16.3], "tol": 2.1},
    {"ageFilter": [38, 55], "tolerance": 7, "skillFilter": ["Biochemical Engineering", "Python", "Matlab", "Cloud Hosting"], "langFilter": ["English", "Japanese"], "salaryFilter": [9.2, 18.9], "tol": 2.7},
    {"ageFilter": [42, 48], "tolerance": 4, "skillFilter": ["Biomedical Engineering", "C++", "Java", "Unity"], "langFilter": ["English"], "salaryFilter": [6.7, 15.5], "tol": 1.9},
    {"ageFilter": [39, 53], "tolerance": 6, "skillFilter": ["Biochemical Engineering", "Python", "Matlab", "Agile"], "langFilter": ["English", "French"], "salaryFilter": [8.3, 17.2], "tol": 2.4},
    {"ageFilter": [44, 60], "tolerance": 7, "skillFilter": ["Biomedical Engineering", "Python", "R", "Machine Learning", "Tensorflow"], "langFilter": ["English"], "salaryFilter": [10.5, 20.0], "tol": 3.0},
    {"ageFilter": [22, 27], "tolerance": 3, "skillFilter": ["Python", "Matlab", "Agile"], "langFilter": ["English", "French"], "salaryFilter": [3.5, 10.8], "tol": 1.2},
    {"ageFilter": [31, 36], "tolerance": 4, "skillFilter": ["Java", "Python", "Agile"], "langFilter": ["English"], "salaryFilter": [5.2, 13.7], "tol": 1.7},
    {"ageFilter": [24, 29], "tolerance": 5, "skillFilter": ["Python", "Cloud Hosting", "Kubernetes"], "langFilter": ["English", "Mandarin"], "salaryFilter": [4.8, 12.5], "tol": 1.5},
    {"ageFilter": [35, 40], "tolerance": 4, "skillFilter": ["C++", "Java", "Tableau"], "langFilter": ["English"], "salaryFilter": [6.0, 15.6], "tol": 1.9},
    {"ageFilter": [26, 31], "tolerance": 6, "skillFilter": ["Java", "C#", "Mobile"], "langFilter": ["English", "Mandarin"], "salaryFilter": [4.6, 11.2], "tol": 1.3},
    {"ageFilter": [33, 38], "tolerance": 5, "skillFilter": ["Python", "Cloud Hosting", "Kubernetes"], "langFilter": ["English", "Japanese"], "salaryFilter": [5.9, 14.3], "tol": 1.6},
    {"ageFilter": [22, 27], "tolerance": 3, "skillFilter": ["C++", "Java", "Mobile"], "langFilter": ["English"], "salaryFilter": [3.8, 10.5], "tol": 1.1},
    {"ageFilter": [31, 36], "tolerance": 4, "skillFilter": ["Java", "Python", "Machine Learning"], "langFilter": ["English", "Mandarin"], "salaryFilter": [5.6, 13.1], "tol": 1.8},
    {"ageFilter": [24, 29], "tolerance": 5, "skillFilter": ["Python", "R", "Machine Learning"], "langFilter": ["English", "Japanese"], "salaryFilter": [4.2, 12.8], "tol": 1.4},
    {"ageFilter": [35, 40], "tolerance": 4, "skillFilter": ["Java", "XML", "Cloud Hosting"], "langFilter": ["English"], "salaryFilter": [6.4, 15.0], "tol": 1.7},
    {"ageFilter": [26, 31], "tolerance": 6, "skillFilter": ["C++", "Java", "Bash"], "langFilter": ["English", "Mandarin"], "salaryFilter": [4.5, 11.9], "tol": 1.3},
    {"ageFilter": [33, 38], "tolerance": 5, "skillFilter": ["Python", "R", "Tensorflow"], "langFilter": ["English", "Japanese"], "salaryFilter": [5.8, 14.1], "tol": 1.6},
    {"ageFilter": [22, 27], "tolerance": 3, "skillFilter": ["Java", "XML", "Kotlin"], "langFilter": ["English"], "salaryFilter": [3.9, 10.1], "tol": 1.0},
    {"ageFilter": [31, 36], "tolerance": 4, "skillFilter": ["Python", "Cloud Hosting", "Kubernetes"], "langFilter": ["English", "Mandarin"], "salaryFilter": [5.4, 13.3], "tol": 1.7},
    {"ageFilter": [24, 29], "tolerance": 5, "skillFilter": ["Java", "XML", "Kotlin"], "langFilter": ["English", "Japanese"], "salaryFilter": [4.0, 12.3], "tol": 1.4},
    {"ageFilter": [35, 40], "tolerance": 4, "skillFilter": ["Python", "R", "Tensorflow"], "langFilter": ["English"], "salaryFilter": [6.2, 14.8], "tol": 1.6},
    {"ageFilter": [26, 31], "tolerance": 6, "skillFilter": ["Java", "XML", "Kotlin"], "langFilter": ["English", "Mandarin"], "salaryFilter": [4.3, 11.6], "tol": 1.2},
    {"ageFilter": [33, 38], "tolerance": 5, "skillFilter": ["Python", "R", "Tensorflow"], "langFilter": ["English", "Japanese"], "salaryFilter": [5.6, 13.9], "tol": 1.5},
    {"ageFilter": [22, 27], "tolerance": 3, "skillFilter": ["Java", "XML", "Kotlin"], "langFilter": ["English"], "salaryFilter": [3.7, 9.8], "tol": 0.9}
]

def build_score_features(filters, applicants):
    """Score every applicant against every filter.

    Args:
        filters: list of filter dicts (keys ``skillFilter``, ``langFilter``,
            ``ageFilter``, ``tolerance``, ``salaryFilter``, ``tol``).
        applicants: list of applicant dicts (keys ``SKILL``, ``LANG``,
            ``AGE``, ``SALARY``).

    Returns:
        ``(features, targets)`` where ``features[i][j]`` is the list
        ``[skill, lang, age, salary]`` of component scores for applicant ``j``
        under filter ``i`` and ``targets[i][j]`` is the sum of those four
        scores.
    """
    features, targets = [], []
    for filt in filters:
        filt_features, filt_targets = [], []
        for applicant in applicants:
            sk = score_skill(applicant["SKILL"], filt["skillFilter"])
            lg = score_lang(applicant["LANG"], filt["langFilter"])
            ag = score_age(applicant["AGE"], filt["ageFilter"], filt["tolerance"])
            sl = score_salary(applicant["SALARY"], filt["salaryFilter"], filt["tol"])
            filt_features.append([sk, lg, ag, sl])
            # Plain left-to-right addition keeps the target bit-identical to
            # what the hand-written loops produced.
            filt_targets.append(sk + lg + ag + sl)
        features.append(filt_features)
        targets.append(filt_targets)
    return features, targets


# Filters and applicants are split independently, so the test grid pairs
# held-out filters with held-out applicants.
data_filter_train, data_filter_test = train_test_split(data_filter, train_size=0.80, shuffle=True, random_state=1)
data_appl_train, data_appl_test = train_test_split(data_appl, train_size=0.80, shuffle=True, random_state=1)

# One helper replaces the two copy-pasted nested loops (train and test).
x_train, y_train = build_score_features(data_filter_train, data_appl_train)
x_test, y_test = build_score_features(data_filter_test, data_appl_test)

### Making #1 Model: Simple Model for Scoring

In [8]:
# Seed TensorFlow's global RNG before any layer is created so that weight
# initialisation (and, as far as TensorFlow allows, the whole fit) is
# repeatable under Restart & Run All. The value matches the random_state used
# for the train/test split.
tf.random.set_seed(1)

# Flatten the (filter, applicant) grid: one row per pair, four component
# scores as input and their total as the regression target.
x_train = np.array(x_train).reshape(-1, 4)
y_train = np.array(y_train).reshape(-1, 1)

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=x_train.shape[1:]),
    # NOTE(review): a ReLU output is usable here only because the target (a
    # sum of non-negative scores) is never negative; a linear output is the
    # more common choice for regression -- confirm this is intentional.
    tf.keras.layers.Dense(1, activation='relu')
])

model.compile(optimizer='adam', loss='mse')
model.fit(x_train, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x2870c3e5a60>

In [9]:
# Flatten the held-out (filter, applicant) grid exactly like the training
# data: four component scores per row, one total per row.
x_test, y_test = (
    np.asarray(x_test).reshape(-1, 4),
    np.asarray(y_test).reshape(-1, 1),
)

# The cell output is the compiled loss (MSE) on the held-out pairs.
model.evaluate(x_test, y_test)



1.656195308896713e-05

In [10]:
# A single applicant / filter pair used to compare the hand-computed total
# with the model's prediction.
data_appl_demo = [
    {
        "SKILL": ["C", "JAVA", "PYTHON", "TENSORFLOW", "C#"],
        "LANG": ["ENGLISH", "MANDARIN", "JAVANESE"],
        "AGE": 23,
        "SALARY": 3.5,
    }
]
data_filter_demo = [
    {
        "ageFilter": [23, 27],
        "tolerance": 5,
        "skillFilter": ["C", "C++", "Java"],
        "langFilter": ["English", "Mandarin", "Javanese"],
        "salaryFilter": [3, 12],
        "tol": 1,
    }
]

# x_demo[i][j] holds the four component scores of applicant j under filter i;
# y_demo[i][j] holds their sum.
x_demo, y_demo = [], []
for filt in data_filter_demo:
    scores_for_filter, totals_for_filter = [], []
    for person in data_appl_demo:
        sk = score_skill(person["SKILL"], filt["skillFilter"])
        lg = score_lang(person["LANG"], filt["langFilter"])
        ag = score_age(person["AGE"], filt["ageFilter"], filt["tolerance"])
        sl = score_salary(person["SALARY"], filt["salaryFilter"], filt["tol"])
        total = sk+lg+ag+sl
        scores_for_filter.append([sk, lg, ag, sl])
        totals_for_filter.append(total)
    x_demo.append(scores_for_filter)
    y_demo.append(totals_for_filter)

# Expected total first, then the model's estimate for the same pair.
print(y_demo[0][0])

x_demo = np.array(x_demo).reshape(-1, 4)
y_pred = model.predict(x_demo)

print(y_pred)

3.67
[[3.6769288]]


### Save Model and Convert It to TFLite File

In [11]:
# Step 1: write the trained Keras model to disk in SavedModel format; the
# TFLite converter below reads it back from this directory.
export_dir = './my_model'
tf.saved_model.save(model, export_dir)

# Step 2: convert the SavedModel to a TFLite flatbuffer using the default
# optimization setting.
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
optimization = tf.lite.Optimize.DEFAULT
converter.optimizations = [optimization]
tflite_model = converter.convert()

# Step 3: persist the flatbuffer; write_bytes returns the number of bytes
# written, which becomes the cell output.
tflite_model_file = pathlib.Path('./model.tflite')
tflite_model_file.write_bytes(tflite_model)



INFO:tensorflow:Assets written to: ./my_model\assets


INFO:tensorflow:Assets written to: ./my_model\assets


1872

---
## 2. Experimental Work

### Initializing Data for #2 Model Training and Testing

In [None]:
from gensim.models import Word2Vec

def _mean_embedding(phrases):
    """Collapse a list of phrases into one scalar via a throw-away Word2Vec.

    Each phrase is upper-cased and split on whitespace, a Word2Vec model is
    trained on just these token lists, every phrase is reduced to the mean of
    all components of its token vectors, and the mean over the phrases is
    returned.

    Upper-casing and token-list lookup are applied to every input so that
    training, test and filter text are embedded the same way (previously only
    the test features were upper-cased, and the training code looked
    languages up by the raw string).
    """
    tokenized = [phrase.upper().split() for phrase in phrases]
    model_vec = Word2Vec(tokenized, min_count=1)
    per_phrase = [model_vec.wv[tokens].mean() for tokens in tokenized]
    return np.mean(per_phrase)


def _build_embedding_features(filters, applicants):
    """Build the three input groups and the target for model #2.

    Args:
        filters: list of filter dicts.
        applicants: list of applicant dicts.

    Returns:
        ``(x_appl, x_filter, x_af, y)``; each is a list with one entry per
        filter, itself a list with one entry per applicant:
        ``[skill_emb, lang_emb]`` of the applicant, ``[skill_emb, lang_emb]``
        of the filter, ``[age_score, salary_score]``, and the sum of the four
        component scores.
    """
    # Applicant embeddings do not depend on the filter: compute them once
    # instead of refitting Word2Vec for every (filter, applicant) pair.
    appl_embs = [
        [_mean_embedding(applicant["SKILL"]), _mean_embedding(applicant["LANG"])]
        for applicant in applicants
    ]

    x_appl, x_filter, x_af, y = [], [], [], []
    for filt in filters:
        # Likewise, filter embeddings do not depend on the applicant.
        filter_emb = [
            _mean_embedding(filt["skillFilter"]),
            _mean_embedding(filt["langFilter"]),
        ]

        appl_row, filter_row, af_row, y_row = [], [], [], []
        for applicant, appl_emb in zip(applicants, appl_embs):
            sk = score_skill(applicant["SKILL"], filt["skillFilter"])
            lg = score_lang(applicant["LANG"], filt["langFilter"])
            ag = score_age(applicant["AGE"], filt["ageFilter"], filt["tolerance"])
            sl = score_salary(applicant["SALARY"], filt["salaryFilter"], filt["tol"])

            # Copy the shared embedding pairs so rows never alias each other.
            appl_row.append(list(appl_emb))
            filter_row.append(list(filter_emb))
            af_row.append([ag, sl])
            y_row.append(sk + lg + ag + sl)

        x_appl.append(appl_row)
        x_filter.append(filter_row)
        x_af.append(af_row)
        y.append(y_row)

    return x_appl, x_filter, x_af, y


x_appl_train, x_filter_train, x_af_train, y_train = _build_embedding_features(
    data_filter_train, data_appl_train
)
x_appl_test, x_filter_test, x_af_test, y_test = _build_embedding_features(
    data_filter_test, data_appl_test
)

### Making #2 Model: More Complicated Model

In [None]:
from sklearn.preprocessing import StandardScaler

# Flatten each (filter, applicant) grid to one row per pair.
x_appl_train, x_af_train, x_filter_train = (
    np.array(grid).reshape(-1, 2)
    for grid in (x_appl_train, x_af_train, x_filter_train)
)
y_train = np.array(y_train).reshape(-1, 1)

# One scaler per input group; they are fitted on the training rows here and
# kept under these names so later cells can apply the same scaling.
scalerAppl = StandardScaler()
x_appl_train = scalerAppl.fit_transform(x_appl_train)

scalerAF = StandardScaler()
x_af_train = scalerAF.fit_transform(x_af_train)

scalerFilter = StandardScaler()
x_filter_train = scalerFilter.fit_transform(x_filter_train)

num_outputs = 32


def _make_tower():
    """Return a fresh 256 -> 128 -> num_outputs dense stack (linear output)."""
    return tf.keras.models.Sequential([
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_outputs)
    ])


def _embed(tower, features):
    """Create an Input shaped like one row of ``features`` and return it with
    the L2-normalised output of ``tower`` applied to it."""
    tower_input = tf.keras.layers.Input(shape=(features[0].shape))
    return tower_input, tf.linalg.l2_normalize(tower(tower_input), axis=1)


# Three structurally identical towers: applicant embeddings, age/salary
# scores, and filter embeddings.
appl_NN = _make_tower()
af_NN = _make_tower()
filter_NN = _make_tower()

input_appl, vu = _embed(appl_NN, x_appl_train)
input_af, vx = _embed(af_NN, x_af_train)
input_filter, vm = _embed(filter_NN, x_filter_train)

# The three normalised vectors are concatenated and mapped to a single
# unbounded regression output.
concatenated_tensor = tf.keras.layers.Concatenate()([vu, vx, vm])
output = tf.keras.layers.Dense(1, activation=None)(concatenated_tensor)

model = tf.keras.Model(inputs=[input_appl, input_af, input_filter], outputs=output)
model.compile(optimizer='adagrad', loss='mse')
model.fit([x_appl_train, x_af_train, x_filter_train], y_train, epochs=100)

In [None]:
# Flatten the held-out grids to one row per pair and apply the scalers that
# were fitted on the training rows (transform only, no refit).
x_appl_test = scalerAppl.transform(np.array(x_appl_test).reshape(-1, 2))
x_af_test = scalerAF.transform(np.array(x_af_test).reshape(-1, 2))
x_filter_test = scalerFilter.transform(np.array(x_filter_test).reshape(-1, 2))
y_test = np.array(y_test).reshape(-1, 1)

# The cell output is the compiled loss (MSE) on the held-out pairs.
model.evaluate([x_appl_test, x_af_test, x_filter_test], y_test)

In [None]:
data_appl_demo = [
    {"SKILL": ["C++", "PYTHON", "TENSORFLOW", "R", "JAVA"], "LANG": ["SPANISH"], "AGE": 23, "SALARY": 10}
]
data_filter_demo = [
    {"ageFilter": [23, 27], "tolerance": 5, "skillFilter": ["C", "C++", "Java"], "langFilter": ["English", "Haiti", "Spanish"], "salaryFilter": [3, 12], "tol": 1}
]


def _demo_embedding(phrases):
    """Reduce a list of phrases to one scalar with a throw-away Word2Vec.

    Phrases are upper-cased and split on whitespace before training and
    lookup, the same normalisation the test-set feature code applies, so a
    mixed-case filter such as "Java" is embedded like "JAVA". Lookup always
    uses the token list, which also works for multi-word phrases.
    """
    tokenized = [phrase.upper().split() for phrase in phrases]
    model_vec = Word2Vec(tokenized, min_count=1)
    per_phrase = [model_vec.wv[tokens].mean() for tokens in tokenized]
    return np.mean(per_phrase)


# Entry [i][j] of each list belongs to applicant j scored under filter i.
x_appl_demo = []
x_af_demo = []
x_filter_demo = []
y_demo = []

for filt in data_filter_demo:
    appl_row, af_row, filter_row, y_row = [], [], [], []
    for person in data_appl_demo:
        skill_emb = _demo_embedding(person["SKILL"])
        lang_emb = _demo_embedding(person["LANG"])
        skillf_emb = _demo_embedding(filt["skillFilter"])
        langf_emb = _demo_embedding(filt["langFilter"])

        sk = score_skill(person["SKILL"], filt["skillFilter"])
        lg = score_lang(person["LANG"], filt["langFilter"])
        ag = score_age(person["AGE"], filt["ageFilter"], filt["tolerance"])
        sl = score_salary(person["SALARY"], filt["salaryFilter"], filt["tol"])
        total = sk+lg+ag+sl

        appl_row.append([skill_emb, lang_emb])
        af_row.append([ag, sl])
        filter_row.append([skillf_emb, langf_emb])
        y_row.append(total)

    x_appl_demo.append(appl_row)
    x_af_demo.append(af_row)
    x_filter_demo.append(filter_row)
    y_demo.append(y_row)

# Hand-computed total first, then the model's estimate for the same pair.
print(y_demo[0][0])

# Flatten to one row per pair and reuse the scalers fitted on the training
# rows before predicting.
x_appl_demo = np.array(x_appl_demo).reshape(-1, 2)
x_af_demo = np.array(x_af_demo).reshape(-1, 2)
x_filter_demo = np.array(x_filter_demo).reshape(-1, 2)
x_appl_demo = scalerAppl.transform(x_appl_demo)
x_af_demo = scalerAF.transform(x_af_demo)
x_filter_demo = scalerFilter.transform(x_filter_demo)

y_pred = model.predict([x_appl_demo, x_af_demo, x_filter_demo])

print(y_pred)