In [102]:
!pip install pandas
!pip install numpy
!pip install scikit-learn



In [103]:
!pip install nltk



In [104]:
!pip install wordtodigits
!pip install inflect



In [105]:
!pip install absl-py
!pip install tensorflow
!pip install tensorflow_hub



In [106]:
#Importing libraries 
import pandas as pd
import numpy as np
import nltk #module for text preprocessing
import wordtodigits
import inflect #module to convert plural text into singular
import tensorflow_hub as hub #module to get pretrained tensorflow model
from sklearn.metrics.pairwise import cosine_similarity

# Employee Preference Data Analysis

In [107]:
# Load the employee preference table; the cells below read its Name, Domain,
# Event1 and Event2 columns.
Employee_Preference_Data = pd.read_csv('CCMLEmployeeData.csv')
Employee_Preference_Data

Unnamed: 0,Name,Domain,Event1,Event2
0,Bryan Brock,Management,Jobs,Hackathons
1,Joseph Sullivan,Coding,Certifications,Webinars
2,Sherri Dawson,Security,Internships,Fests
3,Dustin Ferguson,Hardware,Competitions,Webinars
4,Kayla Young,Web Development,Expos,Certifications
...,...,...,...,...
178,Katelyn Barnes,Coding,Courses,Webinars
179,Gabriel Cruz,Other,Talks,Fests
180,Christina Williams,Software Architecture,Talks,Fests
181,William Turner,Development Processes,Trainings,Seminars


In [108]:
# Distinct employee domains, sorted so the ordering is stable in every later cell.
Domain = sorted(Employee_Preference_Data['Domain'].unique())
Domain

['Artificial Intelligence',
 'Blockchain',
 'C',
 'C++',
 'Cloud Computing',
 'Coding',
 'Data Science',
 'Development Processes',
 'Finance',
 'Hardware',
 'Higher Education',
 'IoT',
 'Java',
 'JavaScript',
 'Machine Learning',
 'Management',
 'Mobile Applications',
 'Networking',
 'Other',
 'Python',
 'Security',
 'Software Architecture',
 'Web Development']

In [109]:
# Distinct values of the first preferred event type, in order of first appearance.
Event1 = Employee_Preference_Data['Event1'].unique()
Event1

array(['Jobs', 'Certifications', 'Internships', 'Competitions', 'Expos', 'Workshops', 'Trainings', 'Seminars', 'Hackathons', 'Fests', 'Talks', 'Webinars', 'Courses'], dtype=object)

In [110]:
# Distinct values of the second preferred event type, in order of first appearance.
Event2 = Employee_Preference_Data['Event2'].unique()
Event2

array(['Hackathons', 'Webinars', 'Fests', 'Certifications', 'Seminars', 'Courses', 'Expos', 'Jobs', 'Trainings', 'Internships', 'Competitions', 'Talks', 'Workshops'], dtype=object)

In [111]:
# Every event type named in either preference column, de-duplicated and sorted.
Events = sorted(set(Event1).union(Event2))
Events

['Certifications',
 'Competitions',
 'Courses',
 'Expos',
 'Fests',
 'Hackathons',
 'Internships',
 'Jobs',
 'Seminars',
 'Talks',
 'Trainings',
 'Webinars',
 'Workshops']

In [112]:
# Show long text cells in full instead of truncating them in DataFrame output.
pd.set_option('display.max_colwidth', 10000)

# Preprocessing Input Events

In [113]:
# Load the raw event announcements; later cells read the text from the first column.
# NOTE(review): the name `input` shadows Python's built-in input() for the rest of the notebook.
input = pd.read_csv('input.csv')
input

Unnamed: 0,input_events
0,Get a System Administration certification from PurpleHat today.
1,Lockdown special courses on Ydemi. 22 hours left!
2,CodeBoost codeathon is live now!
3,Attend the Computer Vision and Deep Learning Summit tomorrow
4,AI & Big Data Expo in 36 hours
5,Book Online Tickets for workshops on C programming
6,Spoonshot is hiring a Work From Home - Data Science Internship for 6 months in Bengaluru
7,Java Training in Electronic city Bangalore
8,Ted Talk in the field of Security
9,Arrange a Machine Learning Workshop and Internship


In [114]:
# Working copy of the event text: first column of `input`
# (df.iloc[rows, columns]; ':' means selecting all values of that dimension),
# lower-cased in the same step.
preprocess = pd.DataFrame({'input': input.iloc[:, 0].str.lower()})
preprocess

Unnamed: 0,input
0,get a system administration certification from purplehat today.
1,lockdown special courses on ydemi. 22 hours left!
2,codeboost codeathon is live now!
3,attend the computer vision and deep learning summit tomorrow
4,ai & big data expo in 36 hours
5,book online tickets for workshops on c programming
6,spoonshot is hiring a work from home - data science internship for 6 months in bengaluru
7,java training in electronic city bangalore
8,ted talk in the field of security
9,arrange a machine learning workshop and internship


In [115]:
# Rewrite spelled-out numbers as digits via wordtodigits.convert, one event text at a time.
preprocess['input'] = preprocess['input'].apply(wordtodigits.convert)
preprocess

Unnamed: 0,input
0,get a system administration certification from purplehat today.
1,lockdown special courses on ydemi. 22 hours left!
2,codeboost codeathon is live now!
3,attend the computer vision and deep learning summit tomorrow
4,ai & big data expo in 36 hours
5,book online tickets for workshops on c programming
6,spoonshot is hiring a work from home - data science internship for 6 months in bengaluru
7,java training in electronic city bangalore
8,ted talk in the field of security
9,arrange a machine learning workshop and internship


In [116]:
#removing stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = stopwords.words('english')
newStopWords = ["day","hour","month","days","months","hours"] #adding more stopwords
stop_words.extend(newStopWords)
# Hash-based lookup: the list would be scanned once per token otherwise.
stop_word_lookup = frozenset(stop_words)
# NOTE(review): punctuation is still attached at this point, so a token such as "now!"
# does not match the stop word "now" and is kept.
preprocess['input'] = preprocess['input'].apply(
    lambda text: ' '.join(word for word in text.split() if word not in stop_word_lookup)
)
preprocess

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,input
0,get system administration certification purplehat today.
1,lockdown special courses ydemi. 22 left!
2,codeboost codeathon live now!
3,attend computer vision deep learning summit tomorrow
4,ai & big data expo 36
5,book online tickets workshops c programming
6,spoonshot hiring work home - data science internship 6 bengaluru
7,java training electronic city bangalore
8,ted talk field security
9,arrange machine learning workshop internship


In [117]:
#Removing punctuation marks with python RegEx(Regular Expression)
# [^...] - matches any character that is NOT in the set
# \w - word characters (letters, digits 0-9 and the underscore _ character)
# \s - white space characters
# regex=True is explicit because newer pandas treats the pattern as a literal string by default;
# the raw string keeps \w and \s from being read as (invalid) Python string escapes.
preprocess['input'] = preprocess['input'].str.replace(r'[^\w\s]', '', regex=True)
preprocess

Unnamed: 0,input
0,get system administration certification purplehat today
1,lockdown special courses ydemi 22 left
2,codeboost codeathon live now
3,attend computer vision deep learning summit tomorrow
4,ai big data expo 36
5,book online tickets workshops c programming
6,spoonshot hiring work home data science internship 6 bengaluru
7,java training electronic city bangalore
8,ted talk field security
9,arrange machine learning workshop internship


In [118]:
#removing numbers
# regex=True is explicit because newer pandas treats the pattern as a literal string by default.
preprocess['input'] = preprocess['input'].str.replace(r'\d+', '', regex=True)
preprocess

Unnamed: 0,input
0,get system administration certification purplehat today
1,lockdown special courses ydemi left
2,codeboost codeathon live now
3,attend computer vision deep learning summit tomorrow
4,ai big data expo
5,book online tickets workshops c programming
6,spoonshot hiring work home data science internship bengaluru
7,java training electronic city bangalore
8,ted talk field security
9,arrange machine learning workshop internship


In [119]:
# Tokenizer data used by nltk.word_tokenize in extract_ngrams.
# NOTE(review): recent NLTK releases appear to look for the 'punkt_tab' resource instead —
# confirm against the installed NLTK version.
nltk.download('punkt') #module required for ngrams tokenizer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [120]:
#FUNCTION to generate n-grams from sentences.
def extract_ngrams(data, num):
    """Return the word n-grams of ``data`` as space-joined strings.

    data: text that is tokenized with nltk.word_tokenize.
    num:  number of consecutive tokens per n-gram (1 = unigrams, 2 = bigrams).
    """
    tokens = nltk.word_tokenize(data)
    return list(map(' '.join, nltk.ngrams(tokens, num)))

In [121]:
# One list of single-word n-grams per input event, in a one-column frame named 'input'.
preprocessUnigram = preprocess['input'].apply(lambda text: extract_ngrams(text, 1)).to_frame('input')
preprocessUnigram

Unnamed: 0,input
0,"[get, system, administration, certification, purplehat, today]"
1,"[lockdown, special, courses, ydemi, left]"
2,"[codeboost, codeathon, live, now]"
3,"[attend, computer, vision, deep, learning, summit, tomorrow]"
4,"[ai, big, data, expo]"
5,"[book, online, tickets, workshops, c, programming]"
6,"[spoonshot, hiring, work, home, data, science, internship, bengaluru]"
7,"[java, training, electronic, city, bangalore]"
8,"[ted, talk, field, security]"
9,"[arrange, machine, learning, workshop, internship]"


In [122]:
# One list of two-word n-grams per input event, in a one-column frame named 'input'.
preprocessBigram = preprocess['input'].apply(lambda text: extract_ngrams(text, 2)).to_frame('input')
preprocessBigram

Unnamed: 0,input
0,"[get system, system administration, administration certification, certification purplehat, purplehat today]"
1,"[lockdown special, special courses, courses ydemi, ydemi left]"
2,"[codeboost codeathon, codeathon live, live now]"
3,"[attend computer, computer vision, vision deep, deep learning, learning summit, summit tomorrow]"
4,"[ai big, big data, data expo]"
5,"[book online, online tickets, tickets workshops, workshops c, c programming]"
6,"[spoonshot hiring, hiring work, work home, home data, data science, science internship, internship bengaluru]"
7,"[java training, training electronic, electronic city, city bangalore]"
8,"[ted talk, talk field, field security]"
9,"[arrange machine, machine learning, learning workshop, workshop internship]"


# Embedding of Preprocessed Input Events

In [123]:
#Loading Trained Text Embedding Model from Tensorflow
# `embed` is called below with a list/array of strings and its result is converted with .numpy().
# The module name suggests 128-dimensional vectors — confirm against the model page.
embed = hub.load("https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2")

In [124]:
#FUNCTION to convert strings in list to lowercase
def list_to_lowercase(lst):
    """Return a new list holding the lower-cased form of every string in ``lst``."""
    return [text.lower() for text in lst]

In [125]:
# Lower-cased copy of the domain names; this is the text that gets embedded,
# while the original-case names in `Domain` remain the labels.
domain = list_to_lowercase(Domain)
domain

['artificial intelligence',
 'blockchain',
 'c',
 'c++',
 'cloud computing',
 'coding',
 'data science',
 'development processes',
 'finance',
 'hardware',
 'higher education',
 'iot',
 'java',
 'javascript',
 'machine learning',
 'management',
 'mobile applications',
 'networking',
 'other',
 'python',
 'security',
 'software architecture',
 'web development']

In [126]:
# inflect engine; its singular_noun() is used below to turn plural event names into singular.
p = inflect.engine() 

In [127]:
#smaller case events and then convert to singular
# singular_noun() returns False when the word is not recognised as a plural;
# keep the word itself in that case instead of storing False.
event = [p.singular_noun(name) or name for name in list_to_lowercase(Events)]
event

['certification',
 'competition',
 'course',
 'expo',
 'fest',
 'hackathon',
 'internship',
 'job',
 'seminar',
 'talk',
 'training',
 'webinar',
 'workshop']

In [128]:
#FUNCTION to Convert List to Numpy array with only 1 value in 1 row for Embedding input
def convert_LISTtoNUMPY_for_Embedding(list):
    """Return the values of ``list`` as a NumPy array of shape (number_of_values, 1)."""
    values = np.array(list)
    # -1 lets NumPy work out the row count; every row holds exactly one value.
    return np.reshape(values, (-1, 1))

In [129]:
#Convert Domain list to Numpy array with each value in one row for Embedding input
# Row i holds the lower-cased text of Domain[i].
domain_array = convert_LISTtoNUMPY_for_Embedding(domain)
domain_array

array([['artificial intelligence'],
       ['blockchain'],
       ['c'],
       ['c++'],
       ['cloud computing'],
       ['coding'],
       ['data science'],
       ['development processes'],
       ['finance'],
       ['hardware'],
       ['higher education'],
       ['iot'],
       ['java'],
       ['javascript'],
       ['machine learning'],
       ['management'],
       ['mobile applications'],
       ['networking'],
       ['other'],
       ['python'],
       ['security'],
       ['software architecture'],
       ['web development']], dtype='<U23')

In [130]:
#Convert Event list to Numpy array with each value in one row for Embedding input
# Row i holds the lower-cased, singularised text of Events[i].
event_array = convert_LISTtoNUMPY_for_Embedding(event)
event_array

array([['certification'],
       ['competition'],
       ['course'],
       ['expo'],
       ['fest'],
       ['hackathon'],
       ['internship'],
       ['job'],
       ['seminar'],
       ['talk'],
       ['training'],
       ['webinar'],
       ['workshop']], dtype='<U13')

In [131]:
#FUNCTION to generate Embeddings for domain, events
def Lst_embed(Lst , array):
    """Build a dict that maps Lst[i] to the embedding of array[i].

    Lst:   labels used as dictionary keys.
    array: one entry per label; each entry is passed to the global `embed` model
           and the result is stored as a NumPy array.
    """
    return {Lst[position]: embed(row).numpy() for position, row in enumerate(array)}

In [132]:
# Print each embedding row on a single (very wide) line, showing 5 items at each end.
np.set_printoptions(linewidth=100000, edgeitems=5)

In [133]:
# Domain names to vectors: keys are the original-case names from `Domain`,
# values are the embeddings of the matching lower-cased rows of `domain_array`.
Domain_embedding = Lst_embed(Domain,domain_array)
Domain_embedding

{'Artificial Intelligence': array([[-0.02381875,  0.05594338, -0.07246269, -0.04354268, -0.04750569, -0.04718925, -0.00347527, -0.24177687, -0.03963751,  0.10682512,  0.06793644, -0.04864619, -0.01375441,  0.05176971, -0.19391337,  0.01247761, -0.04312132, -0.05439579,  0.03666145,  0.05388505,  0.10056886,  0.02378981,  0.03047369, -0.08116286,  0.08467728,  0.20824097,  0.1531769 ,  0.13950138,  0.09806154,  0.02888711, -0.04995367, -0.11170217,  0.05676451,  0.01363306,  0.04906087,  0.05782437,  0.06803175, -0.09751923,  0.03738473,  0.00217713, -0.06354721,  0.04211215, -0.0572245 ,  0.0718331 , -0.05395149, -0.06211198, -0.05455518,  0.01159068,  0.12673263,  0.0441965 ,  0.11889018, -0.00200013,  0.1356021 , -0.06100192, -0.04036981,  0.15302859, -0.01678325,  0.0034187 , -0.05341627,  0.00181504,  0.12422847, -0.05650365, -0.00933342,  0.01072138, -0.07174584,  0.11136372,  0.00601638, -0.00930537, -0.02702697,  0.00130041, -0.03623686, -0.05587099,  0.11041072,  0.05636852,  0

In [134]:
# Event names to vectors: keys are the original (plural) names from `Events`,
# values are the embeddings of the matching rows of `event_array`.
Event_embedding = Lst_embed(Events, event_array)
Event_embedding

{'Certifications': array([[-0.02622697,  0.1323314 , -0.07933656, -0.05958174,  0.02174166,  0.01774574,  0.00393504,  0.15573442,  0.03231861,  0.15558924, -0.02188332,  0.05177488,  0.03396592,  0.0251112 ,  0.04587876,  0.02054276,  0.05123631,  0.00960169,  0.08924967,  0.02350604,  0.2586215 , -0.01055706, -0.03316978,  0.04662339, -0.01685476, -0.05775296, -0.05638898, -0.05432487, -0.00763475, -0.01671426,  0.03887858, -0.16345112,  0.07165498,  0.02070082, -0.0658092 ,  0.08015142,  0.02607125, -0.07549284,  0.05855378, -0.13110675, -0.04210997, -0.03396124, -0.12198041, -0.08546801, -0.05965901,  0.04859266, -0.05623327, -0.05659505,  0.11494159,  0.1686635 , -0.01245608, -0.13544455,  0.09639972, -0.08678984, -0.07847134,  0.00334613, -0.00114855,  0.09728484, -0.02478104, -0.09510131,  0.10785009,  0.10539845,  0.06441829, -0.0618402 , -0.00102796, -0.08595154,  0.1213435 , -0.02296631, -0.06114709,  0.07895839,  0.03255863, -0.0283063 ,  0.01853368,  0.11436556,  0.09289436

# Extracting Domains & Events from input   

In [135]:
#FUNCTION for Domain_Recommendation for respective ngrams
def Domain_Recommendation_ngram(preprocess_ngram, threshold):
    """Recommend domains for every input event based on its n-grams.

    preprocess_ngram: DataFrame whose 'input' column holds, per event, a list of n-gram strings.
    threshold:        minimum cosine similarity between any n-gram embedding and a domain
                      embedding for that domain to be recommended.

    Returns a dict keyed by the original event text (first column of the global `input`
    frame); each value is the list of matching names from `Domain`, or ['Other'] when
    no domain reaches the threshold.
    """
    recommendations = {}
    event_texts = input.iloc[:, 0]
    for j in range(len(preprocess_ngram['input'])):
        ngrams = preprocess_ngram['input'][j]
        domain_recommend = [] #list to keep domains matched to input event
        # An event can have no n-grams at all (a one-word text has no bigrams);
        # there is nothing to embed or compare then, so skip straight to the fallback.
        if len(ngrams) > 0:
            ngram_vectors = embed(ngrams).numpy()
            for domain_name in Domain:
                similarity_index = cosine_similarity(ngram_vectors, Domain_embedding[domain_name])
                # One n-gram at or above the threshold is enough to recommend the domain.
                if (similarity_index >= threshold).any():
                    domain_recommend.append(domain_name)
        if not domain_recommend:
            domain_recommend.append('Other') #if no domain recommended
        recommendations[event_texts[j]] = domain_recommend
    return recommendations

In [136]:
# Domain matches from bigrams; 0.5 is the minimum cosine similarity that counts as a match.
Domain_Recommendation_Bigram = Domain_Recommendation_ngram(preprocessBigram, 0.5)
Domain_Recommendation_Bigram

{'AI & Big Data Expo in 36 hours': ['Data Science'],
 'Advanced Certification in Open Banking from July 2020': ['Other'],
 'Arrange a Machine Learning Workshop and Internship ': ['Machine Learning',
  'Software Architecture'],
 'Attend the Computer Vision and Deep Learning Summit tomorrow': ['Data Science',
  'Machine Learning',
  'Software Architecture'],
 'Bolt IoT and ML Innovation Training in August': ['Development Processes',
  'IoT',
  'Software Architecture'],
 'Book Online Tickets for workshops on C programming': ['C'],
 'CodeBoost codeathon is live now!': ['Other'],
 'Digital Marketing Workshop and Webinar ': ['Web Development'],
 "Don't miss the Web Development Course in Pune": ['Development Processes',
  'Software Architecture',
  'Web Development'],
 'Four days of hands-on security training followed by the two-day main conference covering the information security research': ['Artificial Intelligence',
  'Data Science',
  'Management',
  'Security',
  'Software Architecture'

In [137]:
# Domain matches from unigrams; 0.5 is the minimum cosine similarity that counts as a match.
Domain_Recommendation_Unigram = Domain_Recommendation_ngram(preprocessUnigram,0.5)
Domain_Recommendation_Unigram

{'AI & Big Data Expo in 36 hours': ['Data Science'],
 'Advanced Certification in Open Banking from July 2020': ['Finance'],
 'Arrange a Machine Learning Workshop and Internship ': ['Hardware',
  'Machine Learning',
  'Software Architecture'],
 'Attend the Computer Vision and Deep Learning Summit tomorrow': ['Data Science',
  'Hardware',
  'Machine Learning',
  'Software Architecture'],
 'Bolt IoT and ML Innovation Training in August': ['Development Processes',
  'IoT',
  'Software Architecture'],
 'Book Online Tickets for workshops on C programming': ['C'],
 'CodeBoost codeathon is live now!': ['Other'],
 'Digital Marketing Workshop and Webinar ': ['Management'],
 "Don't miss the Web Development Course in Pune": ['Development Processes',
  'Management',
  'Web Development'],
 'Four days of hands-on security training followed by the two-day main conference covering the information security research': ['Data Science',
  'Security'],
 'Get a System Administration certification from Purple

In [138]:
def Combine_Unigram_Bigram(unigram,bigram):
    """Merge the unigram and bigram recommendations of every event.

    unigram, bigram: dicts mapping an event text to a list of recommended labels.

    Returns a dict with the keys of ``unigram``; each value is the sorted list of the
    distinct labels found in either input. A key that is missing from ``bigram`` simply
    contributes no extra labels.
    """
    combined = {}
    for key, unigram_labels in unigram.items():
        # Plain set arithmetic: no NumPy round-trip, so empty lists need no dtype
        # promotion when they are merged with string labels.
        labels = set(unigram_labels)
        labels.update(bigram.get(key, ()))
        combined[key] = sorted(labels)
    return combined

In [139]:
# Per event: every domain recommended by the unigram pass or the bigram pass.
Domain_Recommendation = Combine_Unigram_Bigram(Domain_Recommendation_Unigram, Domain_Recommendation_Bigram)
Domain_Recommendation

{'AI & Big Data Expo in 36 hours': ['Data Science'],
 'Advanced Certification in Open Banking from July 2020': ['Finance', 'Other'],
 'Arrange a Machine Learning Workshop and Internship ': ['Hardware',
  'Machine Learning',
  'Software Architecture'],
 'Attend the Computer Vision and Deep Learning Summit tomorrow': ['Data Science',
  'Hardware',
  'Machine Learning',
  'Software Architecture'],
 'Bolt IoT and ML Innovation Training in August': ['Development Processes',
  'IoT',
  'Software Architecture'],
 'Book Online Tickets for workshops on C programming': ['C'],
 'CodeBoost codeathon is live now!': ['Other'],
 'Digital Marketing Workshop and Webinar ': ['Management', 'Web Development'],
 "Don't miss the Web Development Course in Pune": ['Development Processes',
  'Management',
  'Software Architecture',
  'Web Development'],
 'Four days of hands-on security training followed by the two-day main conference covering the information security research': ['Artificial Intelligence',
  'D

In [140]:
#FUNCTION for Event_Recommendation for respective ngrams
def Event_Recommendation_ngram(preprocess_ngram, threshold):
    """Recommend event types for every input event based on its n-grams.

    preprocess_ngram: DataFrame whose 'input' column holds, per event, a list of n-gram strings.
    threshold:        minimum cosine similarity between any n-gram embedding and an event-type
                      embedding for that event type to be recommended.

    Returns a dict keyed by the original event text (first column of the global `input`
    frame); each value is the (possibly empty) list of matching names from `Events`.
    """
    recommendations = {}
    event_texts = input.iloc[:, 0]
    for j in range(len(preprocess_ngram['input'])):
        ngrams = preprocess_ngram['input'][j]
        event_recommend = [] #list to keep event_type matched to input text event
        # An event can have no n-grams at all (a one-word text has no bigrams);
        # there is nothing to embed or compare then, so it gets no recommendation.
        if len(ngrams) > 0:
            ngram_vectors = embed(ngrams).numpy()
            for event_name in Events:
                similarity_index = cosine_similarity(ngram_vectors, Event_embedding[event_name])
                # One n-gram at or above the threshold is enough to recommend the event type.
                if (similarity_index >= threshold).any():
                    event_recommend.append(event_name)
        recommendations[event_texts[j]] = event_recommend
    return recommendations

In [141]:
# Event-type matches from bigrams; 0.5 is the minimum cosine similarity that counts as a match.
Event_Recommendation_Bigram = Event_Recommendation_ngram(preprocessBigram, 0.5)
Event_Recommendation_Bigram

{'AI & Big Data Expo in 36 hours': ['Expos',
  'Seminars',
  'Webinars',
  'Workshops'],
 'Advanced Certification in Open Banking from July 2020': ['Certifications'],
 'Arrange a Machine Learning Workshop and Internship ': ['Expos',
  'Internships',
  'Jobs',
  'Seminars',
  'Trainings',
  'Webinars',
  'Workshops'],
 'Attend the Computer Vision and Deep Learning Summit tomorrow': ['Seminars',
  'Workshops'],
 'Bolt IoT and ML Innovation Training in August': ['Certifications',
  'Trainings'],
 'Book Online Tickets for workshops on C programming': ['Expos', 'Workshops'],
 'CodeBoost codeathon is live now!': [],
 'Digital Marketing Workshop and Webinar ': ['Expos',
  'Seminars',
  'Webinars',
  'Workshops'],
 "Don't miss the Web Development Course in Pune": ['Courses'],
 'Four days of hands-on security training followed by the two-day main conference covering the information security research': ['Seminars',
  'Trainings'],
 'Get a System Administration certification from PurpleHat today.

In [142]:
# Event-type matches from unigrams; 0.5 is the minimum cosine similarity that counts as a match.
Event_Recommendation_Unigram = Event_Recommendation_ngram(preprocessUnigram,0.5)
Event_Recommendation_Unigram

{'AI & Big Data Expo in 36 hours': ['Expos', 'Fests', 'Seminars', 'Workshops'],
 'Advanced Certification in Open Banking from July 2020': ['Certifications'],
 'Arrange a Machine Learning Workshop and Internship ': ['Expos',
  'Internships',
  'Jobs',
  'Seminars',
  'Trainings',
  'Webinars',
  'Workshops'],
 'Attend the Computer Vision and Deep Learning Summit tomorrow': ['Seminars',
  'Trainings'],
 'Bolt IoT and ML Innovation Training in August': ['Competitions',
  'Trainings'],
 'Book Online Tickets for workshops on C programming': ['Expos',
  'Seminars',
  'Webinars',
  'Workshops'],
 'CodeBoost codeathon is live now!': [],
 'Digital Marketing Workshop and Webinar ': ['Expos',
  'Seminars',
  'Webinars',
  'Workshops'],
 "Don't miss the Web Development Course in Pune": ['Courses'],
 'Four days of hands-on security training followed by the two-day main conference covering the information security research': ['Expos',
  'Seminars',
  'Trainings',
  'Webinars',
  'Workshops'],
 'Get 

In [143]:
# Per event: every event type recommended by the unigram pass or the bigram pass.
Event_Recommendation = Combine_Unigram_Bigram(Event_Recommendation_Unigram, Event_Recommendation_Bigram)
Event_Recommendation

{'AI & Big Data Expo in 36 hours': ['Expos',
  'Fests',
  'Seminars',
  'Webinars',
  'Workshops'],
 'Advanced Certification in Open Banking from July 2020': ['Certifications'],
 'Arrange a Machine Learning Workshop and Internship ': ['Expos',
  'Internships',
  'Jobs',
  'Seminars',
  'Trainings',
  'Webinars',
  'Workshops'],
 'Attend the Computer Vision and Deep Learning Summit tomorrow': ['Seminars',
  'Trainings',
  'Workshops'],
 'Bolt IoT and ML Innovation Training in August': ['Certifications',
  'Competitions',
  'Trainings'],
 'Book Online Tickets for workshops on C programming': ['Expos',
  'Seminars',
  'Webinars',
  'Workshops'],
 'CodeBoost codeathon is live now!': [],
 'Digital Marketing Workshop and Webinar ': ['Expos',
  'Seminars',
  'Webinars',
  'Workshops'],
 "Don't miss the Web Development Course in Pune": ['Courses'],
 'Four days of hands-on security training followed by the two-day main conference covering the information security research': ['Expos',
  'Seminar

# Mapping Recommendations with Employees and their preference

In [144]:
Employee_Preference_Data

Unnamed: 0,Name,Domain,Event1,Event2
0,Bryan Brock,Management,Jobs,Hackathons
1,Joseph Sullivan,Coding,Certifications,Webinars
2,Sherri Dawson,Security,Internships,Fests
3,Dustin Ferguson,Hardware,Competitions,Webinars
4,Kayla Young,Web Development,Expos,Certifications
...,...,...,...,...
178,Katelyn Barnes,Coding,Courses,Webinars
179,Gabriel Cruz,Other,Talks,Fests
180,Christina Williams,Software Architecture,Talks,Fests
181,William Turner,Development Processes,Trainings,Seminars


In [145]:
def find_employees_domain(recommendeddomains):
    """Return the 'Name' values (original row index kept) of employees whose Domain is in ``recommendeddomains``."""
    domain_matches = Employee_Preference_Data['Domain'].isin(recommendeddomains)
    return Employee_Preference_Data.loc[domain_matches, 'Name']

In [146]:
def find_employees_event(recommendedevents):
    """Return the 'Name' values (original row index kept) of employees whose Event1 or Event2 is in ``recommendedevents``."""
    first_choice_matches = Employee_Preference_Data['Event1'].isin(recommendedevents)
    second_choice_matches = Employee_Preference_Data['Event2'].isin(recommendedevents)
    return Employee_Preference_Data.loc[first_choice_matches | second_choice_matches, 'Name']

In [147]:
def pandas_to_string(employee_names_df):
    """Join all values of a pandas object of names into one ', '-separated string."""
    flat_names = employee_names_df.to_numpy().reshape(-1)
    return ', '.join(flat_names.tolist())

In [148]:
#Mapping Recommendations with Employees and their preference
# The rows are collected in a list and the frame is built once at the end. Writing through
# frame[column][i] is chained indexing, which pandas does not guarantee to write back.
# NOTE(review): a combined list such as ['Finance', 'Other'] is treated as two real domains here;
# confirm whether the 'Other' fallback should be ignored when another domain matched.
recommendation_rows = []
for i in range(len(input)):
    event_text = input.iloc[i,0] # iloc[rows,columns]
    recommended_domains = Domain_Recommendation[event_text]
    recommended_events = Event_Recommendation[event_text]
    if (recommended_domains == ['Other']): #condition when no domain detected
        recommended_names = find_employees_event(recommended_events) #direct recommendation with event names
    else:
        DomainMatchedIndex = find_employees_domain(recommended_domains).index # Getting index of employees with recommended domains
        EventsMatchedIndex = find_employees_event(recommended_events).index # Getting index of employees with recommended events
        RecommendationIndex = DomainMatchedIndex.intersection(EventsMatchedIndex) #Intersection to get Respective Domain Employees with their Preferred events
        recommended_names = Employee_Preference_Data['Name'][RecommendationIndex]
    recommendation_rows.append({
        'input': event_text,
        'Recommended_Employees': pandas_to_string(recommended_names),
    })
# Same columns and same index as before: one row per input event.
Recommendations = pd.DataFrame(recommendation_rows, columns=['input','Recommended_Employees'], index=input.index)

In [149]:
Recommendations

Unnamed: 0,input,Recommended_Employees
0,Get a System Administration certification from PurpleHat today.,"Joshua Miller, Virginia Lucas"
1,Lockdown special courses on Ydemi. 22 hours left!,"David Foster, Jennifer Merritt, Robert Ramirez"
2,CodeBoost codeathon is live now!,
3,Attend the Computer Vision and Deep Learning Summit tomorrow,"David Davis, Philip Woods, Jessica Lee, Rebecca Kelley, Melissa Diaz"
4,AI & Big Data Expo in 36 hours,Jessica Terrell
5,Book Online Tickets for workshops on C programming,"Melinda West, Christopher Hernandez, Allison Robinson, Beverly Hayes, Teresa Riley, Gina Diaz, Damon Lane"
6,Spoonshot is hiring a Work From Home - Data Science Internship for 6 months in Bengaluru,"Adam Richards, Michael Smith, Daniel Johnson"
7,Java Training in Electronic city Bangalore,"Russell Vargas, Corey Beck, Edward Hampton, James Raymond, Melissa Diaz"
8,Ted Talk in the field of Security,"Kevin Robbins, Ernest Pierce, Marissa Dawson"
9,Arrange a Machine Learning Workshop and Internship,"Dustin Ferguson, David Davis, Philip Woods, Christian Odom, Brian Patterson, Matthew Martinez, Jessica Lee, Tanya Armstrong, Jasmine Brown, Rebecca Kelley, Sharon Mitchell, Stacy Jones, Melissa Diaz"


### Exporting Recommendations to a Spreadsheet

In [150]:
# Write an .xlsx workbook: pandas 2.0 removed the xlwt engine that produced legacy .xls files,
# so the old 'output.xls' target fails there. Writing .xlsx requires the openpyxl package.
Recommendations.to_excel('output.xlsx', index=False)