### Importing libraries  

In [1]:
import numpy as np
import pandas as pd
import csv
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# reading csv file
data = pd.read_csv("CCMLEmployeeData.csv")          

In [3]:
type(data)

pandas.core.frame.DataFrame

In [4]:
#checking format of data from file
data.head()                      

Unnamed: 0,Name,Domain,Event1,Event2
0,Bryan Brock,Management,Jobs,Hackathons
1,Joseph Sullivan,Coding,Certifications,Webinars
2,Sherri Dawson,Security,Internships,Fests
3,Dustin Ferguson,Hardware,Competitions,Webinars
4,Kayla Young,Web Development,Expos,Certifications


## Using bag of words method for finding domains 

In [5]:
#making list of all domains in dataset
domains = list(set(data['Domain']))

In [6]:
type(domains)

list

In [7]:
domains

['Machine Learning',
 'Higher Education',
 'Python',
 'Other',
 'Hardware',
 'Coding',
 'Data Science',
 'Networking',
 'Security',
 'Java',
 'Web Development',
 'Cloud Computing',
 'IoT',
 'Software Architecture',
 'Development Processes',
 'Blockchain',
 'Artificial Intelligence',
 'C',
 'JavaScript',
 'Finance',
 'Mobile Applications',
 'Management',
 'C++']

### Similar to domain implementation for events for event1 and event2 combinely

In [8]:
events = list(set(data['Event1']).union(set(data['Event2'])))

In [9]:
type(events)

list

In [10]:
events

['Jobs',
 'Expos',
 'Talks',
 'Hackathons',
 'Competitions',
 'Seminars',
 'Fests',
 'Certifications',
 'Courses',
 'Internships',
 'Workshops',
 'Trainings',
 'Webinars']

#### Domain model

In [11]:
#function for checking domain in input exists or not

def domain_match(s1, s2):
    if(s1 == s2):
        return True
    else:
        return False

def get_domain_feature(sentence):
    
    feature = np.zeros(len(domains))
    words = sentence.split()
    for word in words:
        for i, ft in enumerate(domains):
            if(domain_match(ft.split()[0].lower(), word.lower())):
                feature[i] = feature[i] + 1
                break
    return feature.reshape(1, -1)

In [12]:
f = get_domain_feature("Certification program in Python and IoT and software architecture")

In [13]:
f

array([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.]])

In [14]:
f.shape

(1, 23)

In [15]:
#creating sample training data
dataset = np.zeros((len(domains), len(domains) + 1))
for i in range(len(domains)):
    dataset[i][i] = 1
    dataset[i][len(domains)] = i

In [16]:
dataset.shape

(23, 24)

In [17]:
x = dataset[:, :-1]

In [18]:
x.shape

(23, 23)

In [19]:
y = dataset[:, -1].reshape(-1, 1)

In [20]:
y.shape

(23, 1)

In [21]:
LR = LogisticRegression()
LR.fit(x, y)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [22]:
domains[int(LR.predict(get_domain_feature("Machine Learning Certification"))[0])]

'Machine Learning'

#### Event Model

In [23]:
#function to check particular event in input exists in dataset or not

def event_match(s1, s2):
    if(s1 == s2):
        return True
    else:
        return False

def get_event_feature(sentence):
    feature = np.zeros(len(events))
    words = sentence.split()
    for word in words:
        for i, ft in enumerate(events):
            if(event_match(ft, word)):
                feature[i] = feature[i] + 1
                break
    return feature.reshape(1, -1)

In [24]:
#creating sample training data
ev_dataset = np.zeros((len(events), len(events) + 1))
for i in range(len(events)):
    ev_dataset[i][i] = 1
    ev_dataset[i][len(events)] = i

In [25]:
ev_dataset.shape

(13, 14)

In [26]:
event_x = ev_dataset[:, :-1]

In [27]:
event_x.shape

(13, 13)

In [28]:
event_y = ev_dataset[:, -1].reshape(-1, 1)

In [29]:
event_y.shape

(13, 1)

In [30]:
event_LR = LogisticRegression()
event_LR.fit(event_x, event_y)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [31]:
events[int(event_LR.predict(get_event_feature("Internships on Machine Learning and Iot"))[0])]

'Internships'

#### Iterate through the employee dataset to output the names

In [32]:
event_name = "Internships in IoT and Machine learning"

In [33]:
def get_recommendation(event_name):
    emp = list()
    domain = domains[int(LR.predict(get_domain_feature(event_name))[0])]
    event = events[int(event_LR.predict(get_event_feature(event_name))[0])]
    print(domain,event)

    n = data.shape[0]
    #print(n)

    for i in range(n):
        if(data.iloc[i, 1] == domain and (data.iloc[i, 2] == event or data.iloc[i, 3] == event)):
            emp.append(data.iloc[i, 0])
    return emp

### Iterate through input csv file for getting output and apply get_recommendation method

In [34]:
db = pd.read_csv('Input.csv')

db['employee'] = db['sentence'].apply(get_recommendation)

Python Webinars
IoT Certifications
Machine Learning Webinars
Software Architecture Webinars
C++ Jobs
Networking Certifications
Data Science Webinars
Finance Webinars
Coding Webinars


In [35]:
db.head(5)

Unnamed: 0,sentence,employee
0,Python internships available!!,"[Michael Alvarado, Carol Larson]"
1,Iot Certifications available,[Curtis Ortega]
2,Machine Learning internship programme starting...,[Christian Odom]
3,Learn software architecture and get certificate,[]
4,Jobs are available for C++ language students,"[Michael Parrish, Gail Duran]"


In [36]:
#file = open('employeeData.csv','w')

In [37]:
#csv_writer = csv.writer(file)

In [38]:
#csv_writer.writerows('employee')
print(db.size)

18


### Store output in CSV file as output file with two columns as input sentence and recommended employees 

In [39]:

with open('Output.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Sentence", "Employee"])
    for i in range(db.size):
        try:
            writer.writerow([str(db['sentence'][i]),str(db['employee'][i])])
        except:
            continue
    file.close()

### Reading output file to confirm data stored successfully ..... 

In [40]:
file = open('Output.csv','r')
print('Printing data from CSV file made as an output file.........')
print('')
for i,row in enumerate(file):
    r = row.split(',')
    print(str((i+1))+') Sentence:-',r[0],'\n','Employees:-',r[1:])
print('')
print('Whole Data printed........')
file.close()

Printing data from CSV file made as an output file.........

1) Sentence:- Sentence 
 Employees:- ['Employee\n']
2) Sentence:- Python internships available!! 
 Employees:- ['"[\'Michael Alvarado\'', ' \'Carol Larson\']"\n']
3) Sentence:- Iot Certifications available 
 Employees:- ["['Curtis Ortega']\n"]
4) Sentence:- Machine Learning internship programme starting from next week 
 Employees:- ["['Christian Odom']\n"]
5) Sentence:- Learn software architecture and get certificate 
 Employees:- ['[]\n']
6) Sentence:- Jobs are available for C++ language students 
 Employees:- ['"[\'Michael Parrish\'', ' \'Gail Duran\']"\n']
7) Sentence:- Certifications available in networking in 5000 only 
 Employees:- ['"[\'Cassandra Meza\'', " 'Valerie Snow'", ' \'Wyatt Cruz\']"\n']
8) Sentence:- Participate for Data science hackathons 
 Employees:- ['[]\n']
9) Sentence:- Finance fests are arranged for students 
 Employees:- ['[]\n']
10) Sentence:- Attend coding talks to improve your knowledge 
 Employees