# Contributor Selection
Here, we are using the bio + about data gathered from LinkedIn to determine which contributor is suited for a particular task. For this we provide user profiles and keywords, and based on them, we can find the ideal candidates for our specific task.

In [4]:
# Import libraries
import re
import string
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize,sent_tokenize
import nltk
nltk.download('punkt_tab')
nltk.download('wordnet')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

## Constants

In [5]:
# This contains the common contraction words in English language. And we are contracting them so that they can be subjected to clean if necessary.
CONTRACTION_MAP = {
"ain't": "is not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he would",
"he'd've": "he would have",
"he'll": "he will",
"he'll've": "he he will have",
"he's": "he is",
"how'd": "how did",
"how'd'y": "how do you",
"how'll": "how will",
"how's": "how is",
"I'd": "I would",
"I'd've": "I would have",
"I'll": "I will",
"I'll've": "I will have",
"I'm": "I am",
"I've": "I have",
"i'd": "i would",
"i'd've": "i would have",
"i'll": "i will",
"i'll've": "i will have",
"i'm": "i am",
"i've": "i have",
"isn't": "is not",
"it'd": "it would",
"it'd've": "it would have",
"it'll": "it will",
"it'll've": "it will have",
"it's": "it is",
"let's": "let us",
"ma'am": "madam",
"mayn't": "may not",
"might've": "might have",
"mightn't": "might not",
"mightn't've": "might not have",
"must've": "must have",
"mustn't": "must not",
"mustn't've": "must not have",
"needn't": "need not",
"needn't've": "need not have",
"o'clock": "of the clock",
"oughtn't": "ought not",
"oughtn't've": "ought not have",
"shan't": "shall not",
"sha'n't": "shall not",
"shan't've": "shall not have",
"she'd": "she would",
"she'd've": "she would have",
"she'll": "she will",
"she'll've": "she will have",
"she's": "she is",
"should've": "should have",
"shouldn't": "should not",
"shouldn't've": "should not have",
"so've": "so have",
"so's": "so as",
"that'd": "that would",
"that'd've": "that would have",
"that's": "that is",
"there'd": "there would",
"there'd've": "there would have",
"there's": "there is",
"they'd": "they would",
"they'd've": "they would have",
"they'll": "they will",
"they'll've": "they will have",
"they're": "they are",
"they've": "they have",
"to've": "to have",
"wasn't": "was not",
"we'd": "we would",
"we'd've": "we would have",
"we'll": "we will",
"we'll've": "we will have",
"we're": "we are",
"we've": "we have",
"weren't": "were not",
"what'll": "what will",
"what'll've": "what will have",
"what're": "what are",
"what's": "what is",
"what've": "what have",
"when's": "when is",
"when've": "when have",
"where'd": "where did",
"where's": "where is",
"where've": "where have",
"who'll": "who will",
"who'll've": "who will have",
"who's": "who is",
"who've": "who have",
"why's": "why is",
"why've": "why have",
"will've": "will have",
"won't": "will not",
"won't've": "will not have",
"would've": "would have",
"wouldn't": "would not",
"wouldn't've": "would not have",
"y'all": "you all",
"y'all'd": "you all would",
"y'all'd've": "you all would have",
"y'all're": "you all are",
"y'all've": "you all have",
"you'd": "you would",
"you'd've": "you would have",
"you'll": "you will",
"you'll've": "you will have",
"you're": "you are",
"you've": "you have"
}

In [6]:
ONE_TWO_LETTER_WORDS = ['ai', 'c#', 'r', 'go', 'c']
OMIT_PUNCTUATION_LIST = ['#']

## Load and Clean the Dataset
Load the initial dataset and remove the unwanted columns. Then save it on a different file, so that we do not have to clean it every time.

In [8]:
# Load and clean dataset
csv_file_path = './data/linkedin_profile_dataset_1.csv'
csv_data = pd.read_csv(csv_file_path, delimiter=';')
csv_data.head(10)

Unnamed: 0,Full Name,First Name,Last Name,Position,Company,University,Profile,About,Bio,Unnamed: 9,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,Dilusha Amarasekara,Dilusha,Amarasekara,Software Engineer,IFS,University of Moratuwa,https://www.linkedin.com/in/diluchamo/,I'm a recent graduate with a BSc (Hons) in Inf...,Software Engineer at IFS R&D International (Pv...,,...,,,,,,,,,,
1,Anuradha Basnayake,Anuradha,Basnayake,Associate Software Engineer,Velou,University of Colombo School of Computing,https://www.linkedin.com/in/anuradha-basnayake/,Serial learner with a love for building high q...,Associate Software Engineer @ Velou | CS Gradu...,,...,,,,,,,,,,
2,Yasitha Rukshan,Yasitha,Rukshan,Software Engineer,Intervest Software Technologies (Private) Limited,University of Ruhuna,https://www.linkedin.com/in/yasitha-rukshan-75...,I am a Software Engineer at Intervest Software...,Software Engineer at Intervest | Full Stack | ...,,...,,,,,,,,,,
3,Sasindu Dulanjana,Sasindu,Dulanjana,Software Engineer,EY,University of Greenwich,https://www.linkedin.com/in/sasindu-dulanjana-...,After graduations(Bsc in Information Technolog...,Software Engineer,,...,,,,,,,,,,
4,Thilina Ranaweera,Thilina,Ranaweera,Technical Lead,Wiley,SLIIT,https://www.linkedin.com/in/thilina-ranaweera-...,Experienced Capital Market Specialist with a d...,Tech Lead at Wiley,,...,,,,,,,,,,
5,Raveen Dissanayaka,Raveen,Dissanayaka,Senior Software Engineer,IFS,University of Westminster,https://www.linkedin.com/in/raveen-dissanayaka...,🚀 Senior Software Engineer at IFS | Poka Produ...,Senior Software Engineer | Python | Java | AWS...,,...,,,,,,,,,,
6,Venuri Hettiarachchi,Venuri,Hettiarachchi,Software Engineer,Axiata Digital Labs,SLIIT,https://www.linkedin.com/in/venuri-hettiarachchi/,I'm a Software Engineer with 3+ years of exper...,Software Engineer @ ADL | Full Stack Developer...,,...,,,,,,,,,,
7,Paheerathan Selvarasa,Paheerathan,Selvarasa,Software Engineer,Wiley,,https://www.linkedin.com/in/paheerathan-selvar...,"A Software Engineer, Salesforce Certified with...",Software Engineer | 5x Salesforce Certified,,...,,,,,,,,,,
8,Madushan Abeyrathna,Madushan,Abeyrathna,"Technical Specialist, Software Engineering",Wiley,SLIIT,https://www.linkedin.com/in/chamara-abeyrathna/,Backend developer with more than six years of ...,"Technical Specialist, Software Engineering at ...",,...,,,,,,,,,,
9,Sulanjala Fonseka,Sulanjala,Fonseka,Software Engineer,Wiley,,https://www.linkedin.com/in/sdickshan/,I am an experienced full-stack software engine...,Software Engineer at Wiley | AWS CQCP,,...,,,,,,,,,,


In [9]:
csv_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1615 entries, 0 to 1614
Data columns (total 26 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Full Name    621 non-null    object 
 1   First Name   621 non-null    object 
 2   Last Name    621 non-null    object 
 3   Position     610 non-null    object 
 4   Company      596 non-null    object 
 5   University   540 non-null    object 
 6   Profile      621 non-null    object 
 7   About        617 non-null    object 
 8   Bio          387 non-null    object 
 9   Unnamed: 9   234 non-null    object 
 10  Unnamed: 10  0 non-null      float64
 11  Unnamed: 11  0 non-null      float64
 12  Unnamed: 12  0 non-null      float64
 13  Unnamed: 13  0 non-null      float64
 14  Unnamed: 14  0 non-null      float64
 15  Unnamed: 15  0 non-null      float64
 16  Unnamed: 16  0 non-null      float64
 17  Unnamed: 17  0 non-null      float64
 18  Unnamed: 18  0 non-null      float64
 19  Unname

In [10]:
# Retain relevant columns and remove unwanted columns
relevant_columns = ['Full Name', 'Position', 'Company', 'University', 'About', 'Bio']
cleaned_data = csv_data[relevant_columns].dropna().reset_index(drop=True)
cleaned_data.head(10)

Unnamed: 0,Full Name,Position,Company,University,About,Bio
0,Dilusha Amarasekara,Software Engineer,IFS,University of Moratuwa,I'm a recent graduate with a BSc (Hons) in Inf...,Software Engineer at IFS R&D International (Pv...
1,Anuradha Basnayake,Associate Software Engineer,Velou,University of Colombo School of Computing,Serial learner with a love for building high q...,Associate Software Engineer @ Velou | CS Gradu...
2,Yasitha Rukshan,Software Engineer,Intervest Software Technologies (Private) Limited,University of Ruhuna,I am a Software Engineer at Intervest Software...,Software Engineer at Intervest | Full Stack | ...
3,Sasindu Dulanjana,Software Engineer,EY,University of Greenwich,After graduations(Bsc in Information Technolog...,Software Engineer
4,Thilina Ranaweera,Technical Lead,Wiley,SLIIT,Experienced Capital Market Specialist with a d...,Tech Lead at Wiley
5,Raveen Dissanayaka,Senior Software Engineer,IFS,University of Westminster,🚀 Senior Software Engineer at IFS | Poka Produ...,Senior Software Engineer | Python | Java | AWS...
6,Venuri Hettiarachchi,Software Engineer,Axiata Digital Labs,SLIIT,I'm a Software Engineer with 3+ years of exper...,Software Engineer @ ADL | Full Stack Developer...
7,Madushan Abeyrathna,"Technical Specialist, Software Engineering",Wiley,SLIIT,Backend developer with more than six years of ...,"Technical Specialist, Software Engineering at ..."
8,Thilani Abeysooriya,Senior Software Engineering Manager,Wiley,University of Colombo,Technical Leader with 16+ years of experience ...,Senior Software Engineering Manager
9,Samitha Kolambage,Senior Software Engineer (Automation/SQE),Wiley Sri Lanka,SLIIT,6+ Years of experience in Software Quality Eng...,Senior Software Engineer at Wiley (Automation/...


In [14]:
# Save the above dataset
cleaned_file_path = './data/cleaned_linkedin_profiles.csv'
cleaned_data.to_csv(cleaned_file_path, index=False)

## Load the Cleaned Dataset (which does not have unwanted columns)
Load the cleaned dataset and subject it in to NLP data cleaning techniques so that we get the unique words. But the issue with this is that there can be certain words that have one or two letters or with punctuation (e.g., R, AI, C#). They will be removed due to our cleaning pipeline. If those are needed, we need to specifically say that those should not be removed.

In [15]:
# Load the data from cleaned_file_path
cleaned_data = pd.read_csv(cleaned_file_path)
cleaned_data.head(10)

Unnamed: 0,Full Name,Position,Company,University,About,Bio
0,Dilusha Amarasekara,Software Engineer,IFS,University of Moratuwa,I'm a recent graduate with a BSc (Hons) in Inf...,Software Engineer at IFS R&D International (Pv...
1,Anuradha Basnayake,Associate Software Engineer,Velou,University of Colombo School of Computing,Serial learner with a love for building high q...,Associate Software Engineer @ Velou | CS Gradu...
2,Yasitha Rukshan,Software Engineer,Intervest Software Technologies (Private) Limited,University of Ruhuna,I am a Software Engineer at Intervest Software...,Software Engineer at Intervest | Full Stack | ...
3,Sasindu Dulanjana,Software Engineer,EY,University of Greenwich,After graduations(Bsc in Information Technolog...,Software Engineer
4,Thilina Ranaweera,Technical Lead,Wiley,SLIIT,Experienced Capital Market Specialist with a d...,Tech Lead at Wiley
5,Raveen Dissanayaka,Senior Software Engineer,IFS,University of Westminster,🚀 Senior Software Engineer at IFS | Poka Produ...,Senior Software Engineer | Python | Java | AWS...
6,Venuri Hettiarachchi,Software Engineer,Axiata Digital Labs,SLIIT,I'm a Software Engineer with 3+ years of exper...,Software Engineer @ ADL | Full Stack Developer...
7,Madushan Abeyrathna,"Technical Specialist, Software Engineering",Wiley,SLIIT,Backend developer with more than six years of ...,"Technical Specialist, Software Engineering at ..."
8,Thilani Abeysooriya,Senior Software Engineering Manager,Wiley,University of Colombo,Technical Leader with 16+ years of experience ...,Senior Software Engineering Manager
9,Samitha Kolambage,Senior Software Engineer (Automation/SQE),Wiley Sri Lanka,SLIIT,6+ Years of experience in Software Quality Eng...,Senior Software Engineer at Wiley (Automation/...


In [16]:
cleaned_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 334 entries, 0 to 333
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Full Name   334 non-null    object
 1   Position    334 non-null    object
 2   Company     334 non-null    object
 3   University  334 non-null    object
 4   About       334 non-null    object
 5   Bio         334 non-null    object
dtypes: object(6)
memory usage: 15.8+ KB


In [17]:
# Remove duplicates
cleaned_data.drop_duplicates(subset=['Full Name', 'Position', 'Company', 'University', 'About', 'Bio'], keep=False, inplace=True)

In [18]:
# Combine 'About' and 'Bio' columns for feature extraction
cleaned_data['Combined'] = cleaned_data['About'] + ' ' + cleaned_data['Bio']

In [19]:
# Search for contractions in the provided text(body) and expand if found.
def expand_contractions(text):
    words = text.split()
    word_list = []
    for word in words:
        _word = word.lower()
        if _word in CONTRACTION_MAP:
            word_list.append(CONTRACTION_MAP[_word])
        else:
            word_list.append(word)
    return ' '.join(word_list)

In [20]:
# Remove punctations
def remove_punctuations(text):
    char_list = []
    for char in text:
        if char in string.punctuation:
            if char in OMIT_PUNCTUATION_LIST:
                char_list.append(char)
        else:
            char_list.append(char)
    text = ''.join(char_list)
    return text

In [21]:
# Remove stop words
# This is only to preprocess the dataset. In a later step, we need to run this again after lemmatizing.
def remove_stop_words(text):
    stopwords = nltk.corpus.stopwords.words('english') # NLTK stop-words list, since Sinhala language is not there we will be using English as the default value.
    other_words = [line.rstrip('\n') for line in open('stopwords.txt')] # Collected stop words in a text file.
    words = text.split()
    word_list = []
    for word in words:
        _word = word.lower()
        if _word in stopwords or _word in other_words:
            continue
        else:
            word_list.append(word)
    return ' '.join(word_list)

In [22]:
# Remove words containing 2 letters or more that 21 letters
def remove_two_letter_words(text):
    words = text.split()
    word_list = []
    for word in words:
        word = word.casefold()
        if len(word) >= 21:
            continue
        if len(word) <= 2:
            if word not in ONE_TWO_LETTER_WORDS:
                continue
            else:
                word_list.append(word)
        else:
            word_list.append(word)
    return ' '.join(word_list)

In [23]:
# Initialize the lemmatizer
# https://www.ibm.com/topics/stemming-lemmatization
# https://www.nltk.org/howto/stem.html
lemmatizer = WordNetLemmatizer()

In [24]:
# This is used to remove special characters after tokenization based on unicodes
def remove_characters_after_tokenization(elem):
    # Common punctuations
    if (elem >= u"\u0020") and (elem <= u"\u002F"):
        if (elem != u"\u0023"): # Do not remove # symbol
            return False
        else:
            return True
    elif (elem >= u"\u003A") and (elem <= u"\u0040"):
        return False
    elif (elem >= u"\u005B") and (elem <= u"\u0060"):
        return False
    elif (elem >= u"\u007B") and (elem <= u"\u007E"):
        return False
    else:
        return True

In [25]:
# Tokenize the text to get the lemmatized words
def tokenize_text(text):
    sentences = sent_tokenize(text)
    wordList = []
    for sentence in sentences:
        wordList += word_tokenize(sentence)
    wordList = filter(remove_characters_after_tokenization, wordList)
    return list(wordList)

In [26]:
# Function to clean text: remove punctuations, emojis, and convert to lowercase
def clean_text(text):
    # Remove emojis
    emoji_pattern = re.compile("["
                                u"\U0001F600-\U0001F64F"  # emoticons
                                u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                                u"\U0001F680-\U0001F6FF"  # transport & map symbols
                                u"\U0001F1E0-\U0001F1FF"  # flags
                                u"\U00002702-\U000027B0"  # other symbols
                                u"\U000024C2-\U0001F251"  # enclosed characters
                                "]+", flags=re.UNICODE)
    text = emoji_pattern.sub(r'', text)  # Remove emojis
    text = expand_contractions(text)
    text = text.replace(u'\ufffd', '8')   # Replaces the ASCII '�' symbol with '8'
    text = text.replace(',', '')          # Removes commas
    text = text.rstrip('\n')              # Removes line breaks
    text = text.casefold()                # Makes all letters lowercase
    text = re.sub("\S*@\S*\s?"," ", text) # Removes emails and mentions (words with @)
    text = re.sub(r'http\S+', '', text)   # Removes URLs with http
    text = re.sub(r'www\S+', '', text)    # Removes URLs with www
    text = remove_punctuations(text)      # Removes punctuations
    text = re.sub("\S*\d\S*"," ", text)   # Removes numbers and words concatenated with numbers i.e. tw33t. Removes road names such as AR-111.
    text = remove_two_letter_words(text)  # Remove words containing 2 letters or more than 21 letters
    # Remove punctuations and convert to lowercase
    # text = re.sub(r'[^\w\s]', '', text).lower()
    tokens = tokenize_text(text)
    lemmatized_text = ' '.join([lemmatizer.lemmatize(word) for word in tokens])
    return lemmatized_text

  text = re.sub("\S*@\S*\s?"," ", text) # Removes emails and mentions (words with @)
  text = re.sub("\S*\d\S*"," ", text)   # Removes numbers and words concatenated with numbers i.e. tw33t. Removes road names such as AR-111.


In [28]:
# Apply the cleaning function
cleaned_data['Combined'] = cleaned_data['Combined'].apply(clean_text)
cleaned_data['Combined']

0      recent graduate with bsc hons information tech...
1      serial learner with love for building high qua...
2      software engineer intervest software technolog...
3      after graduationsbsc information technology un...
4      experienced capital market specialist with dem...
                             ...                        
329    mathematician and research software engineer w...
330    enthusiastic and experienced software develope...
331    software engineer with interest data science m...
332    data specialist with experience deep learning ...
333    python r matlab good software engineering prac...
Name: Combined, Length: 310, dtype: object

## Save the Cleaned Dataset for Machine Learning Model

In [29]:
# Save the above dataset
cleaned_file_path_1 = './data/cleaned_linkedin_profiles_1.csv'
cleaned_data.to_csv(cleaned_file_path_1, index=False)

In [30]:
# Load the data from cleaned_file_path
cleaned_data = pd.read_csv(cleaned_file_path_1)
cleaned_data.head(10)

Unnamed: 0,Full Name,Position,Company,University,About,Bio,Combined
0,Dilusha Amarasekara,Software Engineer,IFS,University of Moratuwa,I'm a recent graduate with a BSc (Hons) in Inf...,Software Engineer at IFS R&D International (Pv...,recent graduate with bsc hons information tech...
1,Anuradha Basnayake,Associate Software Engineer,Velou,University of Colombo School of Computing,Serial learner with a love for building high q...,Associate Software Engineer @ Velou | CS Gradu...,serial learner with love for building high qua...
2,Yasitha Rukshan,Software Engineer,Intervest Software Technologies (Private) Limited,University of Ruhuna,I am a Software Engineer at Intervest Software...,Software Engineer at Intervest | Full Stack | ...,software engineer intervest software technolog...
3,Sasindu Dulanjana,Software Engineer,EY,University of Greenwich,After graduations(Bsc in Information Technolog...,Software Engineer,after graduationsbsc information technology un...
4,Thilina Ranaweera,Technical Lead,Wiley,SLIIT,Experienced Capital Market Specialist with a d...,Tech Lead at Wiley,experienced capital market specialist with dem...
5,Raveen Dissanayaka,Senior Software Engineer,IFS,University of Westminster,🚀 Senior Software Engineer at IFS | Poka Produ...,Senior Software Engineer | Python | Java | AWS...,senior software engineer ifs poka product deve...
6,Venuri Hettiarachchi,Software Engineer,Axiata Digital Labs,SLIIT,I'm a Software Engineer with 3+ years of exper...,Software Engineer @ ADL | Full Stack Developer...,software engineer with year experience full st...
7,Madushan Abeyrathna,"Technical Specialist, Software Engineering",Wiley,SLIIT,Backend developer with more than six years of ...,"Technical Specialist, Software Engineering at ...",backend developer with more than six year indu...
8,Thilani Abeysooriya,Senior Software Engineering Manager,Wiley,University of Colombo,Technical Leader with 16+ years of experience ...,Senior Software Engineering Manager,technical leader with year experience software...
9,Samitha Kolambage,Senior Software Engineer (Automation/SQE),Wiley Sri Lanka,SLIIT,6+ Years of experience in Software Quality Eng...,Senior Software Engineer at Wiley (Automation/...,year experience software quality engineering w...


## Implement Machine Learning Model

In [31]:
# Vectorize the text data using TF-IDF
# https://kavita-ganesan.com/tfidftransformer-tfidfvectorizer-usage-differences/
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
X = vectorizer.fit_transform(cleaned_data['Combined']).toarray()

In [32]:
# Encode target labels (Position)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(cleaned_data['Position'])

In [33]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [34]:
# Train a RandomForestClassifier / SVM / LogisticRegressionaccuracy
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [35]:
# Evaluate the model
# 75% - 90%
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 30.65%


In [53]:
# Save to .h5 model

## Example Usage

In [37]:
# Function to predict top 3 candidates for given keywords
def recommend_candidates(profiles, keywords):
    # Combine profiles and keywords into a feature vector
    profile_text = [p['About'] + ' ' + p['Bio'] for p in profiles]
    keywords_combined = ' '.join(keywords)
    profile_text.append(keywords_combined)

    # Preprocess the text
    processed_text = [
        text.replace(r'[^\w\s]', '').lower() for text in profile_text
    ]

    # Transform using the trained vectorizer
    features = vectorizer.transform(processed_text).toarray()

    # Predict for each profile
    profile_predictions = model.predict(features[:-1])
    keyword_prediction = model.predict(features[-1:])[0]
    print(profile_predictions)
    print(keyword_prediction)

    # Rank profiles based on similarity to the keyword prediction
    recommendations = [
        (profiles[i]['Full Name'], label_encoder.inverse_transform([profile_predictions[i]])[0])
        for i in range(len(profiles))
        if (profile_predictions[i] >= keyword_prediction - 25) or (profile_predictions[i] <= keyword_prediction + 25)  # Alter this value to have profiles that are similar to the given keywords []
    ]
    return recommendations[:3]  # Return the top 3 candidates

In [47]:
# Example usage
profiles = [
    #{'Full Name': 'John Doe', 'About': 'Experienced in Python and Java', 'Bio': 'Software Developer with 5 years of experience.'},
   # {'Full Name': 'Jane Smith', 'About': 'Expert in machine learning', 'Bio': 'Data Scientist with a passion for AI.'},
    {'Full Name': 'Emily Brown', 'About': 'Full-stack developer', 'Bio': 'Specializes in React and Node.js.'},
    {'Full Name': 'Michael Johnson', 'About': 'Backend development expertise', 'Bio': 'Java and Spring Boot specialist.'},
    {'Full Name': 'Sarah Lee', 'About': 'Frontend developer', 'Bio': 'Angular and Vue.js experience.'}
]

keywords = ['machine learning', 'AI', 'data analysis']
recommendations = recommend_candidates(profiles, keywords)

print("Top 3 Recommendations:")
for name, position in recommendations:
    print(f"{name} - {position}")

[102 123   9]
86
Top 3 Recommendations:
Emily Brown - Software Engineer
Michael Johnson - Technical Lead
Sarah Lee - Associate Software Engineer
