In [23]:
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [24]:
data = pd.read_csv('skills_data.csv')

In [25]:
if 'skills' not in data.columns:
    raise KeyError("The data does not contain a 'skills' column")

# Check if the job_title column exists
if 'job_title' not in data.columns:
    raise KeyError("The data does not contain a 'job_title' column")

# Check if the data is empty
if data.empty:
    raise ValueError("The data is empty")

In [26]:
data['skills'] = data['skills'].apply(lambda x: str(x).split(','))

In [27]:
all_skills = []
for skills in data['skills']:
    all_skills.extend(skills)
unique_skills = list(set(all_skills))

In [28]:
matrix = []
for skills in data['skills']:
    skill_vector = [1 if skill in skills else 0 for skill in unique_skills]
    matrix.append(skill_vector)

In [29]:
df = pd.DataFrame(matrix, columns=unique_skills)

In [30]:
X_train, X_test, y_train, y_test = train_test_split(df, data['job_title'], train_size=0.8,random_state=1)

In [31]:
clf = RandomForestClassifier()

In [32]:
clf.fit(X_train, y_train)


RandomForestClassifier()

In [33]:
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy*100)

Accuracy: 11.11111111111111


In [34]:
predictions = clf.predict(X_test)
report = classification_report(data['job_title'][int(0.8*len(df)):], predictions)
print(report)

                                  precision    recall  f1-score   support

               Account Executive       0.00      0.00      0.00       1.0
Business Development Coordinator       0.00      0.00      0.00       1.0
                Content Marketer       0.00      0.00      0.00       1.0
               Financial Analyst       0.00      0.00      0.00       1.0
                Graphic Designer       0.00      0.00      0.00       0.0
     Human Resources Coordinator       0.00      0.00      0.00       1.0
         Human Resources Manager       0.00      0.00      0.00       0.0
           IT Support Technician       0.00      0.00      0.00       1.0
               Marketing Analyst       0.00      0.00      0.00       1.0
               Marketing Manager       0.00      0.00      0.00       0.0
    Public Relations Coordinator       0.00      0.00      0.00       1.0
                 Software Tester       0.00      0.00      0.00       0.0
               Technical Trainer     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [35]:
def recommend_skills(user_skills):
    # Convert the user's skills into a vector
    user_vector = [1 if skill in user_skills else 0 for skill in unique_skills]

    # Predict the most likely job for the user based on their skills
    job = clf.predict([user_vector])[0]

    # Find the skills for the predicted job that are not included in the input skills
    job_skills = data[data['job_title'] == job]['skills'].iloc[0]
    recommendations = [skill for skill in job_skills if skill not in user_skills]

    # Return the top 3 skills to learn
    return recommendations[:3]

In [36]:
recommendations = recommend_skills(['Figma', 'Adobe'])
print(recommendations) 

['QA', 'testing', 'automation']




In [37]:
import pickle

In [38]:
with open('model.pkl', 'wb') as file:
    pickle.dump(clf, file)

In [39]:
# Load the model from the file
with open('model.pkl', 'rb') as file:
    model = pickle.load(file)
    
user_skills = ['Python']
user_vector = [1 if skill in user_skills else 0 for skill in unique_skills]
# Make predictions with the model
job = model.predict([user_vector])[0]

# Sort the predictions by the likelihood of the user getting a job
job_skills = data[data['job_title'] == job]['skills'].iloc[0]
recommendations = [skill for skill in job_skills if skill not in user_skills]

    # Return the top 3 skills to learn
print(recommendations[:3])



['Java', 'C++', 'JavaScript']


