In [4]:
import pickle
from pathlib import Path

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Toy dataset, defined inline (nothing is read from disk).
# One tuple per freelancer: (freelancer_id, name, pipe-separated skills, experience).
freelancer_columns = ['freelancer_id', 'name', 'skills', 'experience']
freelancer_records = [
    (1, 'Alice Johnson', 'Python|Machine Learning|Data Science', '3 years'),
    (2, 'Bob Smith', 'Web Development|HTML|CSS|JavaScript', '2 years'),
    (3, 'Charlie Lee', 'Graphic Design|Adobe Photoshop|Illustrator', '5 years'),
    (4, 'Diana Davis', 'Python|Data Analysis|Pandas', '4 years'),
    (5, 'Evan Brown', 'Content Writing|SEO|Copywriting', '3 years'),
]

# One tuple per job: (job_id, pipe-separated required skills, free-text description).
job_columns = ['job_id', 'required_skills', 'job_description']
job_records = [
    (101, 'Python|Machine Learning',
     'Looking for a Python developer with expertise in machine learning to work on data-driven projects.'),
    (102, 'Web Development|JavaScript',
     'Hiring a web developer with experience in JavaScript for a dynamic web application.'),
    (103, 'Graphic Design|Illustrator',
     'Seeking a graphic designer skilled in Illustrator for a logo and branding project.'),
    (104, 'Data Analysis|Python',
     'In need of a data analyst proficient in Python and data manipulation for reporting tasks.'),
    (105, 'Content Writing|SEO',
     'Looking for a content writer with strong SEO skills to improve blog visibility.'),
]

# Transpose the row-wise records into column-oriented dicts (column name -> list of values).
freelancers_data = {
    column: list(values)
    for column, values in zip(freelancer_columns, zip(*freelancer_records))
}
jobs_data = {
    column: list(values)
    for column, values in zip(job_columns, zip(*job_records))
}

# Build one DataFrame per entity; column order follows the dict insertion order.
freelancers_df = pd.DataFrame(freelancers_data)
jobs_df = pd.DataFrame(jobs_data)

# Combine skills and experience for freelancers into a single text field.
# Note: every experience value ends in "years", so that token is shared by all
# profiles; TfidfVectorizer's default token pattern keeps only tokens of two or
# more characters, so the single-digit year counts do not become features.
freelancers_df['profile'] = freelancers_df['skills'] + ' ' + freelancers_df['experience']

# Use TF-IDF to vectorize the freelancer profiles and the jobs' required skills.
tfidf_vectorizer = TfidfVectorizer()

# Fit on BOTH corpora so the vocabulary and IDF weights cover job-side terms too.
# Fitting on the profiles alone silently drops any required skill that no
# freelancer lists, which removes it from the job vector and inflates similarity.
tfidf_vectorizer.fit(
    list(freelancers_df['profile']) + list(jobs_df['required_skills'])
)
freelancer_profiles_tfidf = tfidf_vectorizer.transform(freelancers_df['profile'])
job_requirements_tfidf = tfidf_vectorizer.transform(jobs_df['required_skills'])

# Cosine similarity between every freelancer (rows) and every job (columns).
similarity_matrix = cosine_similarity(freelancer_profiles_tfidf, job_requirements_tfidf)

# Save the model and data.
# OUTPUT_DIR defaults to '/content', the directory this notebook originally wrote
# to, so the files land in the same place; change it here to write elsewhere.
OUTPUT_DIR = Path('/content')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)  # no-op when the directory already exists

with open(OUTPUT_DIR / 'similarity_matrix.pkl', 'wb') as f:
    pickle.dump(similarity_matrix, f)

# The fitted vectorizer is stored so new text can be transformed with the same
# vocabulary later. Only unpickle these files from a trusted source: loading a
# pickle can execute arbitrary code.
with open(OUTPUT_DIR / 'tfidf_vectorizer.pkl', 'wb') as f:
    pickle.dump(tfidf_vectorizer, f)

# Save the datasets (freelancers_df includes the derived 'profile' column).
freelancers_df.to_csv(OUTPUT_DIR / 'freelancers.csv', index=False)
jobs_df.to_csv(OUTPUT_DIR / 'jobs.csv', index=False)



In [6]:
from google.colab import files

# Download the pickled similarity matrix to the local machine. Use the same
# absolute path the file was written to, rather than a path relative to the
# current working directory, so the download does not depend on the kernel's cwd.
files.download('/content/similarity_matrix.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>