In [None]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162996 sha256=4b666f2a6cb21395283ac9233c96ec50ae3717cd2433827ded10edb31133016d
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


**Upload and Load the CSV Data**

In [None]:
# Load the data from the CSV file
import pandas as pd

# NOTE(review): absolute path, presumably on a mounted Google Drive in Colab --
# confirm the drive is mounted before this cell runs on a fresh runtime.
file_path = '/content/drive/MyDrive/Colab Notebooks/data.csv'

# Read the CSV and trim stray whitespace around the header names in one step.
df = pd.read_csv(file_path).rename(columns=str.strip)

# Show the cleaned column names so they can be checked against later cells.
print("Columns in the CSV file:", df.columns)

# Preview the first rows (rendered as the cell's output).
df.head()

Columns in the CSV file: Index(['job_id', 'job_description', 'student_id', 'student_skills'], dtype='object')


Unnamed: 0,job_id,job_description,student_id,student_skills
0,1,Supply chain manager overseeing logistics,1,Online Degrees Certificates Credentialing
1,2,inventory control and vendor relationships,2,Technical Support Help Desk Troubleshooting
2,3,crisis communications and brand reputation,3,Fintech Payment Systems Blockchain Technology
3,4,improving efficiency and managing teams,4,BI Tools Tableau Power BI Data Mining
4,5,Machine learning engineer with experience in T...,5,Corporate Communications Internal Communicatio...


**Prepare the Data for the Surprise Library**

In [None]:
from surprise import Dataset, Reader

# Mark every (student, job) row in the CSV as a positive interaction.
# NOTE(review): every row receives the same rating (1), so the data contains no
# 0-valued examples even though the rating scale below allows them -- confirm
# this is intended, since the model has nothing to distinguish jobs by.
df['interaction'] = 1

# Surprise expects exactly three columns, in (user, item, rating) order.
interaction_columns = ['student_id', 'job_id', 'interaction']

# The Reader declares the range that ratings (and clipped predictions) fall in.
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(df[interaction_columns], reader)

**Train the SVD Model**

In [None]:
from surprise import SVD
from surprise.model_selection import train_test_split

# Fixed seed so the split and the SVD factor initialisation are identical on
# every Restart & Run All; change it here to explore other random draws.
RANDOM_STATE = 42

# Split the data into training and testing sets (80% / 20%)
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

# Train the SVD model; fit() returns the fitted algorithm, which the cell displays
model = SVD(random_state=RANDOM_STATE)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7c6ddaf195d0>

**Generate Recommendations**

In [None]:
def get_recommendations(student_id, model, top_n=5, jobs_df=None):
    """Rank every known job for one student and return the ``top_n`` best.

    Parameters
    ----------
    student_id
        Raw student id, passed unchanged to ``model.predict``.
    model
        Fitted object exposing ``predict(user_id, item_id)`` whose result has
        ``iid`` (the job id) and ``est`` (the estimated score) attributes.
    top_n : int, default 5
        Number of recommendations to return.
    jobs_df : pandas.DataFrame, optional
        Frame with ``job_id`` and ``job_description`` columns. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Columns ``job_id``, ``job_description`` and ``score``, ordered from the
        highest to the lowest estimated score.
    """
    if jobs_df is None:
        jobs_df = df

    # Candidate jobs, in order of first appearance
    job_ids = jobs_df['job_id'].unique()

    # One description per job id, so a job listed on several rows cannot
    # produce more descriptions than there are recommended job ids.
    descriptions = (
        jobs_df.drop_duplicates(subset='job_id')
        .set_index('job_id')['job_description']
    )

    # Predict the interaction score for each job and rank them, best first
    predictions = [model.predict(student_id, job_id) for job_id in job_ids]
    predictions.sort(key=lambda pred: pred.est, reverse=True)
    top_predictions = predictions[:top_n]

    top_job_ids = [pred.iid for pred in top_predictions]
    top_scores = [pred.est for pred in top_predictions]

    # Look descriptions up by job id *in ranking order*; filtering the frame
    # with isin() would return them in frame order and misalign the columns.
    top_descriptions = descriptions.reindex(top_job_ids).tolist()

    return pd.DataFrame({
        'job_id': top_job_ids,
        'job_description': top_descriptions,
        'score': top_scores,
    })
# Get recommendations for student_id 1
# (a literal id is passed: no `student_id` variable exists at notebook level,
# so referencing one here fails with NameError on a fresh kernel)
recommendations = get_recommendations(student_id=1, model=model, top_n=5)
print(recommendations)

   job_id                                    job_description  score
0       1          Supply chain manager overseeing logistics      1
1       4            improving efficiency and managing teams      1
2       5  Machine learning engineer with experience in T...      1
3       9          Operations manager streamlining processes      1
4      10  Public relations specialist handling media rel...      1


**Evaluate the Model**


In [None]:
from surprise import accuracy

# Predict on the held-out test set
predictions = model.test(testset)

# Compute the RMSE (Root Mean Squared Error). accuracy.rmse prints the value
# itself by default, so silence it and print once to avoid a duplicate line.
# NOTE(review): every interaction in this notebook is set to 1, so a constant
# prediction already scores a perfect RMSE -- treat a value of 0 with caution.
rmse = accuracy.rmse(predictions, verbose=False)
print(f'RMSE: {rmse}')


RMSE: 0.0000
RMSE: 0.0


**Recommend Jobs for a New Student**

In [None]:
def recommend_for_new_student(new_student_id, new_student_skills, random_state=42):
    """Refit SVD with a synthetic new student and return their top 5 jobs.

    The new student is added to the notebook-level ``df`` data with one
    positive interaction (rating 1) for every distinct job, a fresh SVD model
    is trained on the combined data, and ``get_recommendations`` ranks the jobs.

    Parameters
    ----------
    new_student_id
        Id to register the new student under.
        NOTE(review): if this id already exists in ``df`` the synthetic rows are
        mixed with that student's existing rows -- confirm ids are unique.
    new_student_skills
        Free-text skills, stored in the combined frame only. Just the
        ``student_id``, ``job_id`` and ``interaction`` columns are handed to
        Surprise, so the skills do not influence the recommendations.
    random_state : int, default 42
        Seed for the SVD initialisation so repeated calls give the same result.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``get_recommendations`` for the new student.
    """
    # One synthetic row per distinct job; iterating over every row of df would
    # repeat a job once for each existing student that is listed against it.
    jobs = df.drop_duplicates(subset='job_id')
    df_new_student = pd.DataFrame({
        'job_id': jobs['job_id'].tolist(),
        'job_description': jobs['job_description'].tolist(),
        'student_id': new_student_id,
        'student_skills': new_student_skills,
        'interaction': 1,  # Assuming interaction is 1
    })

    df_combined = pd.concat([df, df_new_student], ignore_index=True)

    # Use the Surprise library's Reader class to parse the combined data
    data_combined = Dataset.load_from_df(
        df_combined[['student_id', 'job_id', 'interaction']], reader
    )

    # Train a new SVD model on all of the combined data (no hold-out split)
    trainset_combined = data_combined.build_full_trainset()
    model_combined = SVD(random_state=random_state)
    model_combined.fit(trainset_combined)

    # Generate recommendations for the new student
    return get_recommendations(new_student_id, model_combined, top_n=5)

# Prompt for the new student's details; the id must parse as an integer.
new_student_id = int(input("Enter new student ID: "))
new_student_skills = input("Enter new student skills: ")

# Refit with the new student included and show their recommended jobs.
new_recommendations = recommend_for_new_student(
    new_student_id=new_student_id,
    new_student_skills=new_student_skills,
)
print(new_recommendations)


Enter new student ID: 2345
Enter new student skills: sql
   job_id                                    job_description  score
0       2         inventory control and vendor relationships      1
1       5  Machine learning engineer with experience in T...      1
2       7     IT support specialist troubleshooting hardware      1
3      13  Human resources specialist focused on recruitment      1
4      14                   JavaScript and responsive design      1
