<a href="https://colab.research.google.com/github/Audreipeowpeow/Tinlendar/blob/main/ai_model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import pandas as pd

# Lookup table: sub-category id -> human-readable name.
sub_category = {
    1: "Start-up",
    2: "Hackathon",
    3: "AP",
    4: "Music",
    5: "Animal",
    6: "Marketing Case",
    7: "AI",
    8: "WSDC",
    9: "Drawing",
    10: "Children",
    11: "Business Case",
    12: "Robot",
    13: "BP",
    14: "Writing",
    15: "Elderly",
}

# --- Synthetic-data configuration -------------------------------------------
SEED = 42                      # fixed so Restart & Run All rebuilds the same frame
NUM_OF_USERS = 500
MIN_SLOTS, MAX_SLOTS = 10, 20  # inclusive bounds on simulated picks per user
MAX_RATING = 10                # ratings are capped at this value


def scale_rating(count, total_slots, max_rating=MAX_RATING):
    """Convert a pick count into an integer rating between 1 and ``max_rating``.

    Args:
        count(int): How many of the user's slots went to one sub-category.
        total_slots(int): Total number of slots drawn for that user.
        max_rating(int): Upper bound applied to the result.

    Returns:
        ``round(count / total_slots * 10) + 1``, capped at ``max_rating``.
        Without the cap the formula gives 11 when a single sub-category
        takes (nearly) every slot. ``round`` is Python's built-in, so exact
        halves round to the nearest even integer.
    """
    return min(round((count / total_slots) * 10) + 1, max_rating)


random.seed(SEED)

# Draw from the ids actually present in the table rather than a hard-coded
# 1..15 range, so adding or removing a sub-category needs no other change.
category_ids = list(sub_category)

user_sub_category = []
for i in range(NUM_OF_USERS):
    random_slot = random.randint(MIN_SLOTS, MAX_SLOTS)

    # Count how many of this user's slots landed on each sub-category.
    rating = {}
    for _ in range(random_slot):
        picked = random.choice(category_ids)
        rating[picked] = rating.get(picked, 0) + 1

    # One row per (user, sub-category) pair; every field is stored as a
    # string, and user ids are 1-based.
    for k, v in rating.items():
        user_sub_category.append([f"{i + 1}", f"{k}", f"{scale_rating(v, random_slot)}"])

df = pd.DataFrame(user_sub_category, columns=['customer_id', 'sub_category_id', 'rating'])
df

Unnamed: 0,customer_id,sub_category_id,rating
0,1,9,2
1,1,10,3
2,1,4,2
3,1,1,2
4,1,13,3
...,...,...,...
4820,500,15,2
4821,500,9,1
4822,500,10,1
4823,500,5,2


In [None]:
from collections import defaultdict
from surprise import Dataset, NormalPredictor, Reader, accuracy, SVD
from surprise.model_selection import train_test_split

def get_top_n(predictions, n=10):
    """Pick the ``n`` best-scoring items for every user found in ``predictions``.

    Args:
        predictions(iterable of 5-field records): Each record unpacks as
            (user id, item id, true rating, estimated rating, details), e.g.
            the list returned by the test method of an algorithm.
        n(int): Maximum number of items kept per user. Default is 10.

    Returns:
    A defaultdict(list) keyed by user (raw) id. Each value is a list of
        (raw item id, rating estimation) tuples, ordered from highest to
        lowest estimation and holding at most n entries.
    """

    # Bucket every (item, estimate) pair under the user it was predicted for.
    per_user = defaultdict(list)
    for prediction in predictions:
        uid, iid, _true_r, est, _details = prediction
        per_user[uid].append((iid, est))

    # Rank each bucket by estimate, best first, and keep only the leading n.
    # Only existing keys are reassigned, so iterating the dict here is safe.
    for uid in per_user:
        ranked = sorted(per_user[uid], key=lambda pair: pair[1], reverse=True)
        per_user[uid] = ranked[:n]

    return per_user

# A reader is still needed, but only the rating_scale param is required.
reader = Reader(rating_scale=(1, 10))

# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(df[["customer_id", "sub_category_id", "rating"]], reader)

# Fit on every known rating; no hold-out split is made in this cell.
trainset = data.build_full_trainset()
# SVD initialises its factors randomly, so pin the seed to make the
# recommendations reproducible across reruns.
algo = SVD(random_state=42)
algo.fit(trainset)

# Then predict ratings for all pairs (u, i) that are NOT in the training set.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

top_n = get_top_n(predictions, n=10)

# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

1 ['12', '2', '6', '3', '15', '11', '7']
2 ['12', '5', '7', '6', '10']
3 ['5', '8', '15', '9', '12', '11']
4 ['1', '3', '15', '4']
5 ['2', '8', '4', '12', '11', '5']
6 ['12', '11', '2', '13']
7 ['15', '10', '5', '13', '9', '7']
8 ['4', '2', '6', '10', '11', '8']
9 ['11', '6', '7', '15', '13', '9', '4']
10 ['14', '6', '11', '8', '9']
11 ['4', '2', '1', '12', '5']
12 ['1', '11', '8', '2', '6']
13 ['15', '7', '1', '12']
14 ['14', '10', '8', '7', '9']
15 ['14', '15', '12', '1', '6', '9']
16 ['13', '15', '3', '11', '7', '10', '12']
17 ['1', '7', '9', '10']
18 ['12', '2', '5', '11', '4', '9', '8']
19 ['9', '11', '2', '10', '7']
20 ['14', '6', '12', '11', '10']
21 ['12', '3', '13', '8', '15']
22 ['6', '14', '11', '15', '9', '7', '1']
23 ['6', '10', '14', '12', '15', '8', '9']
24 ['6', '3', '1', '7', '15', '5', '12']
25 ['12', '15', '13', '3', '8']
26 ['7', '12', '13', '14', '15', '8']
27 ['4', '12', '10', '11']
28 ['6', '12', '1', '13', '14']
29 ['10', '6', '14', '15', '9', '3']
30 ['4', '6',