# In [None]:
import numpy as np
import math
class UMRList:
    """Bundle of three lists: user ids, movie ids and ratings.

    The lists are stored exactly as passed in (no copy is made). Callers in
    this file fill them in lockstep, one entry per (user, movie, rating)
    record.
    """

    def __init__(self, user_list, movie_list, rating_list):
        """Keep references to the three lists on the instance."""
        self.user_list, self.movie_list, self.rating_list = (
            user_list,
            movie_list,
            rating_list,
        )

    def get_user_list(self):
        """Return the stored list of user ids."""
        return self.user_list

    def get_movie_list(self):
        """Return the stored list of movie ids."""
        return self.movie_list

    def get_rating_list(self):
        """Return the stored list of ratings."""
        return self.rating_list

class KNeighbor:
    """A candidate neighbour: an id paired with its similarity score."""

    def __init__(self, user_id, similarity):
        """Record the neighbour's id and its similarity weight."""
        self.user_id, self.similarity = user_id, similarity

    def get_similarity(self):
        """Return the similarity weight of this neighbour."""
        return self.similarity

    def get_user_id(self):
        """Return the id of this neighbour."""
        return self.user_id

def load_training_matrix(file_path='train.txt', n_users=200, n_movies=1000):
    """Load the user-by-movie rating matrix from a whitespace-separated file.

    Every non-blank line must hold three integers ``user movie rating``,
    where ``user`` and ``movie`` are 1-based ids.

    Args:
        file_path: Path of the training file.
        n_users: Number of rows (users) in the returned matrix.
        n_movies: Number of columns (movies) in the returned matrix.

    Returns:
        A ``(n_users, n_movies)`` float array; cells never assigned stay 0.

    Raises:
        ValueError: If a non-blank line does not hold exactly three integers.
        IndexError: If an id exceeds the matrix dimensions.
    """
    matrix = np.zeros((n_users, n_movies))
    with open(file_path, 'r') as training_file:
        for line in training_file:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no record and must not break the 3-way unpacking below.
                continue
            user, movie, rating = map(int, fields)
            matrix[user - 1, movie - 1] = rating
    return matrix

# Global rating matrix (rows = users, columns = movies) read by the functions
# below. It is loaded once from the default training file; the former
# np.zeros pre-allocation was overwritten immediately and has been dropped.
training_matrix = load_training_matrix()

def prediction_movie_list(file_path='test5.txt'):
    """Collect the test-file records whose rating is 0.

    Every non-blank line of the file must hold three integers
    ``user movie rating``. Only records with ``rating == 0`` are kept.

    Args:
        file_path: Path of the test file.

    Returns:
        A ``UMRList`` with the users, movies and (all-zero) ratings of the
        kept records, in file order.

    Raises:
        ValueError: If a non-blank line does not hold exactly three integers.
    """
    user_list, movie_list, rating_list = [], [], []
    with open(file_path, 'r') as test_file:
        for line in test_file:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on the unpack below.
                continue
            user, movie, rating = map(int, fields)
            if rating == 0:
                user_list.append(user)
                movie_list.append(movie)
                rating_list.append(rating)
    return UMRList(user_list, movie_list, rating_list)

def avg_rating_from_training():
    """Return each training user's mean non-zero rating.

    Returns:
        A dict keyed by 1-based user id (1..200). The value is the mean of
        the non-zero entries of that user's row of ``training_matrix``, or 0
        when the row has no non-zero entry.
    """
    averages = {}
    for row_idx in range(200):
        row = training_matrix[row_idx]
        rated = row[row != 0]
        if rated.size > 0:
            averages[row_idx + 1] = rated.mean()
        else:
            averages[row_idx + 1] = 0
    return averages

def each_movie_avg_rating():
    """Return each movie's mean non-zero rating across the training users.

    Returns:
        A dict keyed by 1-based movie id (1..1000). The value is the mean of
        the non-zero entries of that movie's column of ``training_matrix``,
        or 0 when the column has no non-zero entry.
    """
    averages = {}
    for col_idx in range(1000):
        column = training_matrix[:, col_idx]
        rated = column[column != 0]
        if rated.size > 0:
            averages[col_idx + 1] = rated.mean()
        else:
            averages[col_idx + 1] = 0
    return averages

def given_list_in_test(user_id, file_path='test5.txt'):
    """Collect the non-zero ratings that ``user_id`` has in the test file.

    Every non-blank line of the file must hold three integers
    ``user movie rating``.

    Args:
        user_id: User whose records are wanted.
        file_path: Path of the test file.

    Returns:
        A ``UMRList`` with the matching records (``rating != 0``) in file
        order.

    Raises:
        ValueError: If a non-blank line does not hold exactly three integers.
    """
    user_list, movie_list, rating_list = [], [], []
    with open(file_path, 'r') as test_file:
        for line in test_file:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on the unpack below.
                continue
            user, movie, rating = map(int, fields)
            if user == user_id and rating != 0:
                user_list.append(user)
                movie_list.append(movie)
                rating_list.append(rating)
    return UMRList(user_list, movie_list, rating_list)

def avg_user_rating_in_test(user_id, file_path='test5.txt'):
    """Return the mean of the non-zero ratings ``user_id`` has in the test file.

    Every non-blank line of the file must hold three integers
    ``user movie rating``.

    Args:
        user_id: User whose ratings are averaged.
        file_path: Path of the test file.

    Returns:
        The mean rating, or 0 when the user has no non-zero rating.

    Raises:
        ValueError: If a non-blank line does not hold exactly three integers.
    """
    ratings = []
    with open(file_path, 'r') as test_file:
        for line in test_file:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on the unpack below.
                continue
            user, _movie, rating = map(int, fields)
            if user == user_id and rating != 0:
                ratings.append(rating)
    return np.mean(ratings) if ratings else 0

def inverse_user_frequency(movie_id):
    """Return the inverse user frequency of a movie in the training data.

    Counts how many of the 200 training rows have a non-zero entry for the
    movie and returns ``log10(200 / count)``. When the count is zero, 1 is
    returned.

    Args:
        movie_id: 1-based movie id.
    """
    column = movie_id - 1
    raters = 0
    for row in range(200):
        if training_matrix[row][column] != 0:
            raters += 1
    if raters == 0:
        return 1
    return math.log10(200.0 / raters)

def predict_cosine_similarity(user_id, k_neighbor):
    """Rank training users by amplified cosine similarity to a test user.

    The cosine is computed over the movies that both the test user (their
    non-zero ratings in the test file) and the training user have rated.

    Args:
        user_id: Test user to find neighbours for.
        k_neighbor: Maximum number of neighbours to return.

    Returns:
        Up to ``k_neighbor`` ``KNeighbor`` objects (1-based training user id,
        similarity), sorted by similarity in descending order. Training users
        with no co-rated movie are omitted.
    """
    given_data = given_list_in_test(user_id)
    movie_list = given_data.get_movie_list()
    rating_list = given_data.get_rating_list()
    # Pair every movie with its own rating once. The previous
    # rating_list[movie_list.index(movie_id)] lookup was quadratic and picked
    # the first occurrence's rating when a movie id appeared twice.
    rated_movies = list(zip(movie_list, rating_list))

    # NOTE(review): every similarity is scaled by the IUF of the FIRST rated
    # movie only. The factor is the same for all neighbours of this user, so
    # it is computed once here; confirm whether a per-movie IUF was intended.
    first_movie_iuf = inverse_user_frequency(movie_list[0]) if movie_list else 1.0

    neighbor_list = []
    for usr in range(200):
        train_row = training_matrix[usr]
        numerator, denom_test, denom_train = 0.0, 0.0, 0.0
        for movie_id, test_rating in rated_movies:
            train_rating = train_row[movie_id - 1]
            if test_rating != 0 and train_rating != 0:
                numerator += test_rating * train_rating
                denom_test += test_rating ** 2
                denom_train += train_rating ** 2

        denom_total = math.sqrt(denom_test) * math.sqrt(denom_train)
        if denom_total != 0:
            similarity = numerator / denom_total
            # Case amplification: s * |s|**1.5 keeps the sign of s.
            similarity *= math.pow(abs(similarity), 1.5)
            similarity *= first_movie_iuf
            neighbor_list.append(KNeighbor(usr + 1, similarity))

    neighbor_list.sort(key=lambda x: x.similarity, reverse=True)
    return neighbor_list[:k_neighbor]

def predict_rating_cosine(user_id, movie_id, k_neighbor):
    """Predict a rating as the similarity-weighted mean of neighbour ratings.

    Only neighbours that rated ``movie_id`` in the training matrix contribute.
    When none did (the weight sum is zero), the movie's training average is
    used, and if that is 0 the user's own test-file average is used.

    Args:
        user_id: Test user the prediction is for.
        movie_id: 1-based id of the movie to predict.
        k_neighbor: Number of neighbours requested from
            ``predict_cosine_similarity``.

    Returns:
        The rounded prediction as an int, clamped to the range 1..5 like the
        other predictors in this file.
    """
    similarity_list = predict_cosine_similarity(user_id, k_neighbor)

    numerator, denominator = 0.0, 0.0
    for neighbor in similarity_list:
        rating = training_matrix[neighbor.user_id - 1][movie_id - 1]
        if rating > 0:
            numerator += neighbor.similarity * rating
            denominator += neighbor.similarity

    if denominator != 0:
        result = numerator / denominator
    else:
        # The fallbacks scan the whole matrix / re-read the test file, so
        # they are only evaluated when actually needed.
        result = each_movie_avg_rating().get(movie_id, 0)
        if result == 0:
            result = avg_user_rating_in_test(user_id)

    result = int(round(result))
    # A fallback of 0 (nothing known) used to leak out as rating 0.
    return min(max(result, 1), 5)

def pearson_correlation(user_id, k_neighbor):
    """Rank training users by a weighted Pearson correlation to a test user.

    Deviations are taken from the test user's test-file average and from each
    training user's training average, over co-rated movies only. The raw
    correlation is then scaled by ``n / (n + 2)`` (``n`` = number of co-rated
    movies), case-amplified, and multiplied by the IUF of the first rated
    movie.

    Args:
        user_id: Test user to find neighbours for.
        k_neighbor: Maximum number of neighbours to return.

    Returns:
        Up to ``k_neighbor`` ``KNeighbor`` objects sorted by signed similarity
        in descending order. Users whose deviations sum to zero on either side
        are omitted.
    """
    avg_rating_in_test = avg_user_rating_in_test(user_id)
    given_data = given_list_in_test(user_id)
    movie_list = given_data.get_movie_list()
    rating_list = given_data.get_rating_list()
    train_rating_list = avg_rating_from_training()
    # Pair every movie with its own rating once. The previous
    # rating_list[movie_list.index(movie_id)] lookup was quadratic and picked
    # the first occurrence's rating when a movie id appeared twice.
    rated_movies = list(zip(movie_list, rating_list))

    # NOTE(review): the IUF of the FIRST rated movie scales every similarity.
    # It is constant for this user, so compute it once; confirm whether a
    # per-movie IUF was intended.
    first_movie_iuf = inverse_user_frequency(movie_list[0]) if movie_list else 1.0

    neighbor_list = []
    for usr in range(200):
        train_row = training_matrix[usr]
        avg_rating_in_train = train_rating_list[usr + 1]
        numerator, denom_test, denom_train = 0.0, 0.0, 0.0
        common_movie = 0
        for movie_id, test_rating in rated_movies:
            train_rating = train_row[movie_id - 1]
            if test_rating != 0 and train_rating != 0:
                test_dev = test_rating - avg_rating_in_test
                train_dev = train_rating - avg_rating_in_train
                numerator += test_dev * train_dev
                denom_test += test_dev ** 2
                denom_train += train_dev ** 2
                common_movie += 1

        if denom_test > 0 and denom_train > 0:
            denom_total = math.sqrt(denom_test) * math.sqrt(denom_train)
            similarity = numerator / denom_total if denom_total != 0 else 0
            # Significance weighting: few co-rated movies shrink the weight.
            similarity *= (common_movie / (common_movie + 2))
            # Case amplification: s * |s|**1.5 keeps the sign of s.
            similarity *= math.pow(abs(similarity), 1.5)
            similarity *= first_movie_iuf
            neighbor_list.append(KNeighbor(usr + 1, similarity))

    neighbor_list.sort(key=lambda x: x.similarity, reverse=True)
    return neighbor_list[:k_neighbor]

def predict_rating_pearson(user_id, movie_id, k_neighbor):
    """Predict a 1..5 rating from the Pearson neighbours of a test user.

    The prediction is the user's test-file average plus the similarity-weighted
    mean deviation of the neighbours that rated ``movie_id``. If no neighbour
    contributes, the movie's training average is used, or the user's average
    when that is 0.

    Args:
        user_id: Test user the prediction is for.
        movie_id: 1-based id of the movie to predict.
        k_neighbor: Number of neighbours requested from ``pearson_correlation``.

    Returns:
        The rounded prediction as an int, clamped to 1..5.
    """
    neighbours = pearson_correlation(user_id, k_neighbor)
    user_mean = avg_user_rating_in_test(user_id)
    movie_mean = each_movie_avg_rating().get(movie_id, 0)
    train_means = avg_rating_from_training()

    target_col = movie_id - 1
    deviation_total = 0.0
    weight_total = 0.0
    for nb in neighbours:
        neighbour_id = nb.user_id
        neighbour_rating = training_matrix[neighbour_id - 1][target_col]
        if neighbour_rating > 0:
            deviation = neighbour_rating - train_means[neighbour_id]
            deviation_total += deviation * nb.similarity
            weight_total += abs(nb.similarity)

    if weight_total != 0:
        estimate = user_mean + deviation_total / weight_total
    else:
        estimate = movie_mean if movie_mean != 0 else user_mean

    rounded = int(round(estimate))
    return max(1, min(5, rounded))
# Module-level side table for the item-based predictor: maps a neighbour's
# 1-based index (stored in KNeighbor.user_id) to a movie id. It is filled by
# item_based_with_adjusted_cosine and read by predict_rating_item_based.
item_usr_mv_list = {}

def pearson_correlation_iuf(user_id, k_neighbor):
    """Rank training users by Pearson correlation with per-movie IUF weights.

    Each co-rated movie's contribution to the numerator and to both
    denominators is multiplied by that movie's inverse user frequency. The
    correlation is then scaled by ``n / (n + 2)`` (``n`` = co-rated movies).

    Args:
        user_id: Test user to find neighbours for.
        k_neighbor: Maximum number of neighbours to return.

    Returns:
        Up to ``k_neighbor`` ``KNeighbor`` objects sorted by signed similarity
        in descending order.
    """
    avg_rating_in_test = avg_user_rating_in_test(user_id)
    given_data = given_list_in_test(user_id)
    movie_list = given_data.get_movie_list()
    rating_list = given_data.get_rating_list()
    train_rating_list = avg_rating_from_training()
    # Pair every movie with its own rating once. The previous
    # rating_list[movie_list.index(movie_id)] lookup was quadratic and picked
    # the first occurrence's rating when a movie id appeared twice.
    rated_movies = list(zip(movie_list, rating_list))
    # IUF depends on the movie only; compute it once per movie instead of
    # once per (training user, movie) pair.
    iuf_by_movie = {movie_id: inverse_user_frequency(movie_id) for movie_id in movie_list}

    neighbor_list = []
    for usr in range(200):
        train_row = training_matrix[usr]
        avg_rating_in_train = train_rating_list[usr + 1]
        numerator, denom_test, denom_train = 0.0, 0.0, 0.0
        common_movie = 0
        for movie_id, test_rating in rated_movies:
            train_rating = train_row[movie_id - 1]
            if test_rating != 0 and train_rating != 0:
                iuf = iuf_by_movie[movie_id]
                test_dev = test_rating - avg_rating_in_test
                train_dev = train_rating - avg_rating_in_train
                numerator += test_dev * train_dev * iuf
                denom_test += (test_dev ** 2) * iuf
                denom_train += (train_dev ** 2) * iuf
                common_movie += 1

        if denom_test > 0 and denom_train > 0:
            denom_total = math.sqrt(denom_test) * math.sqrt(denom_train)
            similarity = numerator / denom_total if denom_total != 0 else 0
            # Significance weighting: few co-rated movies shrink the weight.
            similarity *= (common_movie / (common_movie + 2))
            neighbor_list.append(KNeighbor(usr + 1, similarity))

    neighbor_list.sort(key=lambda x: x.similarity, reverse=True)
    return neighbor_list[:k_neighbor]

def predict_rating_pearson_iuf(user_id, movie_id, k_neighbor):
    """Predict a 1..5 rating from the IUF-weighted Pearson neighbours.

    The prediction is the user's test-file average plus the similarity-weighted
    mean deviation of the neighbours that rated ``movie_id``. If no neighbour
    contributes, the movie's training average is used, or the user's average
    when that is 0.

    Args:
        user_id: Test user the prediction is for.
        movie_id: 1-based id of the movie to predict.
        k_neighbor: Number of neighbours requested from
            ``pearson_correlation_iuf``.

    Returns:
        The rounded prediction as an int, clamped to 1..5.
    """
    neighbours = pearson_correlation_iuf(user_id, k_neighbor)
    user_mean = avg_user_rating_in_test(user_id)
    movie_mean = each_movie_avg_rating().get(movie_id, 0)
    train_means = avg_rating_from_training()

    target_col = movie_id - 1
    deviation_total, weight_total = 0.0, 0.0
    for nb in neighbours:
        neighbour_id = nb.user_id
        neighbour_rating = training_matrix[neighbour_id - 1][target_col]
        if neighbour_rating > 0:
            sim = nb.similarity
            deviation_total += (neighbour_rating - train_means[neighbour_id]) * sim
            weight_total += abs(sim)

    if weight_total != 0:
        estimate = user_mean + deviation_total / weight_total
    elif movie_mean != 0:
        estimate = movie_mean
    else:
        estimate = user_mean

    return max(1, min(5, int(round(estimate))))

def pearson_correlation_case_mod(user_id, k_neighbor, rho=2.5):
    """Rank training users by case-amplified Pearson correlation.

    Deviations are taken from the test user's test-file average and from each
    training user's training average, over co-rated movies only. The raw
    correlation is scaled by ``n / (n + 2)`` (``n`` = co-rated movies) and then
    case-amplified.

    Args:
        user_id: Test user to find neighbours for.
        k_neighbor: Maximum number of neighbours to return.
        rho: Case-amplification exponent; the weight becomes
            ``w * |w| ** (rho - 1)``.

    Returns:
        Up to ``k_neighbor`` ``KNeighbor`` objects sorted by signed similarity
        in descending order.
    """
    avg_rating_in_test = avg_user_rating_in_test(user_id)
    given_data = given_list_in_test(user_id)
    movie_list = given_data.get_movie_list()
    rating_list = given_data.get_rating_list()
    train_rating_list = avg_rating_from_training()
    # Pair every movie with its own rating once. The previous
    # rating_list[movie_list.index(movie_id)] lookup was quadratic and picked
    # the first occurrence's rating when a movie id appeared twice.
    rated_movies = list(zip(movie_list, rating_list))

    neighbor_list = []
    for usr in range(200):
        train_row = training_matrix[usr]
        avg_rating_in_train = train_rating_list[usr + 1]
        numerator, denom_test, denom_train = 0.0, 0.0, 0.0
        common_movie = 0
        for movie_id, test_rating in rated_movies:
            train_rating = train_row[movie_id - 1]
            if test_rating != 0 and train_rating != 0:
                test_dev = test_rating - avg_rating_in_test
                train_dev = train_rating - avg_rating_in_train
                numerator += test_dev * train_dev
                denom_test += test_dev ** 2
                denom_train += train_dev ** 2
                common_movie += 1

        if denom_test > 0 and denom_train > 0:
            denom_total = math.sqrt(denom_test) * math.sqrt(denom_train)
            similarity = numerator / denom_total if denom_total != 0 else 0
            # Significance weighting: few co-rated movies shrink the weight.
            similarity *= (common_movie / (common_movie + 2))
            # NOTE(review): amplification is applied twice - first with a
            # fixed exponent (|s|**1.5), then with ``rho``. Kept as-is to
            # preserve results; confirm whether the fixed step is intended.
            similarity *= math.pow(abs(similarity), 1.5)
            similarity = similarity * (abs(similarity) ** (rho - 1))
            neighbor_list.append(KNeighbor(usr + 1, similarity))

    neighbor_list.sort(key=lambda x: x.similarity, reverse=True)
    return neighbor_list[:k_neighbor]

def predict_rating_pearson_case_mod(user_id, movie_id, k_neighbor, rho=2.5):
    """Predict a 1..5 rating from the case-amplified Pearson neighbours.

    The prediction is the user's test-file average plus the similarity-weighted
    mean deviation of the neighbours that rated ``movie_id``. If no neighbour
    contributes, the movie's training average is used, or the user's average
    when that is 0.

    Args:
        user_id: Test user the prediction is for.
        movie_id: 1-based id of the movie to predict.
        k_neighbor: Number of neighbours requested from
            ``pearson_correlation_case_mod``.
        rho: Amplification exponent forwarded to
            ``pearson_correlation_case_mod``.

    Returns:
        The rounded prediction as an int, clamped to 1..5.
    """
    neighbours = pearson_correlation_case_mod(user_id, k_neighbor, rho)
    user_mean = avg_user_rating_in_test(user_id)
    movie_mean = each_movie_avg_rating().get(movie_id, 0)
    train_means = avg_rating_from_training()

    target_col = movie_id - 1
    deviation_total, weight_total = 0.0, 0.0
    for nb in neighbours:
        neighbour_id = nb.user_id
        neighbour_rating = training_matrix[neighbour_id - 1][target_col]
        if neighbour_rating > 0:
            sim = nb.similarity
            deviation_total += (neighbour_rating - train_means[neighbour_id]) * sim
            weight_total += abs(sim)

    if weight_total != 0:
        estimate = user_mean + deviation_total / weight_total
    elif movie_mean != 0:
        estimate = movie_mean
    else:
        estimate = user_mean

    return max(1, min(5, int(round(estimate))))

def pearson_correlation_with_both_modifications(user_id, k_neighbor, rho=2.5):
    """Rank training users by Pearson correlation with IUF and amplification.

    Each co-rated movie's contribution is multiplied by that movie's inverse
    user frequency. The correlation is then scaled by ``n / (n + 2)``
    (``n`` = co-rated movies) and case-amplified.

    Args:
        user_id: Test user to find neighbours for.
        k_neighbor: Maximum number of neighbours to return.
        rho: Case-amplification exponent; the weight becomes
            ``w * |w| ** (rho - 1)``.

    Returns:
        Up to ``k_neighbor`` ``KNeighbor`` objects sorted by signed similarity
        in descending order.
    """
    avg_rating_in_test = avg_user_rating_in_test(user_id)
    given_data = given_list_in_test(user_id)
    movie_list = given_data.get_movie_list()
    rating_list = given_data.get_rating_list()
    train_rating_list = avg_rating_from_training()
    # Pair every movie with its own rating once. The previous
    # rating_list[movie_list.index(movie_id)] lookup was quadratic and picked
    # the first occurrence's rating when a movie id appeared twice.
    rated_movies = list(zip(movie_list, rating_list))
    # IUF depends on the movie only; compute it once per movie instead of
    # once per (training user, movie) pair.
    iuf_by_movie = {movie_id: inverse_user_frequency(movie_id) for movie_id in movie_list}

    neighbor_list = []
    for usr in range(200):
        train_row = training_matrix[usr]
        avg_rating_in_train = train_rating_list[usr + 1]
        numerator, denom_test, denom_train = 0.0, 0.0, 0.0
        common_movie = 0
        for movie_id, test_rating in rated_movies:
            train_rating = train_row[movie_id - 1]
            if test_rating != 0 and train_rating != 0:
                iuf = iuf_by_movie[movie_id]
                test_dev = test_rating - avg_rating_in_test
                train_dev = train_rating - avg_rating_in_train
                numerator += test_dev * train_dev * iuf
                denom_test += (test_dev ** 2) * iuf
                denom_train += (train_dev ** 2) * iuf
                common_movie += 1

        if denom_test > 0 and denom_train > 0:
            denom_total = math.sqrt(denom_test) * math.sqrt(denom_train)
            similarity = numerator / denom_total if denom_total != 0 else 0
            # Significance weighting: few co-rated movies shrink the weight.
            similarity *= (common_movie / (common_movie + 2))
            # NOTE(review): amplification is applied twice - first with a
            # fixed exponent (|s|**1.5), then with ``rho``. Kept as-is to
            # preserve results; confirm whether the fixed step is intended.
            similarity *= math.pow(abs(similarity), 1.5)
            similarity = similarity * (abs(similarity) ** (rho - 1))  # Apply case amplification
            neighbor_list.append(KNeighbor(usr + 1, similarity))

    neighbor_list.sort(key=lambda x: x.similarity, reverse=True)
    return neighbor_list[:k_neighbor]

def predict_rating_pearson_with_both_modifications(user_id, movie_id, k_neighbor, rho=2.5):
    """Predict a 1..5 rating from the IUF + case-amplified Pearson neighbours.

    The prediction is the user's test-file average plus the similarity-weighted
    mean deviation of the neighbours that rated ``movie_id``. If no neighbour
    contributes, the movie's training average is used, or the user's average
    when that is 0.

    Args:
        user_id: Test user the prediction is for.
        movie_id: 1-based id of the movie to predict.
        k_neighbor: Number of neighbours requested from
            ``pearson_correlation_with_both_modifications``.
        rho: Amplification exponent forwarded to
            ``pearson_correlation_with_both_modifications``.

    Returns:
        The rounded prediction as an int, clamped to 1..5.
    """
    neighbours = pearson_correlation_with_both_modifications(user_id, k_neighbor, rho)
    user_mean = avg_user_rating_in_test(user_id)
    movie_mean = each_movie_avg_rating().get(movie_id, 0)
    train_means = avg_rating_from_training()

    target_col = movie_id - 1
    deviation_total, weight_total = 0.0, 0.0
    for nb in neighbours:
        neighbour_id = nb.user_id
        neighbour_rating = training_matrix[neighbour_id - 1][target_col]
        if neighbour_rating > 0:
            sim = nb.similarity
            deviation_total += (neighbour_rating - train_means[neighbour_id]) * sim
            weight_total += abs(sim)

    if weight_total != 0:
        estimate = user_mean + deviation_total / weight_total
    elif movie_mean != 0:
        estimate = movie_mean
    else:
        estimate = user_mean

    return max(1, min(5, int(round(estimate))))

def item_based_with_adjusted_cosine(user_id, movie_id, k_neighbor):
    """Rank the movies a test user rated by similarity to a target movie.

    For every movie the user rated in the test file, an adjusted cosine
    similarity to ``movie_id`` is computed over the training users who rated
    both movies: each rating is centred on that training user's average, in
    the numerator and in both denominators. The similarity is then scaled by
    ``n / (n + 2)`` (``n`` = common users) and case-amplified.

    Side effect: ``item_usr_mv_list[index]`` is set to the rated movie's id
    for every neighbour kept, where ``index`` is the neighbour's 1-based
    position in the user's rated-movie list.

    Args:
        user_id: Test user whose rated movies are the candidate neighbours.
        movie_id: 1-based id of the target movie.
        k_neighbor: Maximum number of neighbours to return.

    Returns:
        Up to ``k_neighbor`` ``KNeighbor`` objects sorted by similarity in
        descending order. ``KNeighbor.user_id`` holds the 1-based position of
        the rated movie in the user's rated-movie list (not a user id).
        Movies with fewer than two common users, or with zero variance on
        either side, are omitted.
    """
    movie_list = given_list_in_test(user_id).get_movie_list()
    train_rating_list = avg_rating_from_training()
    target_col = movie_id - 1

    neighbor_list = []
    for idx, rated_movie in enumerate(movie_list):
        rated_col = rated_movie - 1
        numerator, denom_i, denom_j = 0.0, 0.0, 0.0
        common_usr = 0

        for user in range(200):
            rating_i = training_matrix[user][rated_col]
            rating_j = training_matrix[user][target_col]
            if rating_i != 0 and rating_j != 0:
                user_avg = train_rating_list[user + 1]
                dev_i = rating_i - user_avg
                dev_j = rating_j - user_avg
                numerator += dev_i * dev_j
                # Adjusted cosine normalises by the CENTRED ratings; the raw
                # squared ratings used before did not match the numerator.
                denom_i += dev_i ** 2
                denom_j += dev_j ** 2
                common_usr += 1

        denom_total = math.sqrt(denom_i) * math.sqrt(denom_j)
        if common_usr > 1 and denom_total != 0:
            similarity = numerator / denom_total
            # Significance weighting: few common users shrink the weight.
            similarity *= (common_usr / (common_usr + 2))
            # Case amplification: s * |s|**1.5 keeps the sign of s.
            similarity *= math.pow(math.fabs(similarity), 1.5)
            neighbor_list.append(KNeighbor(idx + 1, similarity))
            # Record which rated movie this neighbour index stands for, so
            # the caller can match it against the user's rated movies.
            # (Storing the target movie_id here made that match never fire.)
            item_usr_mv_list[idx + 1] = rated_movie

    neighbor_list.sort(key=lambda x: x.similarity, reverse=True)
    return neighbor_list[:max(k_neighbor, 0)]

def predict_rating_item_based(user_id, movie_id, k_neighbor):
    """Predict a 1..5 rating from the user's own ratings of similar movies.

    Each neighbour returned by ``item_based_with_adjusted_cosine`` refers to
    one movie the user rated in the test file; the prediction is the
    similarity-weighted mean of the user's ratings for those movies. If no
    neighbour contributes, the movie's training average is used, or the
    user's test-file average when that is 0.

    Args:
        user_id: Test user the prediction is for.
        movie_id: 1-based id of the movie to predict.
        k_neighbor: Number of neighbours requested.

    Returns:
        The rounded prediction as an int, clamped to 1..5.
    """
    similarity_list = item_based_with_adjusted_cosine(user_id, movie_id, k_neighbor)
    rating_list = given_list_in_test(user_id).get_rating_list()

    numerator, denominator = 0.0, 0.0
    for neighbor in similarity_list:
        similarity = neighbor.get_similarity()
        # KNeighbor.user_id is the 1-based position of the rated movie in the
        # user's rated-movie list, so it indexes the user's ratings directly.
        # The former lookup compared each rated movie with the TARGET movie,
        # which never matched, so the fallback averages were always used.
        rating = rating_list[neighbor.get_user_id() - 1]
        numerator += similarity * rating
        denominator += math.fabs(similarity)

    if denominator != 0.0:
        result = numerator / denominator
    else:
        # The fallbacks scan the whole matrix / re-read the test file, so
        # they are only evaluated when actually needed.
        result = each_movie_avg_rating()[movie_id]
        if result == 0:
            result = avg_user_rating_in_test(user_id)

    result = int(round(result))
    # Clamp to the valid range; the old lower bound (result < 0) let 0 through.
    return min(max(result, 1), 5)

def run(k_neighbor=150, output_path='cosine5.txt'):
    """Predict every rating requested by the test file and write the results.

    For each (user, movie) pair returned by ``prediction_movie_list``, the
    cosine and Pearson predictions are blended (0.6 / 0.4) and written as
    ``user movie rating`` lines. Progress is printed after every pair.

    The other predictors in this file were previously evaluated here too, but
    their results were never written or used, so those calls were removed.

    Args:
        k_neighbor: Neighbourhood size passed to both predictors.
        output_path: File the predictions are written to (overwritten).
    """
    output_list = prediction_movie_list()
    targets = zip(output_list.get_user_list(), output_list.get_movie_list())
    with open(output_path, 'w') as output_file:
        for i, (user_id, movie_id) in enumerate(targets):
            rating_cosine = predict_rating_cosine(user_id, movie_id, k_neighbor)
            rating_pearson = predict_rating_pearson(user_id, movie_id, k_neighbor)
            rating_my_own_algorithm = int(round(0.6 * rating_cosine + 0.4 * rating_pearson))
            output_file.write(f"{user_id} {movie_id} {rating_my_own_algorithm}\n")
            print(f"{i} Done")
    print("Finished!!")
if __name__ == "__main__":
    # Generate predictions only when executed as a script or notebook cell,
    # not as a side effect of importing this module.
    run()
