In [10]:
import math
import numpy as np
import operator
import matplotlib.pyplot as plt

# Module-level lookup tables shared by every function in this notebook.
event_titles_map = {}    # event id -> display title, filled by read_event_title()
event_client_map = {}    # event id -> list of client ids, filled by get_ratings_map()
client_event_map = {}    # client id -> {event id: rating}, filled by get_ratings_map()
client_avg_rating = {}   # client id -> average rating, filled by get_client_avg_rating()
cache = {}               # memo of results computed by predict_rating() / recommendation()


In [11]:
def read_event_title(fname):
    """Load event titles from a comma-separated text file into ``event_titles_map``.

    Every non-blank line is split on commas and each field is stripped. The
    first field is parsed as the integer event id; the stored title is
    ``"<third field>(<second field>)"``. An id that is already present is
    overwritten.

    Args:
        fname: Path of the titles file to read.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    with open(fname) as f:
        # Iterate the file object directly so the whole file is never held in memory.
        for line in f:
            if not line.strip():
                # A blank line (for example at the end of the file) is not a record;
                # without this guard int('') below would raise ValueError.
                continue
            parts = [x.strip() for x in line.split(',')]
            event_id = int(parts[0])
            event_titles_map[event_id] = parts[2] + "(" + parts[1] + ")"

def get_ratings_map(fname):
    """Load ratings from a comma-separated text file into the two rating tables.

    Every non-blank line is read as ``event id, client id, rating``. The ids
    are parsed through ``float`` first, so values such as ``"12.0"`` are
    accepted. Results go into:

    * ``client_event_map[client_id][event_id] = rating`` (a later row for the
      same pair overwrites the earlier rating), and
    * ``event_client_map[event_id]``, the list of clients that rated the event.

    Args:
        fname: Path of the ratings file to read.

    Raises:
        ValueError: If a field of a non-blank line is not numeric.
        IndexError: If a non-blank line has fewer than three fields.
    """
    with open(fname) as f:
        # Iterate the file object directly so the whole file is never held in memory.
        for line in f:
            if not line.strip():
                # Blank lines carry no record; float('') below would raise ValueError.
                continue
            parts = [x.strip() for x in line.split(',')]
            event_title_id = int(float(parts[0]))
            client_id = int(float(parts[1]))
            rating = float(parts[2])

            client_ratings = client_event_map.setdefault(client_id, {})
            if event_title_id not in client_ratings:
                # Register the client for this event only the first time the pair is
                # seen. A repeated row (or loading the same file twice) would otherwise
                # list the client twice and recommendation() would count it twice.
                event_client_map.setdefault(event_title_id, []).append(client_id)
            client_ratings[event_title_id] = rating


In [12]:
def get_client_avg_rating():
    """Store each client's mean rating in ``client_avg_rating``.

    For every client in ``client_event_map`` the ratings are converted to
    ``float``, added up in iteration order and divided by how many there are.

    Raises:
        ZeroDivisionError: If a client has an empty ratings dict.
    """
    for client_id, ratings in client_event_map.items():
        total = 0
        for score in ratings.values():
            total = total + float(score)
        client_avg_rating[client_id] = total / len(ratings)


In [13]:
# Start from empty tables so that re-running this cell reloads the data from
# scratch. get_ratings_map() appends to event_client_map, so a second run on top
# of the first would list every client twice, and anything already memoised in
# `cache` would describe the previous load.
event_titles_map.clear()
event_client_map.clear()
client_event_map.clear()
client_avg_rating.clear()
cache.clear()

# NOTE(review): these are absolute paths on one machine; point them at your own
# copy of the data before running.
event_titles_filename="/Users/priyankarajbanshi/Downloads/Event Planner Recommend/event_titles.txt"
ratings_filename="/Users/priyankarajbanshi/Downloads/Event Planner Recommend/ratings.txt"

# The averages are derived from the ratings, so they must be computed last.
read_event_title(event_titles_filename)
get_ratings_map(ratings_filename)
get_client_avg_rating()


In [14]:
def calculate_accuracy(tolerance=0.0):
    """Return the percentage of known ratings that ``predict_rating`` reproduces.

    Every (client, event) pair in ``client_event_map`` is predicted and compared
    with the stored rating.

    Args:
        tolerance: Largest absolute difference between prediction and stored
            rating that still counts as correct. The default of ``0.0`` keeps
            the original exact-match behaviour. Predictions are floats built
            from sums and products, so an exact match is rare; pass a positive
            value (for example half a rating step) for a more meaningful score.

    Returns:
        A float between 0 and 100. Returns ``0.0`` when no ratings are loaded,
        instead of dividing by zero.
    """
    correct_predictions = 0
    total_predictions = 0
    for client, ratings in client_event_map.items():
        for event, actual_rating in ratings.items():
            total_predictions += 1
            predicted_rating = predict_rating(client, event)
            if abs(predicted_rating - actual_rating) <= tolerance:
                correct_predictions += 1

    if total_predictions == 0:
        # Nothing to score; avoid ZeroDivisionError.
        return 0.0
    return (correct_predictions / total_predictions) * 100


In [15]:
def predict_rating(client, event):
    """Predict the rating ``client`` would give ``event``.

    The prediction is the client's own average rating plus, for every
    correlated client that rated ``event``, that client's correlation weight
    multiplied by how far its rating of ``event`` lies from its own average.

    Results are memoised in ``cache`` under ``("rating", client, event)``.
    The key has to include the event: keyed by client alone, the first event's
    prediction would be returned for every other event of that client. The
    namespaced tuple also cannot clash with other entries stored under the
    bare client id. The correlation table is memoised per client under
    ``("corr", client)`` so it is computed once, not once per event.

    NOTE(review): the weighted sum is not divided by the total weight, so the
    result is not confined to the rating scale. Confirm this is intended.

    Args:
        client: Id of the client to predict for; must be in ``client_avg_rating``.
        event: Id of the event to predict.

    Returns:
        The predicted rating as a float.
    """
    rating_key = ("rating", client, event)
    if rating_key in cache:
        return cache[rating_key]

    corr_key = ("corr", client)
    if corr_key not in cache:
        cache[corr_key] = get_client_corr(client)
    client_correlation = cache[corr_key]

    temp_rating = 0
    for other_client, weight in client_correlation.items():
        other_ratings = client_event_map[other_client]
        if event in other_ratings:
            temp_rating += weight * (other_ratings[event] - client_avg_rating[other_client])

    predicted_rating = client_avg_rating[client] + temp_rating
    cache[rating_key] = predicted_rating
    return predicted_rating


In [16]:
def get_client_corr(active_client):
    """Compute a correlation weight between ``active_client`` and every other client.

    For each other client in ``client_event_map``, only events rated by both
    clients are considered. Each rating is taken as its deviation from that
    client's entry in ``client_avg_rating``. The weight is the sum of the
    products of the paired deviations divided by the square root of the product
    of the two sums of squared deviations.

    Args:
        active_client: Id of the client to compare everyone else against.

    Returns:
        Dict mapping other client id -> weight. Clients whose denominator is
        zero (for example no shared events) are left out.
    """
    correlations = {}
    for other in client_event_map:
        if other == active_client:
            continue

        cross_sum = 0
        active_sq_sum = 0
        other_sq_sum = 0
        for event in client_event_map[active_client]:
            if event not in client_event_map[other]:
                continue
            active_dev = client_event_map[active_client][event] - client_avg_rating[active_client]
            other_dev = client_event_map[other][event] - client_avg_rating[other]
            cross_sum += active_dev * other_dev
            active_sq_sum += np.power(active_dev, 2)
            other_sq_sum += np.power(other_dev, 2)

        scale = np.sqrt(active_sq_sum * other_sq_sum)
        if scale != 0:
            correlations[other] = cross_sum / scale
    return correlations


In [17]:
def recommendation(active_client, K):
    """Print the titles of the ``K`` highest-scoring events for ``active_client``.

    Every event in ``event_titles_map`` gets a score: the sum, over correlated
    clients that rated it, of the correlation weight multiplied by how far that
    client's rating lies from its own average. Events nobody correlated has
    rated score 0. Events are ranked by score, highest first, and ties keep
    the order of ``event_titles_map``.

    The ranked list is memoised in ``cache`` under
    ``("recommendation", active_client)``. A bare client id is not used as the
    key because an entry of a different type may be stored under it, and
    slicing such a value would raise ``TypeError``.

    NOTE(review): events the client has already rated are not filtered out of
    the ranking. Confirm that is intended.

    Args:
        active_client: Id of the client to recommend for.
        K: Number of titles to print (applied as the slice ``[:K]``).

    Returns:
        None. The titles are printed, one per line.
    """
    cache_key = ("recommendation", active_client)
    if cache_key in cache:
        ranked_events = cache[cache_key]
    else:
        client_correlation = get_client_corr(active_client)
        scores = {}
        for event in event_titles_map:
            temp_rating = 0
            # Events with no ratings at all keep a score of 0.
            for client in event_client_map.get(event, ()):
                if client in client_correlation:
                    temp_rating += client_correlation[client] * (
                        client_event_map[client][event] - client_avg_rating[client]
                    )
            scores[event] = temp_rating

        ranked_events = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
        cache[cache_key] = ranked_events

    for event_id, _score in ranked_events[:K]:
        print(event_titles_map[event_id])


In [22]:
# Print the top-K recommended events for one example client.
active_client, K = 1987434, 5
recommendation(active_client, K)

Harvey Pvt Ltd(Harvey)
Angela Pvt Ltd(Angela)
Larry Pvt Ltd(Larry)
Louise Pvt Ltd(Louise)
Benjamin Pvt Ltd(Benjamin)


In [23]:
# Save the trained model to model.pkl
# `pickle` is not among the notebook's imports, so it is imported here; without
# this the cell raises NameError on a fresh kernel.
import pickle

# The "model" that gets persisted is the in-memory `cache` dict of memoised results.
model = cache
with open('model.pkl', 'wb') as file:
    pickle.dump(model, file)