In [None]:
import numpy as np
import pandas as pd
import itertools as IT
import datetime
from tqdm import tqdm


In [None]:
class UserItemData:
    """Rating table read from a whitespace-separated file, with optional filters.

    Parameters
    ----------
    path
        Location handed to ``pd.read_csv``. The table must contain a
        ``movieID`` column; ``date_day``, ``date_month`` and ``date_year``
        columns are read only when ``start`` or ``end`` is given.
    start : str, optional
        Lower date bound written as ``"day.month.year"``. Rows dated on this
        day are kept (inclusive bound).
    end : str, optional
        Upper date bound in the same format. Rows dated on this day are
        dropped (exclusive bound).
    min_ratings : int, default 0
        A movie is kept only when it has strictly more than ``min_ratings``
        rows left after the date filters.

    Attributes
    ----------
    data : pandas.DataFrame
        The filtered rows.
    len : int
        Number of rows in ``data``.
    """

    def __init__(self, path, start=None, end=None, min_ratings=0):
        # Raw string: "\s" inside a normal literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        df = pd.read_csv(path, sep=r"\s+")  # read the .dat file

        # Selection by minimum date (inclusive).
        if start is not None:
            s_day, s_month, s_year = self._parse_date(start)
            same_year = df["date_year"] == s_year
            keep = (
                (df["date_year"] >= s_year)  # by year
                & ~(same_year & (df["date_month"] < s_month))  # by month
                & ~(same_year & (df["date_month"] == s_month)
                    & (df["date_day"] < s_day))  # by day
            )
            df = df.loc[keep]

        # Selection by maximum date (exclusive: the end day itself is removed).
        if end is not None:
            e_day, e_month, e_year = self._parse_date(end)
            same_year = df["date_year"] == e_year
            keep = (
                (df["date_year"] <= e_year)  # by year
                & ~(same_year & (df["date_month"] > e_month))  # by month
                & ~(same_year & (df["date_month"] == e_month)
                    & (df["date_day"] >= e_day))  # by day
            )
            df = df.loc[keep]

        # Keep only movies with strictly more than `min_ratings` remaining rows.
        counts = df["movieID"].value_counts()
        kept_ids = counts[counts > min_ratings].index
        df = df.loc[df["movieID"].isin(kept_ids)]

        self.data = df
        self.len = len(self.data)

    @staticmethod
    def _parse_date(text):
        """Split a ``"day.month.year"`` string into an ``(day, month, year)`` int tuple.

        Raises ``ValueError`` when the string does not have exactly three
        dot-separated integer fields.
        """
        day, month, year = text.strip().split(".")
        return int(day), int(month), int(year)

    def nrows(self):
        """Return the number of rows that survived filtering."""
        return self.len

In [None]:
class MovieData:
    """Lookup table mapping movie ids to titles.

    Only the ``id`` and ``title`` columns of the file are loaded; fields are
    separated by one or more tab characters.
    """

    def __init__(self, path):
        """Read the ``id`` and ``title`` columns from the file at ``path``."""
        wanted_columns = ["id", "title"]
        self.data = pd.read_csv(
            path,
            sep="\t+",
            usecols=wanted_columns,
            engine="python",
        )

    def get_title(self, id):
        """Return the title of the first row whose ``id`` column equals ``id``.

        Raises ``IndexError`` when no row matches.
        """
        matching_titles = self.data.loc[self.data["id"] == id, "title"]
        return matching_titles.values[0]

In [None]:
class RandomPredictor:
    """Assigns every known movie a uniformly random integer grade.

    Grades are drawn from ``min_grade`` to ``max_grade`` (the constructor
    arguments), both ends included.
    """

    def __init__(self, min_grade=0, max_grade=5):
        self.min_grade = min_grade
        # Stored one past the requested maximum because the upper bound of
        # np.random.randint is exclusive.
        self.max_grade = max_grade + 1

    def fit(self, X):
        """Remember the rating table (``X.data``) whose movies will be graded."""
        self.data = X.data

    def predict(self, uid):
        """Return ``{movieID: random grade}`` for every distinct movie seen in ``fit``.

        ``uid`` is accepted for interface compatibility and is not used.
        """
        movie_ids = set(self.data["movieID"].values)
        grades = np.random.randint(self.min_grade, self.max_grade, len(movie_ids))
        return {movie: grade for movie, grade in zip(movie_ids, grades)}

In [None]:
class ViewsPredictor:
    """Scores each movie by the number of rows it has in the rating table."""

    def __init__(self):
        """Nothing to configure."""

    def fit(self, X):
        """Count the rows per ``movieID`` in ``X.data`` and store them in ``self.predictions``."""
        view_counts = X.data["movieID"].value_counts()
        self.predictions = {
            movie: views
            for movie, views in zip(view_counts.index, view_counts.to_numpy())
        }

    def predict(self, uid):
        """Return ``{movieID: count}`` ordered from the most to the least counted movie.

        ``uid`` is accepted for interface compatibility and is not used.
        """
        # sorted() is stable, so movies with equal counts keep their stored order.
        ranking = sorted(self.predictions, key=self.predictions.get, reverse=True)
        return {movie: self.predictions[movie] for movie in ranking}

In [None]:
class STDPredictor:
    """Scores movies by the standard deviation of their ratings.

    Parameters
    ----------
    n
        Only movies with strictly more than ``n`` rows in the rating table
        receive a score.
    """

    def __init__(self, n):
        self.n = n

    def fit(self, X):
        """Compute the per-movie rating standard deviation from ``X.data``.

        Reads the ``movieID`` and ``rating`` columns and stores
        ``{movieID: std}`` in ``self.predictions``. pandas computes the sample
        standard deviation, so a movie with a single rating gets NaN.
        """
        ratings = X.data
        counts = ratings["movieID"].value_counts()
        frequent_ids = counts[counts > self.n].index

        selected = ratings.loc[ratings["movieID"].isin(frequent_ids), ["movieID", "rating"]]
        spread = selected.groupby("movieID")["rating"].std()
        self.predictions = dict(zip(spread.index, spread.to_numpy()))

    def predict(self, uid):
        """Return ``{movieID: std}`` ordered from the largest to the smallest deviation.

        Movies whose deviation is NaN are placed after all ranked movies.
        Sorting them together with the finite scores would make the order
        unreliable, because every comparison against NaN is false.
        ``uid`` is accepted for interface compatibility and is not used.
        """
        ranked = []
        undefined = []
        for movie, score in self.predictions.items():
            (undefined if np.isnan(score) else ranked).append((movie, score))

        # Stable sort: equal deviations keep their stored order.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return dict(ranked + undefined)