In [1]:
# Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.sparse import csr_matrix
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error
import itertools
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.pipeline import make_pipeline
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
%matplotlib inline

In [2]:
# Load the ratings table from the local CSV file and preview its first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably a row index
# that was written into the CSV - consider read_csv(..., index_col=0); confirm.
ratings = pd.read_csv('ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


In [3]:
# Keep only the ratings that belong to the first five distinct movie ids
# (in order of first appearance in the table).
first_movie_ids = ratings["movieId"].unique()[:5]
new_ratings = ratings[ratings["movieId"].isin(first_movie_ids)]

In [4]:
# Build the user x movie rating matrix: one row per user, one column per movie,
# each cell the mean rating; user/movie pairs without a rating become NaN.
user_movie = pd.crosstab(
    index=new_ratings.userId,
    columns=new_ratings.movieId,
    values=new_ratings.rating,
    aggfunc="mean",
)

In [5]:
# Hold out 30 % of the users for testing. The seed is fixed (same value as the
# KMeans/TSNE seeds used in this notebook) so that a fresh Restart & Run All
# reproduces the same split and therefore the same clusters and recommendations.
user_movie_train, user_movie_test = train_test_split(
    user_movie, test_size=0.3, random_state=42
)

In [6]:
# Elbow method: record the KMeans inertia for k = 1..14 clusters.
elbow = []

# The imputation does not depend on the number of clusters, so it is done once
# here instead of being refitted inside every loop iteration (refitting the
# KNNImputer for each k is what made this cell so slow).
train_imputed = KNNImputer(n_neighbors=3).fit_transform(user_movie_train)

for n_clusters in range(1, 15):
    clusterer = KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
    clusterer.fit(train_imputed)
    # inertia_ = sum of squared distances of the samples to their closest centre
    elbow.append(clusterer.inertia_)
    print("Kmeans med {} klynger".format(n_clusters))



Kmeans med 1 klynger




Kmeans med 2 klynger




Kmeans med 3 klynger




Kmeans med 4 klynger




Kmeans med 5 klynger


KeyboardInterrupt: 

In [None]:
# Elbow plot: elbow[j] holds the inertia for k = j + 1 clusters, so plot against
# the real k values instead of relabelling the default 0-based ticks. Deriving
# the x values from len(elbow) also keeps the plot correct if the loop that
# fills `elbow` was stopped early.
k_values = np.arange(1, len(elbow) + 1)
plt.figure()
plt.plot(k_values, elbow, marker="o")
plt.xticks(k_values)
plt.xlabel("Antall klynger (k)")
plt.ylabel("Inertia")
plt.show()

In [7]:
# Final model: impute missing ratings from the 3 nearest users, then cluster the
# users into 12 groups. make_pipeline names the steps after their classes
# ("knnimputer" and "kmeans"), which later cells use to access them.
rating_imputer = KNNImputer(n_neighbors=3)
user_clusterer = KMeans(n_clusters=12, init='k-means++', random_state=42)
kmeans = make_pipeline(rating_imputer, user_clusterer)
kmeans.fit(user_movie_train)



In [8]:
# Cluster label for every held-out (test) user; the pipeline imputes the missing
# ratings before assigning each row to a cluster.
y_user_movie_hat = kmeans.predict(user_movie_test)

In [None]:
from sklearn.manifold import TSNE

# 2-D t-SNE embedding of the imputed training ratings (one row per training user).
# TSNE has no transform method, hence fit_transform rather than fit.
train_imputed_for_tsne = kmeans["knnimputer"].transform(user_movie_train)
tsne = TSNE(random_state=42)
movie_tsne = tsne.fit_transform(train_imputed_for_tsne)

In [None]:
# movie_tsne has one row per TRAINING user, so the points must be coloured by the
# training labels. (Using the test-set predictions here would index unrelated
# rows and colour the points by other users' clusters.)
train_labels = kmeans["kmeans"].labels_
for cluster_id in np.unique(train_labels):  # np.unique returns sorted labels
    cluster_points = movie_tsne[train_labels == cluster_id]
    plt.scatter(
        x=cluster_points[:, 0],
        y=cluster_points[:, 1],
        color="C{}".format(cluster_id),
        label="Klynge {}".format(cluster_id),  # gives plt.legend() entries to show
    )
plt.legend()
plt.show()

In [None]:
from sklearn.decomposition import PCA

# Project the imputed training ratings onto their first two principal components
# (one row per training user).
train_imputed_for_pca = kmeans["knnimputer"].transform(user_movie_train)
pca = PCA(n_components=2)
movie_pca = pca.fit_transform(train_imputed_for_pca)

In [None]:
# movie_pca has one row per TRAINING user, so the points must be coloured by the
# training labels. (Using the test-set predictions here would index unrelated
# rows and colour the points by other users' clusters.)
train_labels = kmeans["kmeans"].labels_
for cluster_id in np.unique(train_labels):  # np.unique returns sorted labels
    cluster_points = movie_pca[train_labels == cluster_id]
    plt.scatter(
        x=cluster_points[:, 0],
        y=cluster_points[:, 1],
        color="C{}".format(cluster_id),
        label="Klynge {}".format(cluster_id),
    )
plt.legend()
plt.show()

### Designe anbefalingssystem

In [9]:
# Keep only the test users who have rated every movie, so that the full set of
# true ratings is known for each simulated subscriber.
test_not_nan = user_movie_test.dropna()

In [10]:
class abonnent():
    """Simulated subscriber who reveals true ratings one movie at a time.

    Parameters
    ----------
    fasit : 2-D array-like with a single row
        The subscriber's true ratings; ``fasit[0][j]`` is the rating of movie j.

    Attributes
    ----------
    idx1 : index of the movie revealed by ``start`` (``None`` before that).
    filmer_sett : array of shape ``(1, n_movies)`` holding the ratings revealed
        so far; movies not yet seen are NaN.
    """

    def __init__(self, fasit):
        self.fasit = fasit
        self.idx1 = None
        # One slot per movie in `fasit` (previously hard-coded to 5 movies).
        self.filmer_sett = np.full((1, len(fasit[0])), np.nan)

    def start(self):
        """Reveal the rating of one randomly chosen movie and return ``filmer_sett``."""
        self.idx1 = np.random.choice(len(self.fasit[0]))
        self.filmer_sett[0][self.idx1] = self.fasit[0][self.idx1]
        return self.filmer_sett

    def ny_film(self, anbefaling):
        """Watch the recommended movie and return the updated ``filmer_sett``.

        If movie index ``anbefaling`` has not been seen yet, its true rating is
        revealed and printed; otherwise a message is printed and nothing changes.
        The returned array is the instance's own ``filmer_sett`` (not a copy).
        """
        usette_idx = np.where(np.isnan(self.filmer_sett))[1]
        if np.isin(anbefaling, usette_idx):
            self.filmer_sett[0][anbefaling] = self.fasit[0][anbefaling]
            print("Abonnentens rating = ", self.fasit[0][anbefaling])
        else:
            print("denne filmen har jeg sett!")
        return self.filmer_sett

In [11]:
class anbefalingssystem():
    """Recommender that suggests the best-rated unseen movie of the user's cluster.

    ``modell`` must offer ``predict(rating)`` and expose the fitted clusterer as
    ``modell["kmeans"]`` with a ``cluster_centers_`` array.
    """

    def __init__(self, modell):
        self.model = modell
        self.pred = []  # cluster predicted at each call to `anbefaling`

    def anbefaling(self, rating):
        """Return the index of the movie to recommend next.

        ``rating`` is a 2-D array of the ratings known so far (NaN = not seen).
        The predicted cluster's centre is used as the expected ratings; entries
        for movies already seen are set to 0 so they cannot win the argmax.
        """
        cluster_label = self.model.predict(rating)
        self.pred.append(cluster_label[0])

        # Indexing with the label array yields a copy, so zeroing entries below
        # does not modify the model's cluster centres.
        centres = self.model["kmeans"].cluster_centers_
        expected = centres[cluster_label]

        already_seen = np.where(~np.isnan(rating))[1]
        expected[0][already_seen] = 0

        best_movie = np.argmax(expected)
        print("Anbefaler film nr.", best_movie)
        print("Predikert rating = ", expected.max())
        return best_movie

In [12]:
# Simulate every fully-rated test user: reveal one random rating, then let the
# recommender suggest the remaining movies one at a time.
# After start() one movie is already seen, so there are (n_movies - 1) rounds.
n_rounds = test_not_nan.shape[1] - 1
for fasit_rad in test_not_nan.to_numpy():
    abo = abonnent(fasit_rad[np.newaxis, :])  # single-row 2-D array of true ratings
    anbef = anbefalingssystem(kmeans)         # fresh recommender => fresh `pred` log
    film_array = abo.start()
    # `_` instead of reusing the outer loop variable, which the original shadowed.
    for _ in range(n_rounds):
        ny_film = anbef.anbefaling(film_array)
        film_array = abo.ny_film(ny_film)
    print(anbef.pred)  # cluster predicted at each recommendation step
    print("----------------")
    

Anbefaler film nr. 2
Predikert rating =  4.557173895550013
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.983856858846919
Abonnentens rating =  3.5
Anbefaler film nr. 1
Predikert rating =  3.389224652087476
Abonnentens rating =  4.5
[0, 5, 5, 5]
----------------
Anbefaler film nr. 2
Predikert rating =  4.16910519020484
Abonnentens rating =  4.5
Anbefaler film nr. 3
Predikert rating =  4.422424702606935
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  4.419303130009282
Abonnentens rating =  3.0
Anbefaler film nr. 4
Predikert rating =  3.366471584783607
Abonnentens rating =  2.5
[10, 8, 8, 10]
----------------
Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  4.0
Anbefaler film nr. 3
Predikert rating =  3.872512586909608
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  3.8574482538160324
Abonnentens rating =  5.0
Anb

Anbefaler film nr. 1
Predikert rating =  2.8958491521133887
Abonnentens rating =  5.0
[5, 5, 5, 8]
----------------
Anbefaler film nr. 2
Predikert rating =  4.021897226884044
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  3.8574482538160324
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.5533645009190398
Abonnentens rating =  3.0
[2, 2, 2, 2]
----------------
Anbefaler film nr. 2
Predikert rating =  4.16910519020484
Abonnentens rating =  4.5
Anbefaler film nr. 3
Predikert rating =  4.29317883103994
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  4.208544125913436
Abonnentens rating =  3.5
Anbefaler film nr. 1
Predikert rating =  3.5032602585722317
Abonnentens rating =  2.5
[10, 0, 11, 11]
----------------
Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  3.0
Anbefaler film nr. 3
Predikert rating =  3.981877919811

Anbefaler film nr. 4
Predikert rating =  3.983856858846919
Abonnentens rating =  2.0
[10, 5, 5, 5]
----------------
Anbefaler film nr. 2
Predikert rating =  4.16910519020484
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.381351888668001
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.983856858846919
Abonnentens rating =  5.0
[10, 5, 5, 5]
----------------
Anbefaler film nr. 2
Predikert rating =  4.711635750421587
Abonnentens rating =  4.5
Anbefaler film nr. 3
Predikert rating =  4.29317883103994
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  4.049985247176846
Abonnentens rating =  5.0
Anbefaler film nr. 1
Predikert rating =  4.084659090909091
Abonnentens rating =  4.0
[11, 0, 0, 4]
----------------
Anbefaler film nr. 2
Predikert rating =  4.5787564329705575
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.422424702606935

Anbefaler film nr. 0
Predikert rating =  4.049985247176846
Abonnentens rating =  3.5
Anbefaler film nr. 1
Predikert rating =  3.2707008932163846
Abonnentens rating =  2.5
[2, 0, 0, 0]
----------------
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.381351888668001
Abonnentens rating =  3.0
Anbefaler film nr. 4
Predikert rating =  4.208544125913436
Abonnentens rating =  5.0
Anbefaler film nr. 1
Predikert rating =  3.5032602585722317
Abonnentens rating =  5.0
[5, 5, 11, 11]
----------------
Anbefaler film nr. 2
Predikert rating =  4.942578246738796
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.942567116334995
Abonnentens rating =  4.5
Anbefaler film nr. 4
Predikert rating =  3.9664640933173043
Abonnentens rating =  3.0
Anbefaler film nr. 1
Predikert rating =  3.685900004452162
Abonnentens rating =  3.0
[9, 9, 9, 9]
----------------
Anbefaler film nr. 2
Predikert rating =  4.942054340622

Anbefaler film nr. 1
Predikert rating =  3.888317687378931
Abonnentens rating =  3.5
Anbefaler film nr. 3
Predikert rating =  3.2643048725972266
Abonnentens rating =  3.0
[10, 3, 3, 3]
----------------
Anbefaler film nr. 4
Predikert rating =  3.539581943517901
Abonnentens rating =  2.0
Anbefaler film nr. 1
Predikert rating =  3.4466310873915944
Abonnentens rating =  0.5
Anbefaler film nr. 0
Predikert rating =  3.101512119190574
Abonnentens rating =  2.5
Anbefaler film nr. 2
Predikert rating =  1.5130086724483025
Abonnentens rating =  1.0
[7, 7, 7, 7]
----------------
Anbefaler film nr. 2
Predikert rating =  4.750530303030305
Abonnentens rating =  4.0
Anbefaler film nr. 3
Predikert rating =  4.642537878787876
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  4.579924242424246
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  4.084659090909091
Abonnentens rating =  2.5
[4, 4, 4, 4]
----------------
Anbefaler film nr. 2
Predikert rating =  4.5571738955500

Anbefaler film nr. 3
Predikert rating =  4.29317883103994
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  4.049985247176846
Abonnentens rating =  4.0
[0, 2, 0, 0]
----------------
Anbefaler film nr. 2
Predikert rating =  4.942054340622931
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  4.0
Anbefaler film nr. 4
Predikert rating =  4.066226764303528
Abonnentens rating =  3.0
Anbefaler film nr. 0
Predikert rating =  4.419303130009282
Abonnentens rating =  3.0
[5, 5, 0, 8]
----------------
Anbefaler film nr. 2
Predikert rating =  4.021897226884044
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  3.0
Anbefaler film nr. 4
Predikert rating =  4.066226764303528
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  4.049985247176846
Abonnentens rating =  1.0
[2, 2, 0, 0]
----------------
Anbefaler film nr. 2
Predikert rating =  4.750530303030305
Ab

Anbefaler film nr. 3
Predikert rating =  4.942567116334995
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.9664640933173043
Abonnentens rating =  3.0
Anbefaler film nr. 1
Predikert rating =  3.685900004452162
Abonnentens rating =  4.0
[9, 9, 9, 9]
----------------
Anbefaler film nr. 4
Predikert rating =  4.803901515151512
Abonnentens rating =  3.0
Anbefaler film nr. 2
Predikert rating =  4.942054340622931
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.957125684519845
Abonnentens rating =  4.0
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  5.0
[4, 5, 9, 5]
----------------
Anbefaler film nr. 3
Predikert rating =  4.816951080773605
Abonnentens rating =  5.0
Anbefaler film nr. 2
Predikert rating =  4.5787564329705575
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.419303130009282
Abonnentens rating =  5.0
Anbefaler film nr. 1
Predikert rating =  2.8958491521133887
Abonnentens rating =  4.0
[6

Anbefaler film nr. 4
Predikert rating =  3.983856858846919
Abonnentens rating =  3.5
Anbefaler film nr. 1
Predikert rating =  3.389224652087476
Abonnentens rating =  3.5
[9, 5, 5, 5]
----------------
Anbefaler film nr. 2
Predikert rating =  4.557173895550013
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.381351888668001
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.9664640933173043
Abonnentens rating =  4.0
[0, 5, 5, 9]
----------------
Anbefaler film nr. 2
Predikert rating =  4.942578246738796
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.942567116334995
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.9664640933173043
Abonnentens rating =  5.0
Anbefaler film nr. 1
Predikert rating =  4.084659090909091
Abonnentens rating =  3.0
[9, 9, 9, 4]
----------------
Anbefaler film nr. 2
Predikert rating =  4.750530303030305

Anbefaler film nr. 0
Predikert rating =  4.579924242424246
Abonnentens rating =  5.0
[2, 4, 4, 4]
----------------
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.381351888668001
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.9664640933173043
Abonnentens rating =  3.5
Anbefaler film nr. 1
Predikert rating =  3.685900004452162
Abonnentens rating =  4.0
[5, 5, 9, 9]
----------------
Anbefaler film nr. 2
Predikert rating =  4.021897226884044
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  3.0
Anbefaler film nr. 4
Predikert rating =  4.066226764303528
Abonnentens rating =  3.5
Anbefaler film nr. 0
Predikert rating =  3.1147646183069053
Abonnentens rating =  3.0
[2, 2, 0, 10]
----------------
Anbefaler film nr. 2
Predikert rating =  4.021897226884044
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  4.01702229681131

Anbefaler film nr. 0
Predikert rating =  4.419303130009282
Abonnentens rating =  2.5
Anbefaler film nr. 1
Predikert rating =  3.5032602585722317
Abonnentens rating =  3.5
[5, 8, 8, 11]
----------------
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.381351888668001
Abonnentens rating =  3.5
Anbefaler film nr. 4
Predikert rating =  3.983856858846919
Abonnentens rating =  3.5
Anbefaler film nr. 1
Predikert rating =  3.389224652087476
Abonnentens rating =  3.0
[5, 5, 5, 5]
----------------
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  4.0
Anbefaler film nr. 4
Predikert rating =  4.066226764303528
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.049985247176846
Abonnentens rating =  3.0
Anbefaler film nr. 1
Predikert rating =  3.5032602585722317
Abonnentens rating =  3.0
[5, 0, 0, 11]
----------------
Anbefaler film nr. 3
Predikert rating =  4.9123525513585

Anbefaler film nr. 1
Predikert rating =  3.5032602585722317
Abonnentens rating =  4.0
[9, 6, 11, 11]
----------------
Anbefaler film nr. 2
Predikert rating =  4.5787564329705575
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.422424702606935
Abonnentens rating =  5.0
Anbefaler film nr. 0
Predikert rating =  4.419303130009282
Abonnentens rating =  2.0
Anbefaler film nr. 1
Predikert rating =  3.8040955631399314
Abonnentens rating =  1.0
[8, 8, 8, 6]
----------------
Anbefaler film nr. 2
Predikert rating =  4.942054340622931
Abonnentens rating =  1.5
Anbefaler film nr. 4
Predikert rating =  4.289301147369988
Abonnentens rating =  0.5
Anbefaler film nr. 1
Predikert rating =  3.4466310873915944
Abonnentens rating =  2.5
Anbefaler film nr. 3
Predikert rating =  1.4594173893706883
Abonnentens rating =  0.5
[5, 3, 7, 7]
----------------
Anbefaler film nr. 2
Predikert rating =  4.557173895550013
Abonnentens rating =  4.5
Anbefaler film nr. 3
Predikert rating =  4.9123525513

Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  3.8574482538160324
Abonnentens rating =  4.0
Anbefaler film nr. 4
Predikert rating =  3.5533645009190398
Abonnentens rating =  4.0
[2, 2, 2, 2]
----------------
Anbefaler film nr. 2
Predikert rating =  4.557173895550013
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  4.0
Anbefaler film nr. 3
Predikert rating =  3.872512586909608
Abonnentens rating =  4.0
Anbefaler film nr. 4
Predikert rating =  3.5533645009190398
Abonnentens rating =  4.5
[0, 2, 2, 2]
----------------
Anbefaler film nr. 2
Predikert rating =  4.16910519020484
Abonnentens rating =  3.0
Anbefaler film nr. 3
Predikert rating =  3.981877919811075
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.366471584783607
Abonnentens rating =  2.0
Anbefaler film nr. 0
Predikert rating =  4.419303130009282
Abonnentens rating =  4.0
[10

Anbefaler film nr. 1
Predikert rating =  3.685900004452162
Abonnentens rating =  3.0
[9, 9, 9, 9]
----------------
Anbefaler film nr. 2
Predikert rating =  4.557173895550013
Abonnentens rating =  4.5
Anbefaler film nr. 0
Predikert rating =  4.419303130009282
Abonnentens rating =  4.5
Anbefaler film nr. 4
Predikert rating =  4.803901515151512
Abonnentens rating =  5.0
Anbefaler film nr. 1
Predikert rating =  4.084659090909091
Abonnentens rating =  4.0
[0, 8, 4, 4]
----------------
Anbefaler film nr. 2
Predikert rating =  4.942578246738796
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.942567116334995
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.9664640933173043
Abonnentens rating =  5.0
Anbefaler film nr. 1
Predikert rating =  4.084659090909091
Abonnentens rating =  4.0
[9, 9, 9, 4]
----------------
Anbefaler film nr. 3
Predikert rating =  4.912352551358515
Abonnentens rating =  4.5
Anbefaler film nr. 4
Predikert rating =  4.066226764303528


Anbefaler film nr. 3
Predikert rating =  4.942567116334995
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  3.9664640933173043
Abonnentens rating =  1.0
Anbefaler film nr. 1
Predikert rating =  2.8958491521133887
Abonnentens rating =  1.0
[9, 9, 9, 8]
----------------
Anbefaler film nr. 2
Predikert rating =  4.16910519020484
Abonnentens rating =  5.0
Anbefaler film nr. 3
Predikert rating =  4.616750983698706
Abonnentens rating =  5.0
Anbefaler film nr. 4
Predikert rating =  4.208544125913436
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  3.5032602585722317
Abonnentens rating =  4.0
[10, 11, 11, 11]
----------------
Anbefaler film nr. 2
Predikert rating =  4.557173895550013
Abonnentens rating =  4.0
Anbefaler film nr. 1
Predikert rating =  4.017022296811317
Abonnentens rating =  3.0
Anbefaler film nr. 3
Predikert rating =  4.29317883103994
Abonnentens rating =  4.0
Anbefaler film nr. 0
Predikert rating =  4.049985247176846
Abonnentens rating =  5.0
