In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/netflix-prize-data/combined_data_3.txt
/kaggle/input/netflix-prize-data/movie_titles.csv
/kaggle/input/netflix-prize-data/combined_data_4.txt
/kaggle/input/netflix-prize-data/combined_data_1.txt
/kaggle/input/netflix-prize-data/README
/kaggle/input/netflix-prize-data/probe.txt
/kaggle/input/netflix-prize-data/combined_data_2.txt
/kaggle/input/netflix-prize-data/qualifying.txt


In [2]:
# Defining column names
column_names = ['MovieID', 'YearOfRelease', 'Title']

# Reading CSV file without headers, skipping lines with parsing errors
movie_titles = pd.read_csv('/kaggle/input/netflix-prize-data/movie_titles.csv', encoding='ISO-8859-1', header=None,names=column_names,on_bad_lines ='skip')

# Display the first few rows of the dataframe
movie_titles.head(10)

Unnamed: 0,MovieID,YearOfRelease,Title
0,1,2003.0,Dinosaur Planet
1,2,2004.0,Isle of Man TT 2004 Review
2,3,1997.0,Character
3,4,1994.0,Paula Abdul's Get Up & Dance
4,5,2004.0,The Rise and Fall of ECW
5,6,1997.0,Sick
6,7,1992.0,8 Man
7,8,2004.0,What the #$*! Do We Know!?
8,9,1991.0,Class of Nuke 'Em High 2
9,10,2001.0,Fighter


Txt to df -  combined data 1

In [3]:
import pandas as pd

def read_combined_data(file_path):
    # Initialize lists to store data
    movie_ids = []
    customer_ids = []
    ratings = []
    dates = []

    # Read the file
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Initialize variables
    current_movie_id = None

    # Process each line
    for line in lines:
        # Strip leading/trailing whitespaces
        line = line.strip()

        # Check for movie ID
        if line.endswith(':'):
            # Update current movie ID
            current_movie_id = line[:-1].strip()
        elif line:  # Check if the line is not empty
            # Extract customer data
            data = line.strip().split(',')
            if len(data) >= 3:
                customer_id = data[0].strip()
                rating_str = data[1].strip()
                date = data[2].strip()

                try:
                    rating = float(rating_str)
                except ValueError:
                    print(f"Skipping invalid rating (not a valid float): '{rating_str}'")
                    continue

                # Append data to lists
                movie_ids.append(current_movie_id)
                customer_ids.append(customer_id)
                ratings.append(rating)
                dates.append(date)
            else:
                print(f"Skipping line with insufficient data: '{line}'")

    # Create DataFrame
    combined_data = pd.DataFrame({
        'MovieID': movie_ids,
        'CustomerID': customer_ids,
        'Rating': ratings,
        'Date': dates
    })

    return combined_data

# Read combined_data_1.txt
file_path = "/kaggle/input/netflix-prize-data/combined_data_1.txt"
print(f"Reading file: {file_path}")
data_1 = read_combined_data(file_path)

# Display the first few rows of the DataFrame
print(data_1.head())

Reading file: /kaggle/input/netflix-prize-data/combined_data_1.txt
  MovieID CustomerID  Rating        Date
0       1    1488844     3.0  2005-09-06
1       1     822109     5.0  2005-05-13
2       1     885013     4.0  2005-10-19
3       1      30878     4.0  2005-12-26
4       1     823519     3.0  2004-05-03


In [4]:
data_1.head(5)

Unnamed: 0,MovieID,CustomerID,Rating,Date
0,1,1488844,3.0,2005-09-06
1,1,822109,5.0,2005-05-13
2,1,885013,4.0,2005-10-19
3,1,30878,4.0,2005-12-26
4,1,823519,3.0,2004-05-03


txt to df- combined data 2



In [5]:
data_1.drop(columns = "Date", inplace = True)

In [6]:
data_1.head()

Unnamed: 0,MovieID,CustomerID,Rating
0,1,1488844,3.0
1,1,822109,5.0
2,1,885013,4.0
3,1,30878,4.0
4,1,823519,3.0


***Changing the data type of rating***

In [7]:
data_1['Rating']=data_1['Rating'].astype("int8")

# **------------------------------------------------------------------------------------------------------------------------------------------------**

## **DATA CLEANING**

***Dropping year of release***

In [8]:
movie_titles.drop(columns = "YearOfRelease", inplace = True)

***Any user who has rated the same movie more than once***

### **-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------**

# *Collaborative Filtering*

### *User criteria- rated more than 100 movies*

In [9]:
rating_by_each_user = data_1.groupby('CustomerID').count()['MovieID']>1000

In [10]:
#all users who have rated more than 100 movies
active_users = rating_by_each_user[rating_by_each_user].index
active_users = active_users.tolist()
active_users

['1001129',
 '1028463',
 '1037245',
 '1061195',
 '1105029',
 '1110156',
 '1114324',
 '1227322',
 '1272379',
 '1298511',
 '1299887',
 '1300759',
 '1314869',
 '1403217',
 '1461435',
 '1470123',
 '1473980',
 '1519378',
 '1602153',
 '1612901',
 '16272',
 '1639792',
 '1663888',
 '1664010',
 '1673185',
 '1707198',
 '1710658',
 '1784150',
 '1792741',
 '1806515',
 '1819462',
 '184705',
 '1852040',
 '1876520',
 '1902838',
 '1903324',
 '1927580',
 '1932594',
 '1935793',
 '1977959',
 '2040859',
 '2056022',
 '2061495',
 '2062350',
 '2083367',
 '2118461',
 '2143500',
 '2147527',
 '2176465',
 '2237185',
 '2238060',
 '2256485',
 '2291306',
 '2315012',
 '2439493',
 '2457095',
 '2460347',
 '2537543',
 '2606799',
 '2625420',
 '303948',
 '305344',
 '319058',
 '322009',
 '3321',
 '387418',
 '447759',
 '491531',
 '504620',
 '507603',
 '525356',
 '530789',
 '57633',
 '636262',
 '682963',
 '716173',
 '727242',
 '752642',
 '786312',
 '789014',
 '794999',
 '798296',
 '818752',
 '862596',
 '952156']

*Total 70270 users have rated more than 100 movies*

In [11]:
filtered_data = data_1[data_1['CustomerID'].isin(active_users)]
filtered_data

Unnamed: 0,MovieID,CustomerID,Rating
25,1,1227322,4
31,1,786312,3
36,1,525356,2
60,1,1927580,4
84,1,1792741,2
...,...,...,...
24053690,4499,387418,2
24053697,4499,1114324,1
24053704,4499,794999,5
24053705,4499,1932594,1


### *Movie criteria- considering movies which have more than 100 ratings*

In [12]:
most_rated_movies = filtered_data.groupby('MovieID').count()['Rating']>50

In [13]:
y = most_rated_movies[most_rated_movies].index
y = y.tolist()

In [14]:
final_data_1 = filtered_data[filtered_data['MovieID'].isin(y)]
final_data_1

Unnamed: 0,MovieID,CustomerID,Rating
31883,18,303948,4
32005,18,1227322,2
32145,18,786312,3
32372,18,1784150,3
32689,18,1927580,5
...,...,...,...
24051796,4496,2238060,5
24051817,4496,1707198,4
24051820,4496,1852040,2
24051859,4496,1612901,4


## **2) User Based Collaborative Filtering:**

### Makes recommendations based on similar user interaction. 


In [15]:
user_movie_interact1 = final_data_1.pivot_table(index = 'CustomerID', columns = "MovieID", values = "Rating")
user_movie_interact1 = user_movie_interact1.fillna(0)

In [16]:
user_movie_interact1

MovieID,1011,1012,1020,1022,1026,1027,1035,104,1043,1046,...,940,953,962,963,984,985,990,992,993,994
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001129,0.0,0.0,3.0,0.0,2.0,0.0,0.0,0.0,3.0,3.0,...,2.0,2.0,0.0,3.0,2.0,3.0,0.0,0.0,3.0,2.0
1028463,1.0,5.0,4.0,0.0,4.0,4.0,5.0,5.0,5.0,4.0,...,0.0,0.0,4.0,2.0,0.0,4.0,0.0,2.0,2.0,4.0
1037245,0.0,3.0,5.0,3.0,3.0,0.0,4.0,0.0,4.0,3.0,...,4.0,3.0,3.0,5.0,3.0,4.0,3.0,0.0,0.0,4.0
1061195,0.0,0.0,5.0,0.0,5.0,1.0,1.0,5.0,5.0,1.0,...,5.0,3.0,4.0,4.0,1.0,5.0,4.0,5.0,5.0,5.0
1105029,0.0,3.0,4.0,5.0,0.0,2.0,0.0,0.0,3.0,2.0,...,3.0,4.0,0.0,3.0,0.0,4.0,0.0,0.0,0.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794999,5.0,5.0,5.0,5.0,0.0,0.0,5.0,5.0,0.0,5.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
798296,2.0,3.0,0.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0,...,3.0,0.0,0.0,4.0,0.0,1.0,5.0,0.0,0.0,2.0
818752,3.0,0.0,0.0,0.0,3.0,3.0,4.0,3.0,4.0,0.0,...,0.0,5.0,0.0,5.0,0.0,0.0,5.0,2.0,0.0,3.0
862596,0.0,3.0,0.0,0.0,2.0,3.0,0.0,0.0,3.0,4.0,...,0.0,0.0,3.0,0.0,0.0,4.0,0.0,0.0,2.0,0.0


In [17]:
user_movie_interact1 = user_movie_interact1.astype('int8')

In [18]:
user_movie_interact1

MovieID,1011,1012,1020,1022,1026,1027,1035,104,1043,1046,...,940,953,962,963,984,985,990,992,993,994
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001129,0,0,3,0,2,0,0,0,3,3,...,2,2,0,3,2,3,0,0,3,2
1028463,1,5,4,0,4,4,5,5,5,4,...,0,0,4,2,0,4,0,2,2,4
1037245,0,3,5,3,3,0,4,0,4,3,...,4,3,3,5,3,4,3,0,0,4
1061195,0,0,5,0,5,1,1,5,5,1,...,5,3,4,4,1,5,4,5,5,5
1105029,0,3,4,5,0,2,0,0,3,2,...,3,4,0,3,0,4,0,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794999,5,5,5,5,0,0,5,5,0,5,...,5,5,5,5,5,5,5,5,5,5
798296,2,3,0,2,0,1,0,0,2,0,...,3,0,0,4,0,1,5,0,0,2
818752,3,0,0,0,3,3,4,3,4,0,...,0,5,0,5,0,0,5,2,0,3
862596,0,3,0,0,2,3,0,0,3,4,...,0,0,3,0,0,4,0,0,2,0


*There are 70270 customers with 3455 movies*

In [19]:
#normalizing the data
user_movie_norm = user_movie_interact1.subtract(user_movie_interact1.mean(axis=1), axis = 'rows')
user_movie_norm

MovieID,1011,1012,1020,1022,1026,1027,1035,104,1043,1046,...,940,953,962,963,984,985,990,992,993,994
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001129,-1.242637,-1.242637,1.757363,-1.242637,0.757363,-1.242637,-1.242637,-1.242637,1.757363,1.757363,...,0.757363,0.757363,-1.242637,1.757363,0.757363,1.757363,-1.242637,-1.242637,1.757363,0.757363
1028463,-2.056101,1.943899,0.943899,-3.056101,0.943899,0.943899,1.943899,1.943899,1.943899,0.943899,...,-3.056101,-3.056101,0.943899,-1.056101,-3.056101,0.943899,-3.056101,-1.056101,-1.056101,0.943899
1037245,-2.880785,0.119215,2.119215,0.119215,0.119215,-2.880785,1.119215,-2.880785,1.119215,0.119215,...,1.119215,0.119215,0.119215,2.119215,0.119215,1.119215,0.119215,-2.880785,-2.880785,1.119215
1061195,-3.082749,-3.082749,1.917251,-3.082749,1.917251,-2.082749,-2.082749,1.917251,1.917251,-2.082749,...,1.917251,-0.082749,0.917251,0.917251,-2.082749,1.917251,0.917251,1.917251,1.917251,1.917251
1105029,-2.444600,0.555400,1.555400,2.555400,-2.444600,-0.444600,-2.444600,-2.444600,0.555400,-0.444600,...,0.555400,1.555400,-2.444600,0.555400,-2.444600,1.555400,-2.444600,-2.444600,-2.444600,0.555400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794999,1.542777,1.542777,1.542777,1.542777,-3.457223,-3.457223,1.542777,1.542777,-3.457223,1.542777,...,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777
798296,0.437588,1.437588,-1.562412,0.437588,-1.562412,-0.562412,-1.562412,-1.562412,0.437588,-1.562412,...,1.437588,-1.562412,-1.562412,2.437588,-1.562412,-0.562412,3.437588,-1.562412,-1.562412,0.437588
818752,0.849930,-2.150070,-2.150070,-2.150070,0.849930,0.849930,1.849930,0.849930,1.849930,-2.150070,...,-2.150070,2.849930,-2.150070,2.849930,-2.150070,-2.150070,2.849930,-0.150070,-2.150070,0.849930
862596,-2.021038,0.978962,-2.021038,-2.021038,-0.021038,0.978962,-2.021038,-2.021038,0.978962,1.978962,...,-2.021038,-2.021038,0.978962,-2.021038,-2.021038,1.978962,-2.021038,-2.021038,-0.021038,-2.021038


### ***Identifying similar users***

In [20]:
from sklearn.metrics.pairwise import cosine_similarity

In [21]:
user_similarity_score = cosine_similarity(user_movie_norm)
user_similarity_score

array([[ 1.00000000e+00, -1.44420342e-02,  2.18482807e-02, ...,
        -4.25406832e-03,  7.32475712e-02, -8.97186950e-04],
       [-1.44420342e-02,  1.00000000e+00, -9.84706978e-02, ...,
        -2.35587294e-01,  3.14140031e-01, -1.46453334e-01],
       [ 2.18482807e-02, -9.84706978e-02,  1.00000000e+00, ...,
         2.23412264e-01, -5.27539424e-03,  2.16077671e-01],
       ...,
       [-4.25406832e-03, -2.35587294e-01,  2.23412264e-01, ...,
         1.00000000e+00, -1.25770362e-01,  4.35205218e-01],
       [ 7.32475712e-02,  3.14140031e-01, -5.27539424e-03, ...,
        -1.25770362e-01,  1.00000000e+00, -1.09079795e-02],
       [-8.97186950e-04, -1.46453334e-01,  2.16077671e-01, ...,
         4.35205218e-01, -1.09079795e-02,  1.00000000e+00]])

In [22]:
user_similarity_score.shape

(85, 85)

In [23]:
similar_user_df = pd.DataFrame(user_similarity_score, index = user_movie_interact1.index, columns = user_movie_interact1.index)
similar_user_df

CustomerID,1001129,1028463,1037245,1061195,1105029,1110156,1114324,1227322,1272379,1298511,...,716173,727242,752642,786312,789014,794999,798296,818752,862596,952156
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001129,1.000000,-0.014442,0.021848,-0.045491,0.036414,0.071810,0.025521,0.055874,0.071601,0.054879,...,-0.015191,0.048977,-0.009787,0.072865,-0.045820,-0.008886,-0.045169,-0.004254,0.073248,-0.000897
1028463,-0.014442,1.000000,-0.098471,0.267413,-0.126730,0.097309,0.318249,0.082789,0.202698,-0.014912,...,0.008028,0.143000,-0.112347,-0.194341,0.287108,0.021580,-0.201996,-0.235587,0.314140,-0.146453
1037245,0.021848,-0.098471,1.000000,-0.007797,0.296349,0.094893,0.012138,0.077897,0.008629,0.144206,...,0.020189,0.090547,0.215864,0.252054,0.029030,0.034408,0.166441,0.223412,-0.005275,0.216078
1061195,-0.045491,0.267413,-0.007797,1.000000,0.063125,0.200888,0.083469,0.173833,0.097103,0.086230,...,0.081440,0.102143,0.074509,0.005913,0.091862,0.020008,0.087074,-0.107672,0.072588,0.009934
1105029,0.036414,-0.126730,0.296349,0.063125,1.000000,0.191979,0.032625,0.150914,0.125865,0.196101,...,0.104020,0.140643,0.265270,0.412782,0.046577,0.110496,0.254272,0.304018,-0.026180,0.367814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794999,-0.008886,0.021580,0.034408,0.020008,0.110496,0.063883,-0.011422,-0.023424,0.145738,0.138987,...,-0.042955,0.146934,-0.031504,0.079079,0.154069,1.000000,0.098508,0.052390,0.063329,0.184313
798296,-0.045169,-0.201996,0.166441,0.087074,0.254272,0.087649,-0.140937,-0.037125,-0.057479,0.185634,...,-0.002492,0.036172,0.221480,0.283925,-0.062833,0.098508,1.000000,0.307332,-0.090535,0.353864
818752,-0.004254,-0.235587,0.223412,-0.107672,0.304018,-0.023807,-0.219564,-0.057891,-0.059351,0.119740,...,0.067000,0.050589,0.279269,0.269482,0.046649,0.052390,0.307332,1.000000,-0.125770,0.435205
862596,0.073248,0.314140,-0.005275,0.072588,-0.026180,0.049749,0.253154,0.092726,0.204538,-0.016968,...,0.025901,0.230091,-0.093520,-0.116370,0.238697,0.063329,-0.090535,-0.125770,1.000000,-0.010908


# ------------------------------------------------------------------------------------------------------------------------------------------

### ***Example: Finding 5 users similar to '952156'***

In [24]:
#0.3 is the threshold i.e the cosine similarity should be at least greater than 0.3 between two users.

similar_user_df[similar_user_df['952156']>0.3]['952156'].sort_values(ascending=False)[1:6]


CustomerID
818752     0.435205
57633      0.389642
1105029    0.367814
798296     0.353864
1927580    0.340092
Name: 952156, dtype: float64

1. Remove the movies that have been watched by the target user (user ID 1 in this example).
2. Keep only the movies that similar users have watched.

To remove the movies watched by the target user, we keep only the row for `CustomerID = 952156` in the user-item matrix and remove the items with missing values.

In [25]:
picked_userid_watched = user_movie_norm[user_movie_norm.index == '952156'].dropna(axis=1, how='all')
picked_userid_watched

MovieID,1011,1012,1020,1022,1026,1027,1035,104,1043,1046,...,940,953,962,963,984,985,990,992,993,994
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
952156,1.009818,-1.990182,-1.990182,-0.990182,-1.990182,0.009818,-1.990182,2.009818,0.009818,0.009818,...,0.009818,-1.990182,-0.990182,3.009818,2.009818,1.009818,-1.990182,-1.990182,-1.990182,-1.990182


In [26]:
similar_user_movies = user_movie_norm[user_movie_norm.index.isin(similar_user_df.index)].dropna(axis=1, how='all')
similar_user_movies

MovieID,1011,1012,1020,1022,1026,1027,1035,104,1043,1046,...,940,953,962,963,984,985,990,992,993,994
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001129,-1.242637,-1.242637,1.757363,-1.242637,0.757363,-1.242637,-1.242637,-1.242637,1.757363,1.757363,...,0.757363,0.757363,-1.242637,1.757363,0.757363,1.757363,-1.242637,-1.242637,1.757363,0.757363
1028463,-2.056101,1.943899,0.943899,-3.056101,0.943899,0.943899,1.943899,1.943899,1.943899,0.943899,...,-3.056101,-3.056101,0.943899,-1.056101,-3.056101,0.943899,-3.056101,-1.056101,-1.056101,0.943899
1037245,-2.880785,0.119215,2.119215,0.119215,0.119215,-2.880785,1.119215,-2.880785,1.119215,0.119215,...,1.119215,0.119215,0.119215,2.119215,0.119215,1.119215,0.119215,-2.880785,-2.880785,1.119215
1061195,-3.082749,-3.082749,1.917251,-3.082749,1.917251,-2.082749,-2.082749,1.917251,1.917251,-2.082749,...,1.917251,-0.082749,0.917251,0.917251,-2.082749,1.917251,0.917251,1.917251,1.917251,1.917251
1105029,-2.444600,0.555400,1.555400,2.555400,-2.444600,-0.444600,-2.444600,-2.444600,0.555400,-0.444600,...,0.555400,1.555400,-2.444600,0.555400,-2.444600,1.555400,-2.444600,-2.444600,-2.444600,0.555400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794999,1.542777,1.542777,1.542777,1.542777,-3.457223,-3.457223,1.542777,1.542777,-3.457223,1.542777,...,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777,1.542777
798296,0.437588,1.437588,-1.562412,0.437588,-1.562412,-0.562412,-1.562412,-1.562412,0.437588,-1.562412,...,1.437588,-1.562412,-1.562412,2.437588,-1.562412,-0.562412,3.437588,-1.562412,-1.562412,0.437588
818752,0.849930,-2.150070,-2.150070,-2.150070,0.849930,0.849930,1.849930,0.849930,1.849930,-2.150070,...,-2.150070,2.849930,-2.150070,2.849930,-2.150070,-2.150070,2.849930,-0.150070,-2.150070,0.849930
862596,-2.021038,0.978962,-2.021038,-2.021038,-0.021038,0.978962,-2.021038,-2.021038,0.978962,1.978962,...,-2.021038,-2.021038,0.978962,-2.021038,-2.021038,1.978962,-2.021038,-2.021038,-0.021038,-2.021038


In [27]:
# Remove the watched movie from the movie list
similar_user_movies.drop(picked_userid_watched.columns,axis=1, inplace=True, errors='ignore')

# Take a look at the data
similar_user_movies

MovieID
CustomerID
1001129
1028463
1037245
1061195
1105029
...
794999
798296
818752
862596
