In [1]:
import os
import pandas as pd
import numpy as np
from math import sqrt
from tqdm import tqdm_notebook as tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [4]:
# 각자 작업 환경에 맞는 경로를 지정해주세요. Google Colab과 Jupyter환경에서 경로가 다를 수 있습니다.
path = '../data/movielens/'
ratings_df = pd.read_csv(os.path.join(path, 'ratings.csv'), encoding='utf-8')

print(ratings_df.shape)
print(ratings_df.head())

(100836, 4)
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [5]:
train_df, test_df = train_test_split(ratings_df, test_size=0.2, random_state=1234)

print(train_df.shape)
print(test_df.shape)

(80668, 4)
(20168, 4)


# sparse Matrix 만들기

sparse matrix = (user, movie)

### 1. 연산량이 많아서 시간이 오래걸리는 방법


In [7]:
user_ids = sorted(list(set(train_df['userId'].values)))
movie_ids = sorted(list(set(train_df['movieId'].values)))

print(f'유저 수: {len(user_ids)}, 영화 수: {len(movie_ids)}')
print(user_ids)
print(movie_ids)

유저 수: 610, 영화 수: 8938
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 

In [8]:
# Empty sparse matrix를 dataframe 형태로 준비
sparse_matrix = pd.DataFrame(index=movie_ids, columns=user_ids)
sparse_matrix

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193573,,,,,,,,,,,...,,,,,,,,,,
193579,,,,,,,,,,,...,,,,,,,,,,
193581,,,,,,,,,,,...,,,,,,,,,,
193587,,,,,,,,,,,...,,,,,,,,,,


In [9]:
# train_df의 movieId를 기준으로 groupBy
grouped = train_df.groupby('movieId')
grouped['userId'].count()

movieId
1         169
2          82
3          44
4           4
5          38
         ... 
193573      1
193579      1
193581      1
193587      1
193609      1
Name: userId, Length: 8938, dtype: int64

In [18]:
group

Unnamed: 0,userId,movieId,rating,timestamp
80358,508,1270,4.5,1268298253
32643,222,1270,4.5,1391350098
75556,477,1270,4.5,1200939690
18883,122,1270,5.0,1461561374
18302,116,1270,4.5,1337199782
...,...,...,...,...
24714,171,1270,5.0,866905762
83217,527,1270,4.0,1033173871
2501,19,1270,4.0,965703583
73463,474,1270,4.5,1081177274


In [15]:
group.transpose()

Unnamed: 0,80358,32643,75556,18883,18302,3266,62766,49089,12983,82599,...,89422,45695,45849,81635,38559,24714,83217,2501,73463,90079
userId,508.0,222.0,477.0,122.0,116.0,21.0,414.0,318.0,83.0,524.0,...,580.0,303.0,304.0,517.0,265.0,171.0,527.0,19.0,474.0,586.0
movieId,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,...,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0,1270.0
rating,4.5,4.5,4.5,5.0,4.5,5.0,5.0,2.5,3.5,5.0,...,3.5,3.5,5.0,5.0,5.0,5.0,4.0,4.0,4.5,4.5
timestamp,1268298000.0,1391350000.0,1200940000.0,1461561000.0,1337200000.0,1407618000.0,961514698.0,1270753000.0,1333842000.0,851609356.0,...,1167793000.0,1053303000.0,898641489.0,1487957000.0,965314811.0,866905762.0,1033174000.0,965703583.0,1081177000.0,1529900000.0


In [10]:
idx = 0
for movieId, group in grouped:
    if idx < 1:
        print(group)
        group_copied = group.transpose()
        group_copied.loc['userId'] = pd.to_numeric(group_copied.loc['userId'])
        print(group_copied)
        group_copied.columns = group_copied.loc['userId']
        print(group_copied)
        group_copied = group_copied.drop(['userId', 'movieId']).rename(index={'rating': movieId})
        print(group_copied)
        print(group_copied.columns)
        print(group_copied.index)
    else:
        continue
        idx += 1

       userId  movieId  rating   timestamp
6876       46        1     5.0   834787906
23663     161        1     4.0  1176751765
70844     453        1     5.0  1005966797
97043     604        1     3.0   832079851
24321     169        1     4.5  1059427918
...       ...      ...     ...         ...
81082     514        1     4.0  1533872400
68337     443        1     4.0  1501722482
71257     456        1     5.0   856883308
76227     480        1     3.0  1179178004
19179     124        1     4.0  1336584336

[169 rows x 4 columns]
                 6876          23663         70844        97043         24321  \
userId            46.0  1.610000e+02  4.530000e+02        604.0  1.690000e+02   
movieId            1.0  1.000000e+00  1.000000e+00          1.0  1.000000e+00   
rating             5.0  4.000000e+00  5.000000e+00          3.0  4.500000e+00   
timestamp  834787906.0  1.176752e+09  1.005967e+09  832079851.0  1.059428e+09   

                  58096         84780         37470   

                  32054        52380        70845        14818         14125  \
userId     2.200000e+02        340.0        453.0         94.0  9.100000e+01   
movieId    1.000000e+01         10.0         10.0         10.0  1.000000e+01   
rating     4.000000e+00          4.0          2.0          3.0  3.500000e+00   
timestamp  1.230056e+09  848666415.0  972621985.0  843406732.0  1.112713e+09   

                 23229         41462        8037          51195        89905  \
userId           160.0  2.820000e+02         57.0  3.310000e+02        584.0   
movieId           10.0  1.000000e+01         10.0  1.000000e+01         10.0   
rating             1.0  3.500000e+00          3.0  3.000000e+00          5.0   
timestamp  971196422.0  1.378497e+09  965798286.0  1.537158e+09  834987172.0   

           ...         27309         23866         39233        5877   \
userId     ...  1.860000e+02  1.660000e+02  2.740000e+02         42.0   
movieId    ...  1.000000e+01  1.000000e+01  1.000000

userId            64.0         555.0         438.0         480.0        470.0  \
19         3.500000e+00          3.0  3.000000e+00  1.500000e+00          3.0   
timestamp  1.161522e+09  980123949.0  1.105667e+09  1.179178e+09  849075545.0   

userId           136.0        284.0        42.0          249.0        321.0  \
19                 3.0          4.0          2.0  3.500000e+00          3.0   
timestamp  832449345.0  832786975.0  996256258.0  1.354107e+09  843212522.0   

userId     ...         381.0         509.0         156.0        117.0  \
19         ...  2.500000e+00  4.000000e+00  3.000000e+00          2.0   
timestamp  ...  1.165976e+09  1.436027e+09  1.106855e+09  844162892.0   

userId            200.0         377.0        56.0          222.0  \
19         3.500000e+00  2.000000e+00          5.0  3.500000e+00   
timestamp  1.229877e+09  1.340343e+09  835799219.0  1.391352e+09   

userId            357.0         534.0  
19         2.000000e+00  4.000000e+00  
timestamp  1.

                  50953         32260         27535        38902  \
userId     3.300000e+02  2.210000e+02  1.870000e+02        268.0   
movieId    2.900000e+01  2.900000e+01  2.900000e+01         29.0   
rating     3.000000e+00  4.500000e+00  4.000000e+00          5.0   
timestamp  1.285904e+09  1.119985e+09  1.161850e+09  940182766.0   

                  92642        85296         89035         19921  \
userId     5.990000e+02        555.0  5.760000e+02  1.320000e+02   
movieId    2.900000e+01         29.0  2.900000e+01  2.900000e+01   
rating     3.500000e+00          4.0  3.500000e+00  2.000000e+00   
timestamp  1.498501e+09  978841345.0  1.358151e+09  1.157924e+09   

                  48988         59236  ...         37974         14876  \
userId     3.180000e+02  3.870000e+02  ...  2.600000e+02  9.500000e+01   
movieId    2.900000e+01  2.900000e+01  ...  2.900000e+01  2.900000e+01   
rating     3.500000e+00  3.500000e+00  ...  4.000000e+00  5.000000e+00   
timestamp  1.413628e+0

userId           121.0        263.0        592.0        566.0         167.0  \
39                 4.0          4.0          4.0          4.0  2.000000e+00   
timestamp  847656062.0  941590574.0  837350111.0  849005720.0  1.154723e+09   

userId            480.0        266.0        597.0        174.0         95.0   \
39         2.500000e+00          2.0          4.0          4.0  4.000000e+00   
timestamp  1.179178e+09  945669397.0  941558116.0  848487101.0  1.043339e+09   

userId     ...        284.0        372.0         357.0        555.0  \
39         ...          3.0          3.0  4.500000e+00          4.0   
timestamp  ...  832786975.0  874417086.0  1.348612e+09  978746326.0   

userId            177.0        58.0          489.0        216.0         230.0  \
39         4.000000e+00          5.0  3.500000e+00          3.0  3.000000e+00   
timestamp  1.435891e+09  847718745.0  1.332773e+09  975212451.0  1.196305e+09   

userId           347.0  
39                 4.0  
timestamp  84

userId            219.0        134.0         334.0         354.0        512.0  \
47         3.500000e+00          4.0  2.000000e+00  4.000000e+00          5.0   
timestamp  1.194686e+09  832841168.0  1.234630e+09  1.200871e+09  841449359.0   

userId            223.0        588.0         462.0        297.0         274.0  \
47         3.000000e+00          3.0  4.000000e+00          4.0  4.000000e+00   
timestamp  1.226210e+09  839316278.0  1.154038e+09  900871748.0  1.171173e+09   

userId     ...         187.0         317.0        470.0        19.0   \
47         ...  4.000000e+00  5.000000e+00          3.0          3.0   
timestamp  ...  1.161850e+09  1.430362e+09  849075299.0  965710720.0   

userId            573.0         198.0         370.0         144.0  \
47         5.000000e+00  5.000000e+00  3.500000e+00  4.500000e+00   
timestamp  1.186590e+09  1.034136e+09  1.159163e+09  1.137324e+09   

userId           140.0         204.0  
47                 4.0  5.000000e+00  
timestamp

                 67475        84608        18784        593           6920   \
userId           437.0        544.0        121.0          6.0  4.700000e+01   
movieId           62.0         62.0         62.0         62.0  6.200000e+01   
rating             3.0          5.0          5.0          4.0  3.000000e+00   
timestamp  859720978.0  850688537.0  847656203.0  845553660.0  1.496205e+09   

                 29389        51912        79849         46749         15306  \
userId           202.0        337.0        498.0  3.070000e+02  1.000000e+02   
movieId           62.0         62.0         62.0  6.200000e+01  6.200000e+01   
rating             3.0          3.0          5.0  3.000000e+00  4.000000e+00   
timestamp  975013683.0  860255716.0  839197444.0  1.186161e+09  1.100186e+09   

           ...         73124        5278          50959        62023  \
userId     ...  4.740000e+02         35.0  3.300000e+02        411.0   
movieId    ...  6.200000e+01         62.0  6.200000e+01    

Float64Index([603.0], dtype='float64', name='userId')
Index([77, 'timestamp'], dtype='object')
       userId  movieId  rating  timestamp
4945       32       78     4.0  856736477
67476     437       78     3.0  859721693
62333     414       78     3.0  961518581
                 4945         67476        62333
userId            32.0        437.0        414.0
movieId           78.0         78.0         78.0
rating             4.0          3.0          3.0
timestamp  856736477.0  859721693.0  961518581.0
userId           32.0         437.0        414.0
userId            32.0        437.0        414.0
movieId           78.0         78.0         78.0
rating             4.0          3.0          3.0
timestamp  856736477.0  859721693.0  961518581.0
userId           32.0         437.0        414.0
78                 4.0          3.0          3.0
timestamp  856736477.0  859721693.0  961518581.0
Float64Index([32.0, 437.0, 414.0], dtype='float64', name='userId')
Index([78, 'timestamp'], dtype='o

userId           350.0        385.0        288.0        94.0         170.0  \
userId           350.0        385.0        288.0         94.0        170.0   
movieId           95.0         95.0         95.0         95.0         95.0   
rating             3.0          3.0          1.0          3.0          3.0   
timestamp  864940931.0  834691768.0  978466299.0  843406942.0  840473046.0   

userId            434.0        43.0         284.0        389.0        524.0  \
userId     4.340000e+02         43.0        284.0        389.0        524.0   
movieId    9.500000e+01         95.0         95.0         95.0         95.0   
rating     2.000000e+00          4.0          5.0          4.0          4.0   
timestamp  1.270607e+09  848993983.0  832699673.0  857934174.0  851608466.0   

userId     ...        151.0         448.0         28.0         559.0  \
userId     ...        151.0  4.480000e+02  2.800000e+01        559.0   
movieId    ...         95.0  9.500000e+01  9.500000e+01         95.0 

userId            357.0         380.0         141.0         608.0  \
107        3.000000e+00  5.000000e+00  3.500000e+00  3.000000e+00   
timestamp  1.348627e+09  1.493668e+09  1.513132e+09  1.117505e+09   

userId           57.0          448.0         508.0         20.0   \
107                3.0  4.000000e+00  2.000000e+00  3.500000e+00   
timestamp  965797786.0  1.019125e+09  1.268297e+09  1.054038e+09   

userId            92.0         43.0   ...        151.0        337.0  \
107        5.000000e+00          5.0  ...          3.0          3.0   
timestamp  1.294941e+09  848994703.0  ...  855947372.0  860255992.0   

userId           294.0         22.0          477.0         103.0        314.0  \
107                3.0  4.500000e+00  3.000000e+00  3.500000e+00          3.0   
timestamp  966634119.0  1.268331e+09  1.201023e+09  1.431968e+09  834428630.0   

userId           414.0        45.0          525.0  
107                3.0          4.0  3.500000e+00  
timestamp  961438502.0  9

userId           290.0
129                2.0
timestamp  974937943.0
Float64Index([290.0], dtype='float64', name='userId')
Index([129, 'timestamp'], dtype='object')
       userId  movieId  rating  timestamp
25996     181      132     3.0  845470661
38193     262      132     3.0  840306317
84338     541      132     4.0  835643931
20628     136      132     3.0  832449989
38679     266      132     2.0  944891365
                 25996        38193        84338        20628        38679
userId           181.0        262.0        541.0        136.0        266.0
movieId          132.0        132.0        132.0        132.0        132.0
rating             3.0          3.0          4.0          3.0          2.0
timestamp  845470661.0  840306317.0  835643931.0  832449989.0  944891365.0
userId           181.0        262.0        541.0        136.0        266.0
userId           181.0        262.0        541.0        136.0        266.0
movieId          132.0        132.0        132.0        13

userId            391.0        94.0          328.0        102.0         425.0  \
userId     3.910000e+02         94.0  3.280000e+02        102.0  4.250000e+02   
movieId    1.500000e+02        150.0  1.500000e+02        150.0  1.500000e+02   
rating     4.000000e+00          4.0  3.000000e+00          3.0  3.000000e+00   
timestamp  1.032390e+09  843406616.0  1.494211e+09  835875691.0  1.085477e+09   

userId           162.0        290.0         483.0        109.0         226.0  \
userId           162.0        290.0  4.830000e+02        109.0  2.260000e+02   
movieId          150.0        150.0  1.500000e+02        150.0  1.500000e+02   
rating             4.0          4.0  2.000000e+00          3.0  4.500000e+00   
timestamp  836488864.0  974942273.0  1.178293e+09  841106309.0  1.130138e+09   

userId     ...         590.0         68.0          156.0        592.0  \
userId     ...  5.900000e+02  6.800000e+01  1.560000e+02        592.0   
movieId    ...  1.500000e+02  1.500000e+02  1.5

                 49929        20635         26126         43500         28585  \
userId           321.0        136.0  1.820000e+02  2.920000e+02  1.990000e+02   
movieId          161.0        161.0  1.610000e+02  1.610000e+02  1.610000e+02   
rating             3.0          4.0  3.000000e+00  4.000000e+00  3.500000e+00   
timestamp  843212458.0  832449222.0  1.054783e+09  1.323632e+09  1.060289e+09   

                 5049         12710        82503        56849         14151  \
userId            33.0         81.0        524.0        379.0  9.100000e+01   
movieId          161.0        161.0        161.0        161.0  1.610000e+02   
rating             3.0          3.0          5.0          4.0  3.500000e+00   
timestamp  939716218.0  845299756.0  851608781.0  847397381.0  1.112711e+09   

           ...        35791        28097        86008        21102  \
userId     ...        242.0        195.0        559.0        140.0   
movieId    ...        161.0        161.0        161.0     

userId            230.0        297.0        19.0         604.0        602.0  \
170        3.000000e+00          1.0          3.0          3.0          3.0   
timestamp  1.196304e+09  900875950.0  965711598.0  832080961.0  840876475.0   

userId            182.0        414.0         21.0         11.0          313.0  \
170        2.500000e+00          3.0  2.000000e+00          4.0  3.000000e+00   
timestamp  1.054782e+09  961516154.0  1.376823e+09  902154621.0  1.030557e+09   

userId     ...         119.0        136.0         226.0         483.0  \
170        ...  5.000000e+00          4.0  5.000000e+00  4.000000e+00   
timestamp  ...  1.435944e+09  832449755.0  1.095663e+09  1.181496e+09   

userId           217.0         274.0        294.0         416.0         82.0   \
170                3.0  2.500000e+00          2.0  2.500000e+00  3.000000e+00   
timestamp  955943105.0  1.171827e+09  966597739.0  1.187495e+09  1.084465e+09   

userId           411.0  
170                3.0  
time

                 96145        97079
userId           603.0        604.0
movieId          184.0        184.0
rating             4.0          3.0
timestamp  963179880.0  832080636.0
userId           603.0        604.0
userId           603.0        604.0
movieId          184.0        184.0
rating             4.0          3.0
timestamp  963179880.0  832080636.0
userId           603.0        604.0
184                4.0          3.0
timestamp  963179880.0  832080636.0
Float64Index([603.0, 604.0], dtype='float64', name='userId')
Index([184, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
17924     112      185     2.0  1513989947
20645     136      185     3.0   832449254
72431     468      185     3.0   831400500
73143     474      185     3.0  1069686436
8534       58      185     3.0   847718579
...       ...      ...     ...         ...
90440     588      185     1.0   839316215
62037     411      185     4.0   835532100
80846     510      185     0.5  1141159094

                  9456         23253        96007        43980        91325  \
userId     6.400000e+01        160.0        602.0        294.0        592.0   
movieId    1.980000e+02        198.0        198.0        198.0        198.0   
rating     3.000000e+00          5.0          3.0          2.0          5.0   
timestamp  1.161566e+09  971114178.0  840876055.0  966597610.0  837350578.0   

                  48025        55423        8062         29983         92706  \
userId     3.120000e+02        368.0         57.0        208.0  5.990000e+02   
movieId    1.980000e+02        198.0        198.0        198.0  1.980000e+02   
rating     3.000000e+00          2.0          3.0          2.0  3.000000e+00   
timestamp  1.043177e+09  975828193.0  965798823.0  940639375.0  1.498532e+09   

                  39276         77252        38682        78265        67495  \
userId     2.740000e+02  4.830000e+02        266.0        486.0        437.0   
movieId    1.980000e+02  1.980000e+02      

timestamp  1.446610e+09  940180585.0  829760897.0  1.377069e+09
userId            105.0        268.0        191.0         543.0
213        4.500000e+00          5.0          5.0  5.000000e-01
timestamp  1.446610e+09  940180585.0  829760897.0  1.377069e+09
Float64Index([105.0, 268.0, 191.0, 543.0], dtype='float64', name='userId')
Index([213, 'timestamp'], dtype='object')
       userId  movieId  rating  timestamp
13114      84      214     4.0  858771796
96154     603      214     4.0  963179452
                 13114        96154
userId            84.0        603.0
movieId          214.0        214.0
rating             4.0          4.0
timestamp  858771796.0  963179452.0
userId           84.0         603.0
userId            84.0        603.0
movieId          214.0        214.0
rating             4.0          4.0
timestamp  858771796.0  963179452.0
userId           84.0         603.0
214                4.0          4.0
timestamp  858771796.0  963179452.0
Float64Index([84.0, 603.0], dtype

userId           429.0        130.0        84.0         321.0        136.0  \
userId           429.0        130.0         84.0        321.0        136.0   
movieId          225.0        225.0        225.0        225.0        225.0   
rating             4.0          5.0          3.0          4.0          4.0   
timestamp  828124615.0  832589660.0  858772370.0  843212509.0  832449274.0   

userId           574.0        262.0        42.0         411.0        46.0   \
userId           574.0        262.0         42.0        411.0         46.0   
movieId          225.0        225.0        225.0        225.0        225.0   
rating             4.0          3.0          4.0          4.0          3.0   
timestamp  834634527.0  840305693.0  996219167.0  835532191.0  834788016.0   

userId     ...        468.0        56.0         121.0        229.0  \
userId     ...        468.0         56.0        121.0        229.0   
movieId    ...        225.0        225.0        225.0        225.0   
rating  

userId            275.0        444.0         513.0         100.0  \
userId     2.750000e+02        444.0  5.130000e+02  1.000000e+02   
movieId    2.350000e+02        235.0  2.350000e+02  2.350000e+02   
rating     5.000000e+00          5.0  4.500000e+00  1.000000e+00   
timestamp  1.049077e+09  832672088.0  1.159980e+09  1.100184e+09   

userId            307.0        202.0        589.0        602.0         305.0  \
userId     3.070000e+02        202.0        589.0        602.0  3.050000e+02   
movieId    2.350000e+02        235.0        235.0        235.0  2.350000e+02   
rating     3.000000e+00          4.0          4.0          4.0  4.500000e+00   
timestamp  1.186162e+09  974912792.0  856043305.0  840875901.0  1.460564e+09   

userId            474.0  ...         552.0        195.0        38.0   \
userId     4.740000e+02  ...  5.520000e+02        195.0         38.0   
movieId    2.350000e+02  ...  2.350000e+02        235.0        235.0   
rating     4.000000e+00  ...  4.000000e+00

                 62404         23800        18397        17121         7912   \
userId           414.0  1.650000e+02        117.0        109.0  5.300000e+01   
movieId          249.0  2.490000e+02        249.0        249.0  2.490000e+02   
rating             4.0  4.000000e+00          4.0          4.0  5.000000e+00   
timestamp  961514186.0  1.046741e+09  844163543.0  842793725.0  1.237748e+09   

                 56405        8548          26146        73159        56169  \
userId           373.0         58.0  1.820000e+02        474.0        372.0   
movieId          249.0        249.0  2.490000e+02        249.0        249.0   
rating             5.0          5.0  4.000000e+00          4.0          4.0   
timestamp  846830536.0  847719464.0  1.066072e+09  974669033.0  874416571.0   

                  78506         76266        5054   
userId     4.890000e+02  4.800000e+02         33.0  
movieId    2.490000e+02  2.490000e+02        249.0  
rating     3.000000e+00  4.000000e+00       

userId            462.0         608.0         391.0         205.0  \
userId     4.620000e+02  6.080000e+02  3.910000e+02  2.050000e+02   
movieId    2.600000e+02  2.600000e+02  2.600000e+02  2.600000e+02   
rating     4.000000e+00  3.500000e+00  5.000000e+00  4.500000e+00   
timestamp  1.140400e+09  1.117163e+09  1.032388e+09  1.519900e+09   

userId            198.0        559.0        59.0          573.0         15.0   \
userId     1.980000e+02        559.0         59.0  5.730000e+02  1.500000e+01   
movieId    2.600000e+02        260.0        260.0  2.600000e+02  2.600000e+02   
rating     5.000000e+00          5.0          4.0  5.000000e+00  5.000000e+00   
timestamp  1.034138e+09  865095758.0  953610660.0  1.186590e+09  1.510572e+09   

userId            254.0  ...         66.0         266.0         387.0  \
userId     2.540000e+02  ...  6.600000e+01        266.0  3.870000e+02   
movieId    2.600000e+02  ...  2.600000e+02        260.0  2.600000e+02   
rating     4.000000e+00  ... 

userId            68.0         42.0         6.0          43.0         58.0   \
276        3.500000e+00          4.0          4.0          5.0          2.0   
timestamp  1.269123e+09  996221323.0  845553907.0  848994265.0  847719132.0   

userId            608.0        592.0        284.0        240.0        314.0  \
276        3.000000e+00          3.0          3.0          5.0          3.0   
timestamp  1.117505e+09  837354148.0  832787106.0  849122684.0  834241987.0   

userId           411.0        536.0        429.0         599.0        84.0   \
276                3.0          3.0          3.0  3.000000e+00          3.0   
timestamp  835532588.0  832839990.0  828124616.0  1.498524e+09  858772543.0   

userId           321.0        99.0         436.0        584.0        414.0  
276                3.0          4.0          2.0          3.0          3.0  
timestamp  843212762.0  829828006.0  833529846.0  834988063.0  961596299.0  
Float64Index([ 68.0,  42.0,   6.0,  43.0,  58.0, 608.0,

689        6      291     3.0  845555571
                  2321         689 
userId            19.0          6.0
movieId          291.0        291.0
rating             2.0          3.0
timestamp  965712018.0  845555571.0
userId            19.0         6.0 
userId            19.0          6.0
movieId          291.0        291.0
rating             2.0          3.0
timestamp  965712018.0  845555571.0
userId            19.0         6.0 
291                2.0          3.0
timestamp  965712018.0  845555571.0
Float64Index([19.0, 6.0], dtype='float64', name='userId')
Index([291, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
89337     580      292     3.0  1167790141
78271     486      292     4.0   839537112
20355     135      292     4.0  1009692890
99509     609      292     3.0   847220907
1270       11      292     4.0   902154383
...       ...      ...     ...         ...
19601     126      292     3.0   845925732
18844     122      292     4.0  1461563011
5811

userId           294.0
304                2.0
timestamp  966596595.0
Float64Index([294.0], dtype='float64', name='userId')
Index([304, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
33695     229      305     3.0   838143696
1356       13      305     1.0   987456968
96176     603      305     1.0   963178587
8073       57      305     3.0   972175617
59201     386      305     3.0   842613995
59003     385      305     3.0   834693629
56417     373      305     5.0   846830521
97420     606      305     3.5  1171361499
                 33695        1356         96176        8073         59201  \
userId           229.0         13.0        603.0         57.0        386.0   
movieId          305.0        305.0        305.0        305.0        305.0   
rating             3.0          1.0          1.0          3.0          3.0   
timestamp  838143696.0  987456968.0  963178587.0  972175617.0  842613995.0   

                 59003        56417         97420  
userI

userId           602.0         586.0        19.0          520.0        340.0  \
userId           602.0  5.860000e+02         19.0  5.200000e+02        340.0   
movieId          316.0  3.160000e+02        316.0  3.160000e+02        316.0   
rating             3.0  4.000000e+00          2.0  4.000000e+00          4.0   
timestamp  840875558.0  1.529901e+09  965703785.0  1.326609e+09  848666375.0   

userId            226.0        559.0         24.0          344.0        56.0   \
userId     2.260000e+02        559.0  2.400000e+01  3.440000e+02         56.0   
movieId    3.160000e+02        316.0  3.160000e+02  3.160000e+02        316.0   
rating     2.500000e+00          3.0  3.500000e+00  3.500000e+00          3.0   
timestamp  1.097545e+09  845475863.0  1.458942e+09  1.420497e+09  835799115.0   

userId     ...         573.0         334.0        5.0          379.0  \
userId     ...  5.730000e+02  3.340000e+02          5.0        379.0   
movieId    ...  3.160000e+02  3.160000e+02       

timestamp  832449964.0  874416992.0  1.498516e+09  965705076.0  831940220.0  
Float64Index([274.0, 240.0, 571.0, 604.0, 610.0, 136.0, 372.0, 599.0, 19.0,
              353.0],
             dtype='float64', name='userId')
Index([328, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
20683     136      329     2.0   832449197
48030     312      329     3.0  1043176952
84357     541      329     4.0   835642969
90269     587      329     4.0   953142126
89063     577      329     3.0   945965866
...       ...      ...     ...         ...
68604     447      329     4.0   836960662
67511     437      329     3.0   859721360
14848      94      329     4.0   843406707
92754     599      329     4.0  1498498979
73180     474      329     4.0  1047054391

[88 rows x 4 columns]
                 20683         48030        84357        90269        89063  \
userId           136.0  3.120000e+02        541.0        587.0        577.0   
movieId          329.0  3.290000e+02    

userId           19.0          438.0         600.0        179.0        31.0   \
userId            19.0  4.380000e+02  6.000000e+02        179.0         31.0   
movieId          339.0  3.390000e+02  3.390000e+02        339.0        339.0   
rating             3.0  3.000000e+00  2.500000e+00          3.0          4.0   
timestamp  965706365.0  1.105666e+09  1.237712e+09  840907511.0  850467366.0   

userId           476.0        385.0        446.0        572.0        584.0  \
userId           476.0        385.0        446.0        572.0        584.0   
movieId          339.0        339.0        339.0        339.0        339.0   
rating             4.0          3.0          3.0          3.0          4.0   
timestamp  835021420.0  834691622.0  843839207.0  945888400.0  834987171.0   

userId     ...        604.0         140.0        32.0          68.0   \
userId     ...        604.0  1.400000e+02         32.0  6.800000e+01   
movieId    ...        339.0  3.390000e+02        339.0  3.390000

                 35140         9473          89345        24614        84361  \
userId           235.0  6.400000e+01  5.800000e+02        170.0        541.0   
movieId          350.0  3.500000e+02  3.500000e+02        350.0        350.0   
rating             3.0  3.500000e+00  3.500000e+00          3.0          4.0   
timestamp  841422517.0  1.161529e+09  1.167790e+09  840381134.0  835643205.0   

                 20692        71228        5415         5624          90555  \
userId           136.0        455.0         38.0         40.0  5.900000e+02   
movieId          350.0        350.0        350.0        350.0  3.500000e+02   
rating             5.0          4.0          4.0          5.0  2.500000e+00   
timestamp  832449883.0  836436244.0  841341428.0  832059787.0  1.258417e+09   

           ...        56422         68703        75390         80851  \
userId     ...        373.0  4.480000e+02        476.0  5.100000e+02   
movieId    ...        350.0  3.500000e+02        350.0  3.5

                 56427        67398        6375         21            39301  \
userId           373.0        436.0         43.0          1.0  2.740000e+02   
movieId          362.0        362.0        362.0        362.0  3.620000e+02   
rating             1.0          3.0          5.0          5.0  2.500000e+00   
timestamp  846830403.0  833530341.0  848994311.0  964982588.0  1.171827e+09   

                 2341          44401        6520          47732         33090  \
userId            19.0  2.950000e+02         45.0  3.080000e+02  2.260000e+02   
movieId          362.0  3.620000e+02        362.0  3.620000e+02  3.620000e+02   
rating             2.0  4.500000e+00          5.0  4.000000e+00  3.500000e+00   
timestamp  965709622.0  1.320064e+09  950722276.0  1.421375e+09  1.095663e+09   

           ...        68614        86046         24940        42161  \
userId     ...        447.0        559.0  1.770000e+02        288.0   
movieId    ...        362.0        362.0  3.620000e+02  

                  92780         34931         58582        67404  \
userId     5.990000e+02  2.340000e+02  3.820000e+02        436.0   
movieId    3.740000e+02  3.740000e+02  3.740000e+02        374.0   
rating     1.500000e+00  1.000000e+00  3.500000e+00          3.0   
timestamp  1.498505e+09  1.004410e+09  1.515175e+09  833531023.0   

                  82636        23275        86051         84250         17213  \
userId     5.250000e+02        160.0        559.0  5.390000e+02  1.100000e+02   
movieId    3.740000e+02        374.0        374.0  3.740000e+02  3.740000e+02   
rating     2.500000e+00          1.0          1.0  2.500000e+00  1.000000e+00   
timestamp  1.476481e+09  971619754.0  845476444.0  1.332475e+09  1.175330e+09   

                 6378   ...         39305        35651        2348   \
userId            43.0  ...  2.740000e+02        240.0         19.0   
movieId          374.0  ...  3.740000e+02        374.0        374.0   
rating             5.0  ...  3.500000e+0

                  14194        21882
userId     9.100000e+01        142.0
movieId    3.840000e+02        384.0
rating     3.500000e+00          3.0
timestamp  1.112717e+09  838934486.0
userId            91.0         142.0
userId     9.100000e+01        142.0
movieId    3.840000e+02        384.0
rating     3.500000e+00          3.0
timestamp  1.112717e+09  838934486.0
userId            91.0         142.0
384        3.500000e+00          3.0
timestamp  1.112717e+09  838934486.0
Float64Index([91.0, 142.0], dtype='float64', name='userId')
Index([384, 'timestamp'], dtype='object')
      userId  movieId  rating  timestamp
5070      33      385     4.0  939716559
                  5070
userId            33.0
movieId          385.0
rating             4.0
timestamp  939716559.0
userId            33.0
userId            33.0
movieId          385.0
rating             4.0
timestamp  939716559.0
userId            33.0
385                4.0
timestamp  939716559.0
Float64Index([33.0], dtype='float64'

userId            226.0        43.0         45.0         559.0        42.0   \
413        3.000000e+00          3.0          3.0          2.0          2.0   
timestamp  1.097545e+09  848994349.0  951170710.0  845476352.0  996221492.0   

userId           136.0         182.0         448.0        284.0         477.0  \
413                3.0  3.500000e+00  2.000000e+00          4.0  4.000000e+00   
timestamp  832449705.0  1.055156e+09  1.039946e+09  832787162.0  1.200946e+09   

userId            599.0        436.0         307.0        240.0        294.0  \
413        2.500000e+00          2.0  2.500000e+00          4.0          3.0   
timestamp  1.498518e+09  833530019.0  1.186085e+09  849122791.0  966596414.0   

userId            387.0        555.0        353.0  
413        1.000000e+00          3.0          2.0  
timestamp  1.095041e+09  978747285.0  831940050.0  
Float64Index([226.0,  43.0,  45.0, 559.0,  42.0, 136.0, 182.0, 448.0, 284.0,
              477.0, 599.0, 436.0, 307.0, 24

userId           94.0         411.0        455.0        468.0        240.0  \
userId            94.0        411.0        455.0        468.0        240.0   
movieId          432.0        432.0        432.0        432.0        432.0   
rating             3.0          3.0          3.0          2.0          5.0   
timestamp  843406914.0  835532287.0  836436114.0  831400545.0  849122370.0   

userId           136.0        181.0        19.0          287.0        8.0    \
userId           136.0        181.0         19.0  2.870000e+02          8.0   
movieId          432.0        432.0        432.0  4.320000e+02        432.0   
rating             3.0          3.0          3.0  5.000000e-01          1.0   
timestamp  832449365.0  845469653.0  965707983.0  1.110230e+09  839463702.0   

userId     ...        109.0        217.0        414.0        353.0  \
userId     ...        109.0        217.0        414.0        353.0   
movieId    ...        432.0        432.0        432.0        432.0   
rat

userId            474.0         275.0        33.0         603.0         253.0  \
446        3.000000e+00  1.000000e+00          5.0          4.0  5.000000e+00   
timestamp  1.192327e+09  1.049079e+09  939716559.0  954482325.0  1.286645e+09   

userId            606.0         178.0  
446        3.500000e+00  3.500000e+00  
timestamp  1.171761e+09  1.163674e+09  
Float64Index([474.0, 275.0, 33.0, 603.0, 253.0, 606.0, 178.0], dtype='float64', name='userId')
Index([446, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
43219     290      448     4.0   974939475
98508     607      448     4.0   964744346
62492     414      448     3.0  1036505560
98758     608      448     4.0  1117415627
96061     602      448     3.0   840876417
5073       33      448     4.0   939715485
1415       14      448     3.0   835442201
62104     411      448     4.0   835533637
                 43219        98508         62492         98758        96061  \
userId           290.0        60

timestamp  1.173443e+09  1.049079e+09  975013683.0  953925157.0
Float64Index([606.0, 275.0, 202.0, 603.0], dtype='float64', name='userId')
Index([461, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
26058     181      464     3.0   845470717
92817     599      464     2.0  1498510125
82533     524      464     2.0   851609114
84382     541      464     3.0   835644573
99567     610      464     4.0  1479542014
78288     486      464     3.0   839537308
30968     217      464     3.0   955941587
62499     414      464     2.0   961516204
96213     603      464     3.0   963177497
                 26058         92817        82533        84382         99567  \
userId           181.0  5.990000e+02        524.0        541.0  6.100000e+02   
movieId          464.0  4.640000e+02        464.0        464.0  4.640000e+02   
rating             3.0  2.000000e+00          2.0          3.0  4.000000e+00   
timestamp  845470717.0  1.498510e+09  851609114.0  835644573.0  1.479

                 96066        62503        18442         4230         546    \
userId           602.0        414.0        117.0  2.800000e+01          5.0   
movieId          475.0        475.0        475.0  4.750000e+02        475.0   
rating             5.0          5.0          3.0  4.000000e+00          5.0   
timestamp  840876475.0  961517481.0  844163511.0  1.234335e+09  847435311.0   

                 43222        86068         73211         97443         50132  \
userId           290.0        559.0  4.740000e+02  6.060000e+02  3.230000e+02   
movieId          475.0        475.0  4.750000e+02  4.750000e+02  4.750000e+02   
rating             4.0          5.0  4.000000e+00  4.000000e+00  4.000000e+00   
timestamp  974942322.0  845476276.0  1.123258e+09  1.171407e+09  1.422641e+09   

                  66821        13179        5076          12027        332    \
userId     4.320000e+02         84.0         33.0  7.400000e+01          4.0   
movieId    4.750000e+02        475.0  

                 5643          77106         92830        18445        1419   \
userId            40.0  4.820000e+02  5.990000e+02        117.0         14.0   
movieId          491.0  4.910000e+02  4.910000e+02        491.0        491.0   
rating             5.0  2.000000e+00  3.000000e+00          5.0          3.0   
timestamp  832060059.0  1.105397e+09  1.498525e+09  844163664.0  835441672.0   

                 86072        45786        775           95188        84387  \
userId           559.0        304.0          6.0  6.000000e+02        541.0   
movieId          491.0        491.0        491.0  4.910000e+02        491.0   
rating             3.0          4.0          4.0  3.000000e+00          3.0   
timestamp  845476276.0  897241738.0  845554141.0  1.237852e+09  835643720.0   

                 38524        13182         81559        62512  
userId           265.0         84.0  5.170000e+02        414.0  
movieId          491.0        491.0  4.910000e+02        491.0  
rating  

                  81561         11672        62114         84948  \
userId     5.170000e+02  7.000000e+01        411.0  5.520000e+02   
movieId    5.080000e+02  5.080000e+02        508.0  5.080000e+02   
rating     3.500000e+00  4.500000e+00          5.0  3.500000e+00   
timestamp  1.487953e+09  1.355198e+09  835532839.0  1.111473e+09   

                  76308        18448        90500         88265        8597   \
userId     4.800000e+02        117.0        589.0  5.700000e+02         58.0   
movieId    5.080000e+02        508.0        508.0  5.080000e+02        508.0   
rating     4.000000e+00          3.0          4.0  3.500000e+00          5.0   
timestamp  1.179178e+09  844163019.0  856043304.0  1.181478e+09  847718868.0   

                 38916  ...         71574         54910         90574  \
userId           268.0  ...  4.620000e+02  3.650000e+02  5.900000e+02   
movieId          508.0  ...  5.080000e+02  5.080000e+02  5.080000e+02   
rating             3.0  ...  2.500000e+

                  33106         70882        787          6535          32282  \
userId     2.260000e+02  4.530000e+02          6.0         45.0  2.210000e+02   
movieId    5.200000e+02  5.200000e+02        520.0        520.0  5.200000e+02   
rating     4.000000e+00  4.000000e+00          3.0          4.0  2.500000e+00   
timestamp  1.095663e+09  1.005967e+09  845553844.0  951170727.0  1.111176e+09   

                 68639         81999        91372         55880        86079  \
userId           447.0  5.200000e+02        592.0  3.690000e+02        559.0   
movieId          520.0  5.200000e+02        520.0  5.200000e+02        520.0   
rating             3.0  4.000000e+00          2.0  4.000000e+00          3.0   
timestamp  836962002.0  1.326610e+09  837350519.0  1.237082e+09  845476218.0   

           ...         51000         95192         39324        24826  \
userId     ...  3.300000e+02  6.000000e+02  2.740000e+02        174.0   
movieId    ...  5.200000e+02  5.200000e+02  5.2

                 90283        23733        18455        550          38529  \
userId           587.0        162.0        117.0          5.0        265.0   
movieId          534.0        534.0        534.0        534.0        534.0   
rating             4.0          5.0          3.0          3.0          4.0   
timestamp  953142295.0  836684340.0  844163663.0  847435311.0  965318191.0   

                  52736        62120        13191        59040        96230  \
userId     3.460000e+02        411.0         84.0        385.0        603.0   
movieId    5.340000e+02        534.0        534.0        534.0        534.0   
rating     2.500000e+00          2.0          4.0          3.0          4.0   
timestamp  1.094646e+09  835533320.0  857653693.0  836844780.0  954482383.0   

                  60791        28612  
userId     3.920000e+02        199.0  
movieId    5.340000e+02        534.0  
rating     3.000000e+00          4.0  
timestamp  1.027524e+09  940544123.0  
userId           5

                  39329        44046         56951         34942  \
userId     2.740000e+02        294.0  3.800000e+02  2.340000e+02   
movieId    5.460000e+02        546.0  5.460000e+02  5.460000e+02   
rating     2.000000e+00          1.0  3.000000e+00  2.000000e+00   
timestamp  1.171829e+09  966634356.0  1.494036e+09  1.004409e+09   

                  59302         98777        78293         92854        84396  \
userId     3.870000e+02  6.080000e+02        486.0  5.990000e+02        541.0   
movieId    5.460000e+02  5.460000e+02        546.0  5.460000e+02        546.0   
rating     3.000000e+00  5.000000e-01          3.0  2.000000e+00          1.0   
timestamp  1.189233e+09  1.117505e+09  839537323.0  1.498511e+09  835644276.0   

                  12427        2389         23291         33112        6538   \
userId     7.800000e+01         19.0        160.0  2.260000e+02         45.0   
movieId    5.460000e+02        546.0        546.0  5.460000e+02        546.0   
rating     5.

95201     600      562     4.0  1237759928
                 92223         19968         46837        14083         51006  \
userId           597.0  1.320000e+02  3.070000e+02         90.0  3.300000e+02   
movieId          562.0  5.620000e+02  5.620000e+02        562.0  5.620000e+02   
rating             4.0  2.500000e+00  2.000000e+00          4.0  4.500000e+00   
timestamp  940362491.0  1.157979e+09  1.186163e+09  856354233.0  1.285903e+09   

                  76066         39334         28618         10469  \
userId     4.790000e+02  2.740000e+02  1.990000e+02  6.800000e+01   
movieId    5.620000e+02  5.620000e+02  5.620000e+02  5.620000e+02   
rating     3.000000e+00  3.500000e+00  3.000000e+00  2.500000e+00   
timestamp  1.039393e+09  1.172365e+09  1.087155e+09  1.261624e+09   

                 62546  ...        61724         40617         80352  \
userId           414.0  ...        409.0  2.750000e+02  5.080000e+02   
movieId          562.0  ...        562.0  5.620000e+02  5.620

                  75207        56452        24766         22730         78554  \
userId     4.750000e+02        373.0        173.0  1.560000e+02  4.890000e+02   
movieId    5.860000e+02        586.0        586.0  5.860000e+02  5.860000e+02   
rating     4.500000e+00          3.0          3.0  2.000000e+00  1.500000e+00   
timestamp  1.498032e+09  846830035.0  843398014.0  1.106855e+09  1.333101e+09   

                  18280        33726         59305         51673  \
userId     1.160000e+02        229.0  3.870000e+02  3.340000e+02   
movieId    5.860000e+02        586.0  5.860000e+02  5.860000e+02   
rating     3.500000e+00          3.0  2.000000e+00  3.000000e+00   
timestamp  1.337200e+09  838143590.0  1.095121e+09  1.234630e+09   

                  22010  ...         92867        61008         15968  \
userId     1.440000e+02  ...  5.990000e+02        395.0  1.040000e+02   
movieId    5.860000e+02  ...  5.860000e+02        586.0  5.860000e+02   
rating     3.000000e+00  ...  3.00

                 38230         32852         79130         22733        53580  \
userId           262.0  2.230000e+02  4.900000e+02  1.560000e+02        353.0   
movieId          593.0  5.930000e+02  5.930000e+02  5.930000e+02        593.0   
rating             2.0  2.500000e+00  4.000000e+00  4.000000e+00          5.0   
timestamp  840305605.0  1.226210e+09  1.334940e+09  1.106855e+09  831940167.0   

                  90589         83117         88181         78306  \
userId     5.900000e+02  5.260000e+02  5.680000e+02  4.870000e+02   
movieId    5.930000e+02  5.930000e+02  5.930000e+02  5.930000e+02   
rating     3.500000e+00  4.000000e+00  5.000000e+00  2.500000e+00   
timestamp  1.258420e+09  1.502132e+09  1.253767e+09  1.428650e+09   

                  61165  ...         28040         76323         1581   \
userId     4.000000e+02  ...  1.930000e+02  4.800000e+02  1.600000e+01   
movieId    5.930000e+02  ...  5.930000e+02  5.930000e+02  5.930000e+02   
rating     5.000000e+00  .

userId            599.0         185.0        414.0        44.0          100.0  \
605        2.500000e+00  3.000000e+00          3.0          1.0  4.500000e+00   
timestamp  1.498519e+09  1.044311e+09  963236719.0  869252901.0  1.100187e+09   

userId            606.0         358.0         474.0  
605        3.000000e+00  3.000000e+00  3.000000e+00  
timestamp  1.173396e+09  1.339539e+09  1.089387e+09  
Float64Index([599.0, 185.0, 414.0, 44.0, 100.0, 606.0, 358.0, 474.0], dtype='float64', name='userId')
Index([605, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
14235      91      606     4.0  1112717237
                  14235
userId     9.100000e+01
movieId    6.060000e+02
rating     4.000000e+00
timestamp  1.112717e+09
userId             91.0
userId     9.100000e+01
movieId    6.060000e+02
rating     4.000000e+00
timestamp  1.112717e+09
userId             91.0
606        4.000000e+00
timestamp  1.112717e+09
Float64Index([91.0], dtype='float64', name='userId')

userId           19.0          391.0         606.0         9.0  
userId            19.0  3.910000e+02  6.060000e+02  9.000000e+00
movieId          627.0  6.270000e+02  6.270000e+02  6.270000e+02
rating             2.0  5.000000e+00  3.500000e+00  3.000000e+00
timestamp  965711342.0  1.030944e+09  1.190729e+09  1.044657e+09
userId           19.0          391.0         606.0         9.0  
627                2.0  5.000000e+00  3.500000e+00  3.000000e+00
timestamp  965711342.0  1.030944e+09  1.190729e+09  1.044657e+09
Float64Index([19.0, 391.0, 606.0, 9.0], dtype='float64', name='userId')
Index([627, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
79332     492      628     3.0   863976101
90288     587      628     5.0   953141841
51942     337      628     5.0   860255867
46852     307      628     4.0  1193663295
39084     270      628     3.0   853918943
60307     389      628     5.0   857934470
10484      68      628     3.5  1158534567
62567     414      628

userId            425.0        44.0          391.0        19.0          156.0  \
648        3.500000e+00          3.0  3.000000e+00          3.0  3.500000e+00   
timestamp  1.085477e+09  869251861.0  1.031030e+09  965709656.0  1.106855e+09   

userId            219.0         307.0        592.0        544.0        58.0   \
648        4.500000e+00  2.000000e+00          4.0          4.0          5.0   
timestamp  1.194681e+09  1.186161e+09  837350451.0  850688537.0  847719035.0   

userId     ...         220.0         301.0        411.0         376.0  \
648        ...  3.000000e+00  2.500000e+00          5.0  5.000000e+00   
timestamp  ...  1.230056e+09  1.211378e+09  835532994.0  1.364994e+09   

userId            166.0        373.0        171.0        214.0         177.0  \
648        3.000000e+00          4.0          3.0          3.0  4.000000e+00   
timestamp  1.190829e+09  846830246.0  866904159.0  853937855.0  1.435534e+09   

userId           395.0  
648                5.0  
time

                 8129         85380         49036         78566         59317  \
userId            57.0        555.0  3.180000e+02  4.890000e+02  3.870000e+02   
movieId          674.0        674.0  6.740000e+02  6.740000e+02  6.740000e+02   
rating             4.0          2.0  4.000000e+00  3.500000e+00  1.000000e+00   
timestamp  965798849.0  978841913.0  1.305360e+09  1.333659e+09  1.117356e+09   

                 96259        29448         15976        2410   
userId           603.0        202.0  1.040000e+02         19.0  
movieId          674.0        674.0  6.740000e+02        674.0  
rating             5.0          2.0  3.000000e+00          4.0  
timestamp  963177249.0  974923361.0  1.048788e+09  965703973.0  
userId           57.0         555.0         318.0         489.0         387.0  \
userId            57.0        555.0  3.180000e+02  4.890000e+02  3.870000e+02   
movieId          674.0        674.0  6.740000e+02  6.740000e+02  6.740000e+02   
rating             4.0    

                  98803        2411          9508         51948         79932  \
userId     6.080000e+02         19.0  6.400000e+01        337.0  5.000000e+02   
movieId    7.080000e+02        708.0  7.080000e+02        708.0  7.080000e+02   
rating     2.000000e+00          3.0  4.500000e+00          5.0  4.000000e+00   
timestamp  1.117408e+09  965706615.0  1.161522e+09  860255830.0  1.005528e+09   

                  78567        38234         92898        18482         77312  \
userId     4.890000e+02        262.0  5.990000e+02        117.0  4.830000e+02   
movieId    7.080000e+02        708.0  7.080000e+02        708.0  7.080000e+02   
rating     1.500000e+00          3.0  2.000000e+00          3.0  3.500000e+00   
timestamp  1.385824e+09  840306302.0  1.519245e+09  844163770.0  1.178294e+09   

           ...         77112        39089        62135         71585  \
userId     ...  4.820000e+02        270.0        411.0  4.620000e+02   
movieId    ...  7.080000e+02        708.0   

13213      84      726     3.0  858772802
                 13213
userId            84.0
movieId          726.0
rating             3.0
timestamp  858772802.0
userId            84.0
userId            84.0
movieId          726.0
rating             3.0
timestamp  858772802.0
userId            84.0
726                3.0
timestamp  858772802.0
Float64Index([84.0], dtype='float64', name='userId')
Index([726, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
92903     599      728     2.5  1519127436
14090      90      728     3.0   856354195
73264     474      728     3.5  1181348043
72511     469      728     3.0   965425597
59065     385      728     5.0   850769357
8132       57      728     3.0   965797129
96264     603      728     4.0   963178259
41004     276      728     4.0   858350916
                  92903        14090         73264        72511        59065  \
userId     5.990000e+02         90.0  4.740000e+02        469.0        385.0   
movieId    7.2800

                 22331        18487        835  
userId           151.0        117.0          6.0
movieId          747.0        747.0        747.0
rating             4.0          3.0          3.0
timestamp  855948083.0  844163992.0  845555046.0
userId           151.0        117.0        6.0  
userId           151.0        117.0          6.0
movieId          747.0        747.0        747.0
rating             4.0          3.0          3.0
timestamp  855948083.0  844163992.0  845555046.0
userId           151.0        117.0        6.0  
747                4.0          3.0          3.0
timestamp  855948083.0  844163992.0  845555046.0
Float64Index([151.0, 117.0, 6.0], dtype='float64', name='userId')
Index([747, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
44503     297      748     1.0   900872143
56979     380      748     3.0  1495234673
48688     314      748     3.0   851785062
82557     524      748     4.0   852404444
92912     599      748     2.5  14985119

                  32292         9511         5093          35350  \
userId     2.210000e+02  6.400000e+01         33.0  2.390000e+02   
movieId    7.780000e+02  7.780000e+02        778.0  7.780000e+02   
rating     4.500000e+00  5.000000e+00          4.0  4.500000e+00   
timestamp  1.119984e+09  1.161522e+09  939647370.0  1.221159e+09   

                  37994        22742         50929        23312         28041  \
userId     2.600000e+02        156.0  3.290000e+02        160.0  1.930000e+02   
movieId    7.780000e+02        778.0  7.780000e+02        778.0  7.780000e+02   
rating     4.500000e+00          4.0  4.500000e+00          5.0  4.000000e+00   
timestamp  1.109408e+09  939884874.0  1.523468e+09  971619994.0  1.435857e+09   

                  30281  ...         51022         85134         12286  \
userId     2.120000e+02  ...  3.300000e+02  5.530000e+02  7.600000e+01   
movieId    7.780000e+02  ...  7.780000e+02  7.780000e+02  7.780000e+02   
rating     3.000000e+00  ...  4

                  31594        22337         14251         92923        45671  \
userId     2.190000e+02        151.0  9.100000e+01  5.990000e+02        302.0   
movieId    7.860000e+02        786.0  7.860000e+02  7.860000e+02        786.0   
rating     3.000000e+00          4.0  2.500000e+00  2.000000e+00          4.0   
timestamp  1.194932e+09  855947242.0  1.112713e+09  1.498511e+09  854472122.0   

                 45810        83433        8144          10498         41986  \
userId           304.0        529.0         57.0  6.800000e+01  2.870000e+02   
movieId          786.0        786.0        786.0  7.860000e+02  7.860000e+02   
rating             3.0          5.0          2.0  2.500000e+00  1.000000e+00   
timestamp  881428467.0  855583362.0  969754753.0  1.158533e+09  1.110231e+09   

           ...         56984         77114        51960        59071  \
userId     ...  3.800000e+02  4.820000e+02        337.0        385.0   
movieId    ...  7.860000e+02  7.860000e+02       

                  49995         10501        51963         73273  \
userId     3.220000e+02  6.800000e+01        337.0  4.740000e+02   
movieId    8.050000e+02  8.050000e+02        805.0  8.050000e+02   
rating     3.500000e+00  3.000000e+00          4.0  3.000000e+00   
timestamp  1.217677e+09  1.158534e+09  860255866.0  1.002929e+09   

                  24238         10035        60321        82202        18499  \
userId     1.680000e+02  6.600000e+01        389.0        521.0        117.0   
movieId    8.050000e+02  8.050000e+02        805.0        805.0        805.0   
rating     4.000000e+00  4.000000e+00          5.0          4.0          4.0   
timestamp  1.306793e+09  1.113188e+09  857934470.0  852713294.0  844163454.0   

                  8738   ...         51825        70637        14096  \
userId     6.000000e+01  ...  3.350000e+02        451.0         90.0   
movieId    8.050000e+02  ...  8.050000e+02        805.0        805.0   
rating     4.000000e+00  ...  3.000000e+00

                  17286        44075         14698        79380        23317  \
userId     1.110000e+02        294.0  9.200000e+01        492.0        160.0   
movieId    8.370000e+02        837.0  8.370000e+02        837.0        837.0   
rating     4.000000e+00          1.0  5.000000e+00          3.0          2.0   
timestamp  1.518641e+09  966595671.0  1.294941e+09  863976525.0  971619114.0   

                 31030         7126          19981         80404  \
userId           217.0  5.000000e+01  1.320000e+02  5.090000e+02   
movieId          837.0  8.370000e+02  8.370000e+02  8.370000e+02   
rating             2.0  2.500000e+00  3.000000e+00  3.500000e+00   
timestamp  955944596.0  1.514239e+09  1.157968e+09  1.436027e+09   

                  15983  ...         23105        2434          56988  \
userId     1.040000e+02  ...  1.580000e+02         19.0  3.800000e+02   
movieId    8.370000e+02  ...  8.370000e+02        837.0  8.370000e+02   
rating     3.000000e+00  ...  2.500000e

userId           290.0         68.0          573.0        197.0         104.0  \
userId           290.0  6.800000e+01  5.730000e+02        197.0  1.040000e+02   
movieId          858.0  8.580000e+02  8.580000e+02        858.0  8.580000e+02   
rating             5.0  4.000000e+00  5.000000e+00          2.0  1.500000e+00   
timestamp  974937901.0  1.268849e+09  1.415676e+09  947462734.0  1.055630e+09   

userId           474.0         135.0         91.0          16.0   \
userId           474.0  1.350000e+02  9.100000e+01  1.600000e+01   
movieId          858.0  8.580000e+02  8.580000e+02  8.580000e+02   
rating             5.0  4.000000e+00  5.000000e+00  2.500000e+00   
timestamp  983033120.0  1.009694e+09  1.112711e+09  1.377477e+09   

userId            600.0  ...         362.0        302.0         462.0  \
userId     6.000000e+02  ...  3.620000e+02        302.0  4.620000e+02   
movieId    8.580000e+02  ...  8.580000e+02        858.0  8.580000e+02   
rating     4.000000e+00  ...  5.00

                  90610        19653        90293        55229         80355  \
userId     5.900000e+02        128.0        587.0        367.0  5.080000e+02   
movieId    8.980000e+02        898.0        898.0        898.0  8.980000e+02   
rating     4.000000e+00          5.0          4.0          4.0  2.000000e+00   
timestamp  1.258783e+09  899032763.0  953138131.0  997811870.0  1.268298e+09   

                 38923         24984         81583        22751         32296  \
userId           268.0  1.770000e+02  5.170000e+02        156.0  2.210000e+02   
movieId          898.0  8.980000e+02  8.980000e+02        898.0  8.980000e+02   
rating             5.0  5.000000e+00  3.000000e+00          5.0  3.500000e+00   
timestamp  940181021.0  1.435535e+09  1.487957e+09  939853337.0  1.119984e+09   

           ...         73281         52661        96282        348    \
userId     ...  4.740000e+02  3.450000e+02        603.0          4.0   
movieId    ...  8.980000e+02  8.980000e+02       

                  89378         23807         60477         52036  \
userId     5.800000e+02  1.650000e+02  3.910000e+02  3.390000e+02   
movieId    9.080000e+02  9.080000e+02  9.080000e+02  9.080000e+02   
rating     1.500000e+00  4.000000e+00  5.000000e+00  4.500000e+00   
timestamp  1.167791e+09  1.046741e+09  1.030825e+09  1.460519e+09   

                  73291        72524        23324         46875         79808  \
userId     4.740000e+02        469.0        160.0  3.070000e+02  4.970000e+02   
movieId    9.080000e+02        908.0        908.0  9.080000e+02  9.080000e+02   
rating     3.500000e+00          5.0          3.0  4.000000e+00  3.000000e+00   
timestamp  1.089387e+09  965662055.0  971112529.0  1.198813e+09  1.429127e+09   

                  1838   ...         36436        19656        62149  \
userId     1.800000e+01  ...  2.490000e+02        128.0        412.0   
movieId    9.080000e+02  ...  9.080000e+02        908.0        908.0   
rating     4.000000e+00  ...  4.

userId            132.0         593.0         357.0         488.0  \
914        3.000000e+00  4.000000e+00  4.000000e+00  4.500000e+00   
timestamp  1.157923e+09  1.181007e+09  1.348610e+09  1.112382e+09   

userId           4.0           525.0         552.0         292.0  \
914                5.0  4.000000e+00  3.500000e+00  2.000000e+00   
timestamp  945079670.0  1.476478e+09  1.111473e+09  1.357062e+09   

userId            105.0        188.0  ...         20.0          169.0  \
914        4.000000e+00          5.0  ...  4.500000e+00  5.000000e+00   
timestamp  1.448214e+09  962560181.0  ...  1.054036e+09  1.059427e+09   

userId            483.0         590.0         563.0         517.0  \
914        4.000000e+00  4.000000e+00  4.000000e+00  4.500000e+00   
timestamp  1.215896e+09  1.258420e+09  1.439933e+09  1.487954e+09   

userId            50.0          489.0         474.0         3.0    
914        3.000000e+00  5.000000e+00  4.500000e+00  5.000000e-01  
timestamp  1.527542e+09

userId            187.0        216.0         599.0        597.0         590.0  \
922        5.000000e+00          5.0  2.500000e+00          5.0  4.000000e+00   
timestamp  1.161850e+09  975211546.0  1.498517e+09  940362409.0  1.266390e+09   

userId            474.0         9.0           561.0         66.0   \
922        4.000000e+00  4.000000e+00  4.500000e+00  5.000000e+00   
timestamp  1.055440e+09  1.044657e+09  1.491095e+09  1.104645e+09   

userId            53.0          137.0         415.0         274.0  \
922        5.000000e+00  5.000000e+00  4.500000e+00  3.500000e+00   
timestamp  1.237748e+09  1.204860e+09  1.382470e+09  1.284685e+09   

userId            105.0        57.0         59.0          318.0         477.0  \
922        4.000000e+00          4.0          4.0  4.000000e+00  4.500000e+00   
timestamp  1.448196e+09  965795803.0  953609611.0  1.327169e+09  1.245030e+09   

userId           84.0   
922                4.0  
timestamp  860397228.0  
Float64Index([187.0, 

userId           465.0         590.0         474.0         100.0        156.0  \
userId           465.0  5.900000e+02  4.740000e+02  1.000000e+02        156.0   
movieId          933.0  9.330000e+02  9.330000e+02  9.330000e+02        933.0   
rating             5.0  4.000000e+00  3.500000e+00  4.000000e+00          5.0   
timestamp  959896051.0  1.258419e+09  1.089386e+09  1.100184e+09  946799400.0   

userId           216.0         219.0        57.0         19.0         572.0  \
userId           216.0  2.190000e+02         57.0         19.0        572.0   
movieId          933.0  9.330000e+02        933.0        933.0        933.0   
rating             5.0  3.000000e+00          4.0          4.0          5.0   
timestamp  975212514.0  1.198815e+09  965796716.0  965705982.0  945893455.0   

userId           355.0        587.0        84.0         412.0        469.0  \
userId           355.0        587.0         84.0        412.0        469.0   
movieId          933.0        933.0       

timestamp  1.127643e+09  
userId           603.0         186.0         517.0         580.0  \
userId           603.0  1.860000e+02  5.170000e+02  5.800000e+02   
movieId          952.0  9.520000e+02  9.520000e+02  9.520000e+02   
rating             4.0  4.000000e+00  3.000000e+00  3.000000e+00   
timestamp  963177272.0  1.031088e+09  1.487957e+09  1.167862e+09   

userId            387.0         292.0        288.0         489.0        577.0  \
userId     3.870000e+02  2.920000e+02        288.0  4.890000e+02        577.0   
movieId    9.520000e+02  9.520000e+02        952.0  9.520000e+02        952.0   
rating     2.500000e+00  4.000000e+00          3.0  4.500000e+00          4.0   
timestamp  1.095042e+09  1.293563e+09  978467717.0  1.333831e+09  945965795.0   

userId            448.0  
userId     4.480000e+02  
movieId    9.520000e+02  
rating     4.500000e+00  
timestamp  1.127643e+09  
userId           603.0         186.0         517.0         580.0  \
952                4.0  4.000

                  47840        43266         25014
userId     3.090000e+02        290.0  1.770000e+02
movieId    9.710000e+02        971.0  9.710000e+02
rating     4.500000e+00          5.0  5.000000e+00
timestamp  1.086565e+09  974938908.0  1.435537e+09
userId            309.0        290.0         177.0
userId     3.090000e+02        290.0  1.770000e+02
movieId    9.710000e+02        971.0  9.710000e+02
rating     4.500000e+00          5.0  5.000000e+00
timestamp  1.086565e+09  974938908.0  1.435537e+09
userId            309.0        290.0         177.0
971        4.500000e+00          5.0  5.000000e+00
timestamp  1.086565e+09  974938908.0  1.435537e+09
Float64Index([309.0, 290.0, 177.0], dtype='float64', name='userId')
Index([971, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
97520     606      973     4.0  1180551729
73340     474      973     3.5  1084982148
47841     309      973     4.0  1086564073
76080     479      973     4.0  1039362729
62648     41

userId           288.0         477.0         226.0        45.0         294.0  \
userId           288.0  4.770000e+02  2.260000e+02         45.0        294.0   
movieId         1005.0  1.005000e+03  1.005000e+03       1005.0       1005.0   
rating             1.0  2.500000e+00  3.000000e+00          2.0          1.0   
timestamp  978470575.0  1.201196e+09  1.095713e+09  951170710.0  966596382.0   

userId            219.0  
userId     2.190000e+02  
movieId    1.005000e+03  
rating     1.500000e+00  
timestamp  1.198783e+09  
userId           288.0         477.0         226.0        45.0         294.0  \
1005               1.0  2.500000e+00  3.000000e+00          2.0          1.0   
timestamp  978470575.0  1.201196e+09  1.095713e+09  951170710.0  966596382.0   

userId            219.0  
1005       1.500000e+00  
timestamp  1.198783e+09  
Float64Index([288.0, 477.0, 226.0, 45.0, 294.0, 219.0], dtype='float64', name='userId')
Index([1005, 'timestamp'], dtype='object')
       userId  movi

[4 rows x 32 columns]
userId            68.0          477.0        555.0         219.0  \
userId     6.800000e+01  4.770000e+02        555.0  2.190000e+02   
movieId    1.020000e+03  1.020000e+03       1020.0  1.020000e+03   
rating     2.500000e+00  3.500000e+00          3.0  3.000000e+00   
timestamp  1.261624e+09  1.200939e+09  978746793.0  1.194740e+09   

userId            590.0        524.0        414.0        42.0         45.0   \
userId     5.900000e+02        524.0        414.0         42.0         45.0   
movieId    1.020000e+03       1020.0       1020.0       1020.0       1020.0   
rating     3.000000e+00          3.0          2.0          2.0          3.0   
timestamp  1.267238e+09  851609131.0  961438911.0  996220248.0  951170628.0   

userId           263.0  ...         198.0         380.0         448.0  \
userId           263.0  ...  1.980000e+02  3.800000e+02  4.480000e+02   
movieId         1020.0  ...  1.020000e+03  1.020000e+03  1.020000e+03   
rating             4.0

userId            328.0         599.0        304.0         177.0        117.0  \
1028       5.000000e+00  2.500000e+00          4.0  4.000000e+00          4.0   
timestamp  1.494211e+09  1.519120e+09  891173962.0  1.435534e+09  844163734.0   

userId            282.0        474.0         200.0        201.0         398.0  \
1028       4.500000e+00          3.0  3.000000e+00          4.0  5.000000e+00   
timestamp  1.378498e+09  978575945.0  1.229876e+09  939801930.0  1.311207e+09   

userId     ...         91.0          325.0         132.0        414.0  \
1028       ...  3.000000e+00  4.000000e+00  4.000000e+00          4.0   
timestamp  ...  1.112714e+09  1.039397e+09  1.157923e+09  961595525.0   

userId           288.0         153.0        188.0        216.0         232.0  \
1028               4.0  2.000000e+00          5.0          4.0  3.500000e+00   
timestamp  976120723.0  1.525552e+09  962559967.0  975212727.0  1.085353e+09   

userId            205.0  
1028       3.000000e+00  

userId            308.0        42.0          239.0        31.0          222.0  \
1036       1.000000e+00          4.0  4.000000e+00          5.0  3.000000e+00   
timestamp  1.421374e+09  996216013.0  1.221158e+09  850467468.0  1.391350e+09   

userId            116.0         226.0        474.0        84.0          400.0  \
1036       3.500000e+00  3.000000e+00          3.0          3.0  4.500000e+00   
timestamp  1.337200e+09  1.095662e+09  983034060.0  857653843.0  1.498870e+09   

userId     ...         98.0         385.0         30.0          608.0  \
1036       ...  5.000000e+00          3.0  4.000000e+00  3.000000e+00   
timestamp  ...  1.532458e+09  842892161.0  1.500370e+09  1.117415e+09   

userId           160.0         600.0        414.0         419.0         135.0  \
1036               2.0  3.000000e+00          5.0  4.500000e+00  5.000000e+00   
timestamp  971113527.0  1.237859e+09  961515684.0  1.321659e+09  1.009693e+09   

userId            232.0  
1036       4.500000e+0

userId           603.0
1053               3.0
timestamp  953925191.0
Float64Index([603.0], dtype='float64', name='userId')
Index([1053, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
99597     610     1054     2.0  1493850563
                  99597
userId     6.100000e+02
movieId    1.054000e+03
rating     2.000000e+00
timestamp  1.493851e+09
userId            610.0
userId     6.100000e+02
movieId    1.054000e+03
rating     2.000000e+00
timestamp  1.493851e+09
userId            610.0
1054       2.000000e+00
timestamp  1.493851e+09
Float64Index([610.0], dtype='float64', name='userId')
Index([1054, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
56258     372     1055     2.0   874414588
28640     199     1055     1.0  1126036284
                 56258         28640
userId           372.0  1.990000e+02
movieId         1055.0  1.055000e+03
rating             2.0  1.000000e+00
timestamp  874414588.0  1.126036e+09
userId           372.0   

userId           597.0        217.0        409.0         387.0        469.0  \
1078               4.0          2.0          5.0  3.000000e+00          4.0   
timestamp  941640314.0  955943999.0  967920117.0  1.140942e+09  965336221.0   

userId           294.0         477.0        603.0         480.0  
1078               3.0  5.000000e+00          2.0  3.500000e+00  
timestamp  966595998.0  1.200949e+09  963176514.0  1.179161e+09  
Float64Index([597.0, 217.0, 409.0, 387.0, 469.0, 294.0, 477.0, 603.0, 480.0], dtype='float64', name='userId')
Index([1078, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
17018     108     1079     5.0  1042840657
57012     380     1079     5.0  1493422710
44586     298     1079     3.5  1447597961
82011     520     1079     2.0  1326609456
7487       51     1079     5.0  1230929741
88561     572     1079     5.0   945892394
49058     318     1079     3.5  1235480341
5997       42     1079     3.0   996258093
22030     144     1079  

                  67067         75527         81120         32625  \
userId     4.330000e+02  4.770000e+02  5.140000e+02  2.220000e+02   
movieId    1.089000e+03  1.089000e+03  1.089000e+03  1.089000e+03   
rating     4.000000e+00  4.000000e+00  4.000000e+00  4.000000e+00   
timestamp  1.506824e+09  1.200940e+09  1.536457e+09  1.391350e+09   

                  30637         56014         46897         78616  \
userId     2.150000e+02  3.700000e+02  3.070000e+02  4.890000e+02   
movieId    1.089000e+03  1.089000e+03  1.089000e+03  1.089000e+03   
rating     3.000000e+00  2.500000e+00  4.000000e+00  3.500000e+00   
timestamp  1.260909e+09  1.159162e+09  1.186161e+09  1.385823e+09   

                  27581         44587  ...         99600         37456  \
userId     1.870000e+02  2.980000e+02  ...  6.100000e+02  2.510000e+02   
movieId    1.089000e+03  1.089000e+03  ...  1.089000e+03  1.089000e+03   
rating     4.500000e+00  4.000000e+00  ...  5.000000e+00  5.000000e+00   
timestamp  1

userId            275.0        414.0        33.0          599.0         182.0  \
1095       5.000000e+00          4.0          4.0  4.000000e+00  4.000000e+00   
timestamp  1.049076e+09  961516939.0  939716778.0  1.498499e+09  1.069634e+09   

userId           265.0        437.0         28.0         140.0         18.0   \
1095               3.0          5.0  3.000000e+00          2.0  4.000000e+00   
timestamp  965317904.0  859721610.0  1.234571e+09  949666857.0  1.502911e+09   

userId           372.0         307.0         108.0  
1095               3.0  3.000000e+00  2.000000e+00  
timestamp  874417087.0  1.207702e+09  1.042840e+09  
Float64Index([275.0, 414.0,  33.0, 599.0, 182.0, 265.0, 437.0,  28.0, 140.0,
               18.0, 372.0, 307.0, 108.0],
             dtype='float64', name='userId')
Index([1095, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
73383     474     1096     2.5  1087832346
9537       64     1096     5.0  1161536367
43276     290     1

                  49064         28130         52748         39106  \
userId     3.180000e+02  1.950000e+02  3.460000e+02  2.710000e+02   
movieId    1.120000e+03  1.120000e+03  1.120000e+03  1.120000e+03   
rating     3.500000e+00  4.000000e+00  3.000000e+00  4.000000e+00   
timestamp  1.419077e+09  1.008559e+09  1.094647e+09  1.234302e+09   

                 96345         40654        23345         66429        70911  \
userId           603.0  2.750000e+02        160.0  4.280000e+02        453.0   
movieId         1120.0  1.120000e+03       1120.0  1.120000e+03       1120.0   
rating             2.0  4.000000e+00          3.0  3.500000e+00          4.0   
timestamp  963179734.0  1.049079e+09  971620157.0  1.111524e+09  972622830.0   

                  4270   ...         9540          27582        43278  \
userId     2.800000e+01  ...  6.400000e+01  1.870000e+02        290.0   
movieId    1.120000e+03  ...  1.120000e+03  1.120000e+03       1120.0   
rating     3.000000e+00  ...  3.50

                  65469         30639         22788        13281        56273  \
userId     4.200000e+02  2.150000e+02  1.560000e+02         84.0        372.0   
movieId    1.131000e+03  1.131000e+03  1.131000e+03       1131.0       1131.0   
rating     4.000000e+00  3.500000e+00  3.500000e+00          5.0          4.0   
timestamp  1.229803e+09  1.260909e+09  1.106881e+09  858772802.0  874414749.0   

                 84713        29483         60503  
userId           547.0        202.0  3.910000e+02  
movieId         1131.0       1131.0  1.131000e+03  
rating             4.0          4.0  5.000000e+00  
timestamp  942723440.0  974912846.0  1.032388e+09  
userId            420.0         215.0         156.0        84.0         372.0  \
userId     4.200000e+02  2.150000e+02  1.560000e+02         84.0        372.0   
movieId    1.131000e+03  1.131000e+03  1.131000e+03       1131.0       1131.0   
rating     4.000000e+00  3.500000e+00  3.500000e+00          5.0          4.0   
timestamp 

timestamp  861813370.0  856883725.0
userId           277.0        456.0
1167               3.0          4.0
timestamp  861813370.0  856883725.0
Float64Index([277.0, 456.0], dtype='float64', name='userId')
Index([1167, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
79430     493     1170     3.0  1001562535
85425     555     1170     2.0   980125797
                  79430        85425
userId     4.930000e+02        555.0
movieId    1.170000e+03       1170.0
rating     3.000000e+00          2.0
timestamp  1.001563e+09  980125797.0
userId            493.0        555.0
userId     4.930000e+02        555.0
movieId    1.170000e+03       1170.0
rating     3.000000e+00          2.0
timestamp  1.001563e+09  980125797.0
userId            493.0        555.0
1170       3.000000e+00          2.0
timestamp  1.001563e+09  980125797.0
Float64Index([493.0, 555.0], dtype='float64', name='userId')
Index([1170, 'timestamp'], dtype='object')
       userId  movieId  rating   times

                  76364        72199        8196         72405        70913  \
userId     4.800000e+02        465.0         57.0        467.0        453.0   
movieId    1.183000e+03       1183.0       1183.0       1183.0       1183.0   
rating     4.500000e+00          5.0          4.0          5.0          4.0   
timestamp  1.179162e+09  959896573.0  972174580.0  919671976.0  972622737.0   

                 14110        28655        38287        5107          97548  \
userId            90.0        199.0        263.0         33.0  6.060000e+02   
movieId         1183.0       1183.0       1183.0       1183.0  1.183000e+03   
rating             4.0          4.0          5.0          3.0  3.500000e+00   
timestamp  856354212.0  940544123.0  941591735.0  939647016.0  1.178491e+09   

           ...        96359         41888         14282        18018  \
userId     ...        603.0  2.860000e+02  9.100000e+01        113.0   
movieId    ...       1183.0  1.183000e+03  1.183000e+03       11

userId            131.0        450.0        156.0         330.0         220.0  \
1193       3.500000e+00          5.0          4.0  4.000000e+00  5.000000e+00   
timestamp  1.349840e+09  974705774.0  939884874.0  1.285904e+09  1.230060e+09   

userId            417.0         590.0         420.0         424.0  \
1193       4.500000e+00  4.500000e+00  4.000000e+00  4.000000e+00   
timestamp  1.532134e+09  1.258416e+09  1.218041e+09  1.457844e+09   

userId            339.0  ...         400.0        265.0         69.0   \
1193       5.000000e+00  ...  4.500000e+00          3.0  5.000000e+00   
timestamp  1.460519e+09  ...  1.498870e+09  965316088.0  1.021643e+09   

userId            178.0         177.0        368.0         22.0   \
1193       5.000000e+00  4.000000e+00          5.0  5.000000e+00   
timestamp  1.164355e+09  1.435534e+09  971273035.0  1.268726e+09   

userId            580.0        171.0         239.0  
1193       4.000000e+00          5.0  4.000000e+00  
timestamp  1.1677

userId            362.0         187.0         50.0         27.0   \
1201       5.000000e+00  5.000000e+00  4.000000e+00          4.0   
timestamp  1.530641e+09  1.161850e+09  1.536870e+09  965149552.0   

userId            18.0         288.0         305.0         499.0  \
1201       5.000000e+00          4.0  5.000000e+00  4.500000e+00   
timestamp  1.455051e+09  978465510.0  1.460135e+09  1.289960e+09   

userId            309.0         474.0  ...         552.0         16.0   \
1201       4.500000e+00  3.500000e+00  ...  4.000000e+00  3.000000e+00   
timestamp  1.086563e+09  1.086017e+09  ...  1.111473e+09  1.377477e+09   

userId            382.0         51.0          66.0          221.0  \
1201       4.500000e+00  4.000000e+00  5.000000e+00  4.000000e+00   
timestamp  1.515161e+09  1.230932e+09  1.099187e+09  1.111176e+09   

userId            421.0         560.0        202.0        524.0  
1201       4.500000e+00  4.000000e+00          5.0          4.0  
timestamp  1.311495e+09  1.

                  60525        29506        22249         87811         61589  \
userId     3.910000e+02        202.0        149.0  5.670000e+02  4.080000e+02   
movieId    1.210000e+03       1210.0       1210.0  1.210000e+03  1.210000e+03   
rating     4.000000e+00          3.0          4.0  1.500000e+00  4.000000e+00   
timestamp  1.032389e+09  974912027.0  902084798.0  1.525289e+09  1.472537e+09   

                  52593         1861          33958        23356  \
userId     3.440000e+02  1.800000e+01  2.320000e+02        160.0   
movieId    1.210000e+03  1.210000e+03  1.210000e+03       1210.0   
rating     5.000000e+00  4.500000e+00  5.000000e+00          5.0   
timestamp  1.420497e+09  1.455210e+09  1.077235e+09  971113953.0   

                  88735  ...         26268         51048         39427  \
userId     5.730000e+02  ...  1.820000e+02  3.300000e+02  2.740000e+02   
movieId    1.210000e+03  ...  1.210000e+03  1.210000e+03  1.210000e+03   
rating     5.000000e+00  ...  2

timestamp  1.250336e+09  946799037.0  1.099188e+09  940381382.0  
userId           414.0         74.0         84.0          23.0         597.0  \
1217               5.0  4.000000e+00          4.0  3.500000e+00          4.0   
timestamp  961512311.0  1.207502e+09  858773023.0  1.107342e+09  941639635.0   

userId            474.0         64.0          600.0         606.0  \
1217       4.000000e+00  5.000000e+00  4.000000e+00  4.000000e+00   
timestamp  1.089387e+09  1.161531e+09  1.237760e+09  1.171324e+09   

userId            390.0        156.0         66.0         199.0  
1217       5.000000e+00          5.0  5.000000e+00          5.0  
timestamp  1.250336e+09  946799037.0  1.099188e+09  940381382.0  
Float64Index([414.0,  74.0,  84.0,  23.0, 597.0, 474.0,  64.0, 600.0, 606.0,
              390.0, 156.0,  66.0, 199.0],
             dtype='float64', name='userId')
Index([1217, 'timestamp'], dtype='object')
       userId  movieId  rating   timestamp
39431     274     1218     4.0  1250

userId            29.0          312.0         51.0         32.0   \
userId     2.900000e+01  3.120000e+02  5.100000e+01         32.0   
movieId    1.224000e+03  1.224000e+03  1.224000e+03       1224.0   
rating     4.000000e+00  4.000000e+00  5.000000e+00          3.0   
timestamp  1.307906e+09  1.043177e+09  1.230931e+09  856737338.0   

userId            66.0         59.0         603.0        156.0        372.0  \
userId     6.600000e+01         59.0        603.0        156.0        372.0   
movieId    1.224000e+03       1224.0       1224.0       1224.0       1224.0   
rating     4.500000e+00          5.0          2.0          4.0          4.0   
timestamp  1.104645e+09  953609923.0  953927590.0  946799037.0  874415603.0   

userId            474.0         221.0        547.0         332.0  \
userId     4.740000e+02  2.210000e+02        547.0  3.320000e+02   
movieId    1.224000e+03  1.224000e+03       1224.0  1.224000e+03   
rating     4.000000e+00  5.000000e+00          3.0  3.50000

                  9223          32351         13585        58865  \
userId     6.300000e+01  2.210000e+02  8.900000e+01        383.0   
movieId    1.234000e+03  1.234000e+03  1.234000e+03       1234.0   
rating     2.500000e+00  3.500000e+00  5.000000e-01          3.0   
timestamp  1.443200e+09  1.118247e+09  1.520408e+09  943571272.0   

                  22164         50269        44114         16277  \
userId     1.470000e+02  3.250000e+02        294.0  1.050000e+02   
movieId    1.234000e+03  1.234000e+03       1234.0  1.234000e+03   
rating     4.500000e+00  4.000000e+00          3.0  4.000000e+00   
timestamp  1.203268e+09  1.039399e+09  966597447.0  1.448196e+09   

                  4291          5311   ...         81627         90682  \
userId     2.800000e+01  3.600000e+01  ...  5.170000e+02  5.900000e+02   
movieId    1.234000e+03  1.234000e+03  ...  1.234000e+03  1.234000e+03   
rating     3.000000e+00  4.000000e+00  ...  2.000000e+00  4.000000e+00   
timestamp  1.234570e+0

timestamp  1.171825e+09  1.354127e+09  
userId            182.0         23.0         469.0         105.0  \
1243       5.000000e+00  3.500000e+00          5.0  3.000000e+00   
timestamp  1.054780e+09  1.107342e+09  965425565.0  1.448196e+09   

userId            387.0        414.0         66.0          509.0  \
1243       3.000000e+00          5.0  5.000000e+00  4.000000e+00   
timestamp  1.182720e+09  961438248.0  1.099188e+09  1.436102e+09   

userId            599.0        57.0          51.0          68.0   \
1243       3.000000e+00          5.0  5.000000e+00  1.000000e+00   
timestamp  1.498525e+09  965796886.0  1.230931e+09  1.332629e+09   

userId            606.0         246.0  
1243       4.000000e+00  5.000000e+00  
timestamp  1.171825e+09  1.354127e+09  
Float64Index([182.0,  23.0, 469.0, 105.0, 387.0, 414.0,  66.0, 509.0, 599.0,
               57.0,  51.0,  68.0, 606.0, 246.0],
             dtype='float64', name='userId')
Index([1243, 'timestamp'], dtype='object')
       use

userId            264.0         387.0        422.0         29.0   \
userId     2.640000e+02  3.870000e+02        422.0  2.900000e+01   
movieId    1.250000e+03  1.250000e+03       1250.0  1.250000e+03   
rating     5.000000e+00  4.000000e+00          4.0  5.000000e+00   
timestamp  1.136978e+09  1.095041e+09  995235719.0  1.308025e+09   

userId            348.0        27.0          502.0        59.0         140.0  \
userId     3.480000e+02         27.0  5.020000e+02         59.0        140.0   
movieId    1.250000e+03       1250.0  1.250000e+03       1250.0       1250.0   
rating     4.500000e+00          4.0  5.000000e+00          5.0          5.0   
timestamp  1.378851e+09  962686417.0  1.111757e+09  953609923.0  942841937.0   

userId            448.0  ...         76.0          221.0         334.0  \
userId     4.480000e+02  ...  7.600000e+01  2.210000e+02  3.340000e+02   
movieId    1.250000e+03  ...  1.250000e+03  1.250000e+03  1.250000e+03   
rating     5.000000e+00  ...  3.0000

                  58178         38012         32358         88309  \
userId     3.810000e+02  2.600000e+02  2.210000e+02  5.700000e+02   
movieId    1.258000e+03  1.258000e+03  1.258000e+03  1.258000e+03   
rating     4.000000e+00  3.500000e+00  5.000000e+00  3.000000e+00   
timestamp  1.166891e+09  1.109408e+09  1.118247e+09  1.181478e+09   

                  47998         48831         5688          39441  \
userId     3.110000e+02  3.170000e+02  4.100000e+01  2.740000e+02   
movieId    1.258000e+03  1.258000e+03  1.258000e+03  1.258000e+03   
rating     4.000000e+00  5.000000e+00  5.000000e+00  4.500000e+00   
timestamp  1.057854e+09  1.430517e+09  1.458939e+09  1.171409e+09   

                  1713         45478  ...         16282         61177  \
userId     1.700000e+01        299.0  ...  1.050000e+02  4.000000e+02   
movieId    1.258000e+03       1258.0  ...  1.258000e+03  1.258000e+03   
rating     4.000000e+00          4.0  ...  4.000000e+00  4.500000e+00   
timestamp  1.322

                  59447        21230        387           25837         50014  \
userId     3.870000e+02        140.0          4.0  1.780000e+02  3.220000e+02   
movieId    1.265000e+03       1265.0       1265.0  1.265000e+03  1.265000e+03   
rating     3.500000e+00          3.0          4.0  4.500000e+00  4.000000e+00   
timestamp  1.094877e+09  949666815.0  964622684.0  1.164355e+09  1.217676e+09   

                  35719         76392         27603        84     \
userId     2.410000e+02  4.800000e+02  1.870000e+02          1.0   
movieId    1.265000e+03  1.265000e+03  1.265000e+03       1265.0   
rating     3.000000e+00  3.000000e+00  4.500000e+00          4.0   
timestamp  1.447536e+09  1.179162e+09  1.161864e+09  964983599.0   

                  52595  ...         67152         8816          21721  \
userId     3.440000e+02  ...  4.340000e+02  6.200000e+01  1.410000e+02   
movieId    1.265000e+03  ...  1.265000e+03  1.265000e+03  1.265000e+03   
rating     5.000000e+00  ...  5

KeyboardInterrupt: 

In [None]:
for movieId, group in tqdm(grouped):
    group_copied = group
    group_copied = group_copied.transpose()
    group_copied.loc['userId'] = pd.to_numeric(group_copied.loc['userId'])
    group_copied.columns = group_copied.loc['userId']
    group_copied = group_copied.drop(['userId', 'movieId']).rename(index={'rating': movieId})

    sparse_matrix.update(group_copied)

### 2. 간단하고 효율적인 방법
- pandas dataframe의 unstack(계층적 인덱싱)


In [19]:
sparse_matrix = train_df.groupby('movieId').apply(lambda x: pd.Series(x['rating'].values, index=x['userId'])).unstack()
sparse_matrix.index.name = 'movieId'

sparse_matrix

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,4.0,,4.5,,,,...,4.0,,4.0,3.0,4.0,2.5,,2.5,,5.0
2,,,,,,,,4.0,,,...,,4.0,,,3.5,,,2.0,,
3,4.0,,,,,5.0,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,5.0,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193573,,,,,,,,,,,...,,,,,,,,,,
193579,,,,,,,,,,,...,,,,,,,,,,
193581,,,,,,,,,,,...,,,,,,,,,,
193587,,,,,,,,,,,...,,,,,,,,,,


- `MEMO`: 0이 아닌 다른 숫자를 주는 것도 또 하나 시도해 볼 수 있는 실험이다. 

## 코사인 유사도 활용하기

In [20]:
from sklearn.metrics.pairwise import cosine_similarity

def cossim_matrix(a, b):
    cossim_values = cosine_similarity(a.values, b.values)
    cossim_df = pd.DataFrame(data=cossim_values, columns = a.index.values, index=a.index)

    return cossim_df

## Neighborhood-based 협업필터링 추천점수 계산하기

### Item-based

In [21]:
item_sparse_matrix = sparse_matrix.fillna(0)
item_sparse_matrix.shape

(8938, 610)

In [22]:
item_sparse_matrix

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,0.0,2.5,0.0,5.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,0.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193573,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
# item과 item의 코사인 유사도
item_cossim_df = cossim_matrix(item_sparse_matrix, item_sparse_matrix)
item_cossim_df

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,190219,191005,193565,193567,193571,193573,193579,193581,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.304336,0.267816,0.040259,0.221228,0.266544,0.149392,0.132943,0.182044,0.296838,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.304336,1.000000,0.226138,0.052482,0.154783,0.209716,0.189420,0.068012,0.027945,0.303157,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.267816,0.226138,1.000000,0.000000,0.306435,0.245555,0.368724,0.168267,0.253679,0.178219,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.040259,0.052482,0.000000,1.000000,0.095673,0.068708,0.205962,0.000000,0.000000,0.044835,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.221228,0.154783,0.306435,0.095673,1.000000,0.238683,0.343507,0.204088,0.222925,0.150729,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193573,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193579,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193581,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193587,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [24]:
# movieId: 8938개, userId: 610개
# train_df에 포함된 userId를 계산에 반영한다
userId_grouped = train_df.groupby('userId')
# index: userId, columns: total movieId
item_prediction_result_df = pd.DataFrame(index=list(userId_grouped.indices.keys()), columns=item_sparse_matrix.index)
item_prediction_result_df

movieId,1,2,3,4,5,6,7,8,9,10,...,190219,191005,193565,193567,193571,193573,193579,193581,193587,193609
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,,,,,,...,,,,,,,,,,
607,,,,,,,,,,,...,,,,,,,,,,
608,,,,,,,,,,,...,,,,,,,,,,
609,,,,,,,,,,,...,,,,,,,,,,


In [25]:
for userId, group in tqdm(userId_grouped):
    # user가 rating한 movieId * 전체 movieId
    user_sim = item_cossim_df.loc[group['movieId']]
    # user가 rating한 movieId * 1
    user_rating = group['rating']
    # 전체 movieId * 1
    sim_sum = user_sim.sum(axis=0)

    # userId의 전체 rating predictions (8938 * 1)
    pred_ratings = np.matmul(user_sim.T.to_numpy(), user_rating) / (sim_sum+1)
    item_prediction_result_df.loc[userId] = pred_ratings

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


  0%|          | 0/610 [00:00<?, ?it/s]

In [26]:
item_prediction_result_df.head(10)

movieId,1,2,3,4,5,6,7,8,9,10,...,190219,191005,193565,193567,193571,193573,193579,193581,193587,193609
1,4.20279,4.18783,4.17609,3.03272,4.04561,4.24019,3.9933,3.90547,3.70279,4.18855,...,0.979003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.38273
2,3.19261,3.15207,2.46138,0.280903,2.61916,2.9754,1.85877,1.68768,1.09268,3.02235,...,0.387743,1.70885,1.70885,1.70885,1.70885,1.70885,1.70885,1.70885,1.70885,2.32666
3,1.32315,1.20446,1.39635,0.217016,0.819716,1.59889,0.802514,0.755008,0.86708,1.55365,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3.48994,3.48805,3.38864,3.15017,3.30488,3.50267,3.31794,2.90629,3.03413,3.41697,...,1.0335,0.505535,0.505535,0.505535,0.505535,0.505535,0.505535,0.505535,0.505535,1.963
5,3.27916,3.1278,2.99947,2.56582,2.94179,3.16869,3.00793,2.67809,2.27668,3.01557,...,0.391638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.562705
6,3.58597,3.57233,3.53615,3.33866,3.51444,3.5477,3.53826,3.42057,3.3406,3.55275,...,0.446497,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.672442
7,3.36009,3.25338,3.24516,2.22581,3.2114,3.33808,3.1832,2.8918,2.89194,3.3515,...,0.872628,0.3554,0.3554,0.3554,0.3554,0.3554,0.3554,0.3554,0.3554,2.50752
8,3.26123,3.22385,3.03702,2.58129,2.94548,3.21505,3.06126,2.6713,2.37,3.17426,...,0.597653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.730709
9,2.80797,2.62156,2.28209,0.767416,2.25043,2.64179,2.04582,1.59945,1.07072,2.63652,...,0.458811,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,3.07042,3.04355,2.85983,0.908598,2.97965,2.905,2.75228,2.24064,1.58879,3.05624,...,0.63253,1.21136,1.21136,1.21136,1.21136,1.21136,1.21136,1.21136,1.21136,2.33335


In [34]:
user_sim

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,190219,191005,193565,193567,193571,193573,193579,193581,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6294,0.126311,0.154142,0.038764,0.0,0.049955,0.201797,0.000000,0.000000,0.000000,0.246324,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
148626,0.123049,0.165150,0.054465,0.0,0.074149,0.182987,0.000000,0.000000,0.021681,0.086002,...,0.000000,0.264443,0.264443,0.264443,0.264443,0.264443,0.264443,0.264443,0.264443,0.0
111360,0.109545,0.113282,0.034396,0.0,0.041719,0.167778,0.000000,0.000000,0.000000,0.104107,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
5323,0.079107,0.132934,0.000000,0.0,0.000000,0.189852,0.000000,0.000000,0.000000,0.117006,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
97752,0.121847,0.147378,0.036918,0.0,0.067166,0.169270,0.000000,0.000000,0.026671,0.055083,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4144,0.158466,0.044723,0.090335,0.0,0.050901,0.108734,0.064473,0.000000,0.000000,0.021832,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
156371,0.075384,0.078616,0.000000,0.0,0.000000,0.107210,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
31086,0.096538,0.000000,0.000000,0.0,0.000000,0.137296,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1221,0.264267,0.197649,0.120291,0.0,0.051373,0.309114,0.048751,0.075608,0.038817,0.278270,...,0.115163,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0


In [36]:
user_sim

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,190219,191005,193565,193567,193571,193573,193579,193581,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6294,0.126311,0.154142,0.038764,0.0,0.049955,0.201797,0.000000,0.000000,0.000000,0.246324,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
148626,0.123049,0.165150,0.054465,0.0,0.074149,0.182987,0.000000,0.000000,0.021681,0.086002,...,0.000000,0.264443,0.264443,0.264443,0.264443,0.264443,0.264443,0.264443,0.264443,0.0
111360,0.109545,0.113282,0.034396,0.0,0.041719,0.167778,0.000000,0.000000,0.000000,0.104107,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
5323,0.079107,0.132934,0.000000,0.0,0.000000,0.189852,0.000000,0.000000,0.000000,0.117006,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
97752,0.121847,0.147378,0.036918,0.0,0.067166,0.169270,0.000000,0.000000,0.026671,0.055083,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4144,0.158466,0.044723,0.090335,0.0,0.050901,0.108734,0.064473,0.000000,0.000000,0.021832,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
156371,0.075384,0.078616,0.000000,0.0,0.000000,0.107210,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
31086,0.096538,0.000000,0.000000,0.0,0.000000,0.137296,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1221,0.264267,0.197649,0.120291,0.0,0.051373,0.309114,0.048751,0.075608,0.038817,0.278270,...,0.115163,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0


### User-based

In [38]:
user_sparse_matrix = sparse_matrix.fillna(0).transpose()

In [39]:
user_sparse_matrix.head(5)

movieId,1,2,3,4,5,6,7,8,9,10,...,190219,191005,193565,193567,193571,193573,193579,193581,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
user_sparse_matrix.shape

(610, 8938)

In [41]:
user_cossim_df = cossim_matrix(user_sparse_matrix, user_sparse_matrix)
user_cossim_df

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.016665,0.070570,0.160438,0.075410,0.087404,0.123664,0.069225,0.037416,0.009900,...,0.056163,0.124896,0.162324,0.050852,0.101583,0.128198,0.240652,0.225897,0.063984,0.095228
2,0.016665,1.000000,0.000000,0.004295,0.020560,0.030996,0.027726,0.000000,0.000000,0.057112,...,0.151666,0.019379,0.006645,0.000000,0.000000,0.028180,0.000000,0.046286,0.033522,0.090288
3,0.070570,0.000000,1.000000,0.002677,0.000000,0.003477,0.000000,0.000000,0.000000,0.000000,...,0.002878,0.002174,0.027609,0.000000,0.000000,0.012090,0.000000,0.023927,0.000000,0.018332
4,0.160438,0.004295,0.002677,1.000000,0.121648,0.093634,0.106495,0.047930,0.000000,0.034281,...,0.065328,0.116656,0.251105,0.056396,0.075940,0.187236,0.102819,0.109515,0.038805,0.086941
5,0.075410,0.020560,0.000000,0.121648,1.000000,0.164390,0.075932,0.302418,0.000000,0.000000,...,0.090020,0.306634,0.103646,0.145354,0.119800,0.071333,0.064705,0.111755,0.159946,0.049236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.128198,0.028180,0.012090,0.187236,0.071333,0.076207,0.159508,0.051044,0.034675,0.066999,...,0.123464,0.083967,0.238107,0.068227,0.110706,1.000000,0.101661,0.218106,0.073027,0.164355
607,0.240652,0.000000,0.000000,0.102819,0.064705,0.121999,0.167129,0.167671,0.000000,0.010033,...,0.064124,0.196274,0.176603,0.111658,0.116814,0.101661,1.000000,0.233010,0.142659,0.114225
608,0.225897,0.046286,0.023927,0.109515,0.111755,0.144931,0.274492,0.166382,0.053714,0.054227,...,0.134966,0.183543,0.198187,0.130758,0.159466,0.218106,0.233010,1.000000,0.107489,0.250085
609,0.063984,0.033522,0.000000,0.038805,0.159946,0.168015,0.080615,0.372351,0.000000,0.027156,...,0.028968,0.315137,0.051141,0.183182,0.079507,0.073027,0.142659,0.107489,1.000000,0.047981


In [42]:
movieId_grouped = train_df.groupby('movieId')
user_prediction_result_df = pd.DataFrame(index=list(movieId_grouped.indices.keys()), columns=user_sparse_matrix.index)
user_prediction_result_df

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193573,,,,,,,,,,,...,,,,,,,,,,
193579,,,,,,,,,,,...,,,,,,,,,,
193581,,,,,,,,,,,...,,,,,,,,,,
193587,,,,,,,,,,,...,,,,,,,,,,


In [43]:
for movieId, group in tqdm(movieId_grouped):
    user_sim = user_cossim_df.loc[group['userId']]
    user_rating = group['rating']
    sim_sum = user_sim.sum(axis=0)

    pred_ratings = np.matmul(user_sim.T.to_numpy(), user_rating) / (sim_sum+1)
    user_prediction_result_df.loc[movieId] = pred_ratings

# return user_prediction_result_df.transpose()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


  0%|          | 0/8938 [00:00<?, ?it/s]

In [44]:
# 전체 user가 모든 movieId에 매긴 평점
print(item_prediction_result_df.head())
print(user_prediction_result_df.transpose().head())

user_prediction_result_df = user_prediction_result_df.transpose()

movieId   1        2        3         4         5        6         7       \
1        4.20279  4.18783  4.17609   3.03272   4.04561  4.24019    3.9933   
2        3.19261  3.15207  2.46138  0.280903   2.61916   2.9754   1.85877   
3        1.32315  1.20446  1.39635  0.217016  0.819716  1.59889  0.802514   
4        3.48994  3.48805  3.38864   3.15017   3.30488  3.50267   3.31794   
5        3.27916   3.1278  2.99947   2.56582   2.94179  3.16869   3.00793   

movieId    8        9        10      ...    190219    191005    193565  \
1         3.90547  3.70279  4.18855  ...  0.979003         0         0   
2         1.68768  1.09268  3.02235  ...  0.387743   1.70885   1.70885   
3        0.755008  0.86708  1.55365  ...         0         0         0   
4         2.90629  3.03413  3.41697  ...    1.0335  0.505535  0.505535   
5         2.67809  2.27668  3.01557  ...  0.391638         0         0   

movieId    193567    193571    193573    193579    193581    193587    193609  
1           

## RMSE로 추천시스템 성능 평가하기

In [45]:
test_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
99731,610,3527,5.0,1479545223
97583,606,1250,3.5,1171376891
38197,262,213,5.0,840310907
11474,68,69406,3.0,1261622505
34105,232,4728,3.0,1218166950


In [46]:
def evaluate(test_df, prediction_result_df):
    groups_with_movie_ids = test_df.groupby(by='movieId')
    groups_with_user_ids = test_df.groupby(by='userId')
    intersection_movie_ids = sorted(list(set(list(prediction_result_df.columns)).intersection(set(list(groups_with_movie_ids.indices.keys())))))
    intersection_user_ids = sorted(list(set(list(prediction_result_df.index)).intersection(set(groups_with_user_ids.indices.keys()))))

    print(len(intersection_movie_ids))
    print(len(intersection_user_ids))

    compressed_prediction_df = prediction_result_df.loc[intersection_user_ids][intersection_movie_ids]
    # compressed_prediction_df

    # test_df에 대해서 RMSE 계산
    grouped = test_df.groupby(by='userId')
    result_df = pd.DataFrame(columns=['rmse'])
    for userId, group in tqdm(grouped):
      if userId in intersection_user_ids:
          pred_ratings = compressed_prediction_df.loc[userId][compressed_prediction_df.loc[userId].index.intersection(list(group['movieId'].values))]
          pred_ratings = pred_ratings.to_frame(name='rating').reset_index().rename(columns={'index':'movieId','rating':'pred_rating'})
          actual_ratings = group[['rating', 'movieId']].rename(columns={'rating':'actual_rating'})

          final_df = pd.merge(actual_ratings, pred_ratings, how='inner', on=['movieId'])
          final_df = final_df.round(4) # 반올림

          # if not final_df.empty:
          #     rmse = sqrt(mean_squared_error(final_df['rating_actual'], final_df['rating_pred']))
          #     result_df.loc[userId] = rmse
          #     # print(userId, rmse)

    return final_df

In [47]:
evaluate(test_df, user_prediction_result_df)

4385
610


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  app.launch_new_instance()


  0%|          | 0/610 [00:00<?, ?it/s]

Unnamed: 0,actual_rating,movieId,pred_rating
0,5.0,3527,3.28567
1,3.5,84772,2.21159
2,3.5,103141,2.83544
3,4.0,81132,0.450808
4,4.5,130634,1.02244
...,...,...,...
218,4.0,106100,2.86823
219,4.0,111759,3.51395
220,1.0,4852,0.202402
221,3.0,2628,2.93777


In [48]:
evaluate(test_df, item_prediction_result_df)

4385
610


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  app.launch_new_instance()


  0%|          | 0/610 [00:00<?, ?it/s]

Unnamed: 0,actual_rating,movieId,pred_rating
0,5.0,3527,3.79741
1,3.5,84772,3.73065
2,3.5,103141,3.6897
3,4.0,81132,3.33402
4,4.5,130634,3.4512
...,...,...,...
218,4.0,106100,3.72969
219,4.0,111759,3.75842
220,1.0,4852,3.64888
221,3.0,2628,3.81238


In [49]:
result_df = evaluate(test_df, user_prediction_result_df)
print(result_df)
print(f"RMSE: {sqrt(mean_squared_error(result_df['actual_rating'].values, result_df['pred_rating'].values))}")

4385
610


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  app.launch_new_instance()


  0%|          | 0/610 [00:00<?, ?it/s]

     actual_rating  movieId pred_rating
0              5.0     3527     3.28567
1              3.5    84772     2.21159
2              3.5   103141     2.83544
3              4.0    81132    0.450808
4              4.5   130634     1.02244
..             ...      ...         ...
218            4.0   106100     2.86823
219            4.0   111759     3.51395
220            1.0     4852    0.202402
221            3.0     2628     2.93777
222            5.0     1953     3.17222

[223 rows x 3 columns]
RMSE: 1.6949489761800296


In [50]:
result_df = evaluate(test_df, item_prediction_result_df)
print(result_df)
print(f"RMSE: {sqrt(mean_squared_error(result_df['actual_rating'].values, result_df['pred_rating'].values))}")

4385
610


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  app.launch_new_instance()


  0%|          | 0/610 [00:00<?, ?it/s]

     actual_rating  movieId pred_rating
0              5.0     3527     3.79741
1              3.5    84772     3.73065
2              3.5   103141      3.6897
3              4.0    81132     3.33402
4              4.5   130634      3.4512
..             ...      ...         ...
218            4.0   106100     3.72969
219            4.0   111759     3.75842
220            1.0     4852     3.64888
221            3.0     2628     3.81238
222            5.0     1953     3.86646

[223 rows x 3 columns]
RMSE: 0.8145193961484049
