# Quality of RatingBased Datasets

In this process, the quality of both RatingBased datasets is checked and, where necessary, improved.

Null values and duplicates will be checked

User and Movie size will be checked

The representativeness of the training dataset will be checked and improved.

The organized data will be saved as pkl files for future use.

In [1]:
#Importing libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
#Report the versions of the libraries this notebook was run with
print('numpy Version: ', np.__version__, sep='')
print('pandas Version: ', pd.__version__, sep='')

numpy Version: 1.16.5
pandas Version: 0.25.1


In [3]:
#Load the raw rating table from its pkl file and display it
ratingDf = pd.read_pickle(
    "../Data/pkl/1M/RawData/Rating.pkl"
)
ratingDf

Unnamed: 0,UserId,MovieId,Rating
0,0,0,3.5
1,0,1,3.5
2,0,2,1.5
3,0,3,4.5
4,0,4,4.5
...,...,...,...
1000888,10072,12665,3.0
1000889,10072,6417,3.0
1000890,10072,9689,3.0
1000891,10072,22032,3.0


In [4]:
#True if ratingDf holds at least one missing value anywhere
ratingDf.isna().to_numpy().any()

False

In [5]:
#True if some (UserId, MovieId) pair occurs more than once in the dataframe
ratingDf.duplicated(subset=['UserId', 'MovieId']).any()

False

In [6]:
#Number of distinct users in the raw rating data
userSize = len(ratingDf['UserId'].unique())
userSize

10073

In [7]:
#Number of distinct movies in the raw rating data
movieSize = len(ratingDf['MovieId'].unique())
movieSize

22033

In [8]:
#Load the raw movie table from its pkl file and display it
movie = pd.read_pickle(
    "../Data/pkl/1M/RawData/Movie.pkl"
)
movie

Unnamed: 0,MovieId,Title
0,0,Three Colors: Blue (Trois couleurs: Bleu) (1993)
1,1,Kalifornia (1993)
2,2,Weekend at Bernie's (1989)
3,3,Better Off Dead... (1985)
4,4,Waiting for Guffman (1996)
...,...,...
22028,22028,London Paris New York (2012)
22029,22029,Wild Zero (2000)
22030,22030,Mr. Accident (2000)
22031,22031,Max Steel (2016)


In [9]:
#Row count of the movie table, shown to compare against movieSize
len(movie.index)

22033

In [10]:
#Load the RatingBased RatedOnly training split from its pkl file and display it
trainingDf = pd.read_pickle(
    "../Data/pkl/1M/RatingBased/RatedOnly/Training.pkl"
)
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,718,3798,0.444444
1,5676,390,0.777778
2,2605,793,0.666667
3,8572,2762,0.000000
4,8354,1519,0.555556
...,...,...,...
750664,1926,3513,1.000000
750665,1926,215,1.000000
750666,8035,4518,0.111111
750667,7862,178,1.000000


In [11]:
#Load the RatingBased RatedOnly validation split from its pkl file and display it
validationDf = pd.read_pickle(
    "../Data/pkl/1M/RatingBased/RatedOnly/Validation.pkl"
)
validationDf

Unnamed: 0,UserId,MovieId,Rating
0,3831,3236,0.555556
1,9346,416,1.000000
2,3958,7048,0.222222
3,8807,586,0.777778
4,2024,4725,1.000000
...,...,...,...
125107,4186,851,0.555556
125108,7752,1601,0.777778
125109,9995,1564,0.888889
125110,2740,1248,1.000000


In [12]:
#Load the RatingBased RatedOnly test split from its pkl file and display it
testDf = pd.read_pickle(
    "../Data/pkl/1M/RatingBased/RatedOnly/Test.pkl"
)
testDf

Unnamed: 0,UserId,MovieId,Rating
0,7704,3933,0.222222
1,8640,759,0.777778
2,9964,306,0.555556
3,9489,1352,0.666667
4,6124,540,0.444444
...,...,...,...
125107,5356,4799,0.111111
125108,9730,13943,0.666667
125109,8653,1335,0.555556
125110,1896,1796,0.777778


In [13]:
#Stack the validation rows and the test rows into one frame, renumbering the rows from 0
totalTestDf = pd.concat(
    objs=[validationDf, testDf],
    axis=0,
    ignore_index=True,
)
totalTestDf

Unnamed: 0,UserId,MovieId,Rating
0,3831,3236,0.555556
1,9346,416,1.000000
2,3958,7048,0.222222
3,8807,586,0.777778
4,2024,4725,1.000000
...,...,...,...
250219,5356,4799,0.111111
250220,9730,13943,0.666667
250221,8653,1335,0.555556
250222,1896,1796,0.777778


In [14]:
#Representing users in the training dataset is more important than representing them in the test dataset,
#so every user should end up with more rows in trainingDf than in totalTestDf.
#For each user whose training count is below its test count, the first
#(train + test) // 2 + 1 - train of its test rows (in row order) are collected in rowList
#and dropped from totalTestDf; a later cell appends rowList to trainingDf.
#Counts are computed once with value_counts/groupby instead of re-filtering both frames per user id,
#and rows are addressed by index label, so this cell does not depend on ids being exactly
#0..userSize-1 or on totalTestDf carrying a default RangeIndex.
#totalTestDf is displayed at the end only to inspect the result.
testCounts = totalTestDf['UserId'].value_counts()
trainCounts = trainingDf['UserId'].value_counts().reindex(testCounts.index, fill_value=0)
#Users that have fewer training rows than test rows, and how many rows each must hand over
needsTransfer = trainCounts < testCounts
transferSizes = ((trainCounts + testCounts) // 2 + 1 - trainCounts)[needsTransfer]
#Position of every test row among the rows of its own user (0, 1, 2, ...)
rowRank = totalTestDf.groupby('UserId').cumcount()
#Users without a transfer size map to NaN, which compares False, so their rows stay in place
transferMask = rowRank < totalTestDf['UserId'].map(transferSizes)
#Stable sort keeps the row order inside each user, reproducing a user-by-user ascending scan
transferDf = totalTestDf.loc[transferMask, ['UserId', 'MovieId', 'Rating']].sort_values('UserId', kind='mergesort')
removeIndex = transferDf.index.tolist()
rowList = transferDf.values.tolist()
totalTestDf.drop(removeIndex, inplace=True)
totalTestDf.reset_index(drop=True, inplace=True)
totalTestDf

Unnamed: 0,UserId,MovieId,Rating
0,3831,3236,0.555556
1,9346,416,1.000000
2,3958,7048,0.222222
3,8807,586,0.777778
4,2024,4725,1.000000
...,...,...,...
249965,5356,4799,0.111111
249966,9730,13943,0.666667
249967,8653,1335,0.555556
249968,1896,1796,0.777778


In [15]:
#Number of rows collected in rowList by the preceding cell
len(rowList)

254

In [16]:
#Append the rows collected in rowList to the training dataframe so unrepresented users are covered
transferredDf = pd.DataFrame(rowList, columns=['UserId', 'MovieId', 'Rating'])
trainingDf = pd.concat([trainingDf, transferredDf], ignore_index=True)
#Store both id columns as 32-bit integers
trainingDf = trainingDf.astype({'UserId': np.int32, 'MovieId': np.int32})
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,718,3798,0.444444
1,5676,390,0.777778
2,2605,793,0.666667
3,8572,2762,0.000000
4,8354,1519,0.555556
...,...,...,...
750918,9673,166,1.000000
750919,9971,52,1.000000
750920,9971,1145,1.000000
750921,10041,5287,1.000000


In [17]:
#Distinct users present in the training data; every user has at least one row when this equals userSize
len(trainingDf['UserId'].unique())

10073

In [18]:
#Representing movies in the training dataset is more important than representing them in the test dataset,
#so every movie should end up with more rows in trainingDf than in totalTestDf.
#For each movie whose training count is below its test count, the first
#(train + test) // 2 + 1 - train of its test rows (in row order) are collected in rowList
#and dropped from totalTestDf; a later cell appends rowList to trainingDf.
#Counts are computed once with value_counts/groupby instead of re-filtering both frames per movie id,
#and rows are addressed by index label, so this cell does not depend on ids being exactly
#0..movieSize-1 or on totalTestDf carrying a default RangeIndex.
#totalTestDf is displayed at the end only to inspect the result.
testCounts = totalTestDf['MovieId'].value_counts()
trainCounts = trainingDf['MovieId'].value_counts().reindex(testCounts.index, fill_value=0)
#Movies that have fewer training rows than test rows, and how many rows each must hand over
needsTransfer = trainCounts < testCounts
transferSizes = ((trainCounts + testCounts) // 2 + 1 - trainCounts)[needsTransfer]
#Position of every test row among the rows of its own movie (0, 1, 2, ...)
rowRank = totalTestDf.groupby('MovieId').cumcount()
#Movies without a transfer size map to NaN, which compares False, so their rows stay in place
transferMask = rowRank < totalTestDf['MovieId'].map(transferSizes)
#Stable sort keeps the row order inside each movie, reproducing a movie-by-movie ascending scan
transferDf = totalTestDf.loc[transferMask, ['UserId', 'MovieId', 'Rating']].sort_values('MovieId', kind='mergesort')
removeIndex = transferDf.index.tolist()
rowList = transferDf.values.tolist()
totalTestDf.drop(removeIndex, inplace=True)
totalTestDf.reset_index(drop=True, inplace=True)
totalTestDf

Unnamed: 0,UserId,MovieId,Rating
0,3831,3236,0.555556
1,9346,416,1.000000
2,3958,7048,0.222222
3,8807,586,0.777778
4,2024,4725,1.000000
...,...,...,...
247206,5356,4799,0.111111
247207,9730,13943,0.666667
247208,8653,1335,0.555556
247209,1896,1796,0.777778


In [19]:
#Number of rows collected in rowList by the preceding cell
len(rowList)

2759

In [20]:
#Append the rows collected in rowList to the training dataframe so unrepresented movies are covered
transferredDf = pd.DataFrame(rowList, columns=['UserId', 'MovieId', 'Rating'])
trainingDf = pd.concat([trainingDf, transferredDf], ignore_index=True)
#Store both id columns as 32-bit integers
trainingDf = trainingDf.astype({'UserId': np.int32, 'MovieId': np.int32})
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,718,3798,0.444444
1,5676,390,0.777778
2,2605,793,0.666667
3,8572,2762,0.000000
4,8354,1519,0.555556
...,...,...,...
753677,10026,22012,0.444444
753678,10037,22014,0.666667
753679,10037,22017,0.555556
753680,10037,22019,0.888889


In [21]:
#Shuffle trainingDf and renumber its rows from 0.
#A fixed random_state makes the shuffle (and therefore the saved pkl file) reproducible across runs.
trainingDf = trainingDf.sample(frac=1, random_state=42).reset_index(drop=True)
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,3448,1096,0.666667
1,7042,465,0.777778
2,4214,3691,0.555556
3,9967,767,0.888889
4,324,1410,0.333333
...,...,...,...
753677,571,7554,0.333333
753678,4795,17048,0.333333
753679,3685,1941,0.555556
753680,1366,658,0.555556


In [22]:
#Distinct movies present in the training data; every movie has at least one row when this equals movieSize
len(trainingDf['MovieId'].unique())

22033

In [23]:
#Split totalTestDf in half into validation data and test data.
#A fixed random_state makes the split reproducible across runs.
validationDf, testDf = train_test_split(totalTestDf, test_size=0.5, random_state=42)
#Rebind instead of resetting in place, so the frame returned by the split is never mutated
validationDf = validationDf.reset_index(drop=True)
validationDf

Unnamed: 0,UserId,MovieId,Rating
0,7981,215,0.777778
1,8165,3195,0.777778
2,5490,174,0.777778
3,5080,802,0.888889
4,7693,344,0.555556
...,...,...,...
123600,3821,265,0.888889
123601,2074,807,1.000000
123602,3930,1942,0.777778
123603,8844,1395,0.777778


In [24]:
#Renumber the rows of the test dataframe from 0 and display it
testDf = testDf.reset_index(drop=True)
testDf

Unnamed: 0,UserId,MovieId,Rating
0,2092,1440,0.777778
1,464,4450,0.777778
2,787,617,0.777778
3,3980,376,1.000000
4,3111,165,0.777778
...,...,...,...
123601,338,4367,0.000000
123602,4526,1536,0.777778
123603,8778,950,0.888889
123604,9104,674,0.888889


In [25]:
#Persist the organized training, validation and test frames as pkl files for future use
for frame, targetPath in (
    (trainingDf, "../Data/pkl/1M/Qualified/RatingBased/RatedOnly/Training.pkl"),
    (validationDf, "../Data/pkl/1M/Qualified/RatingBased/RatedOnly/Validation.pkl"),
    (testDf, "../Data/pkl/1M/Qualified/RatingBased/RatedOnly/Test.pkl"),
):
    frame.to_pickle(targetPath)

In [26]:
#Load the Qualified RatingBased RatedOnly training split back from its pkl file and display it
trainingDf = pd.read_pickle(
    "../Data/pkl/1M/Qualified/RatingBased/RatedOnly/Training.pkl"
)
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,3448,1096,0.666667
1,7042,465,0.777778
2,4214,3691,0.555556
3,9967,767,0.888889
4,324,1410,0.333333
...,...,...,...
753677,571,7554,0.333333
753678,4795,17048,0.333333
753679,3685,1941,0.555556
753680,1366,658,0.555556


In [27]:
#Load the Qualified RatingBased RatedOnly validation split back from its pkl file and display it
validationDf = pd.read_pickle(
    "../Data/pkl/1M/Qualified/RatingBased/RatedOnly/Validation.pkl"
)
validationDf

Unnamed: 0,UserId,MovieId,Rating
0,7981,215,0.777778
1,8165,3195,0.777778
2,5490,174,0.777778
3,5080,802,0.888889
4,7693,344,0.555556
...,...,...,...
123600,3821,265,0.888889
123601,2074,807,1.000000
123602,3930,1942,0.777778
123603,8844,1395,0.777778


In [28]:
#Load the Qualified RatingBased RatedOnly test split back from its pkl file and display it
testDf = pd.read_pickle(
    "../Data/pkl/1M/Qualified/RatingBased/RatedOnly/Test.pkl"
)
testDf

Unnamed: 0,UserId,MovieId,Rating
0,2092,1440,0.777778
1,464,4450,0.777778
2,787,617,0.777778
3,3980,376,1.000000
4,3111,165,0.777778
...,...,...,...
123601,338,4367,0.000000
123602,4526,1536,0.777778
123603,8778,950,0.888889
123604,9104,674,0.888889


In [29]:
#Stack the training, validation and test rows into one frame, renumbering the rows from 0
df = pd.concat(
    objs=[trainingDf, validationDf, testDf],
    axis=0,
    ignore_index=True,
)
df

Unnamed: 0,UserId,MovieId,Rating
0,3448,1096,0.666667
1,7042,465,0.777778
2,4214,3691,0.555556
3,9967,767,0.888889
4,324,1410,0.333333
...,...,...,...
1000888,338,4367,0.000000
1000889,4526,1536,0.777778
1000890,8778,950,0.888889
1000891,9104,674,0.888889


In [30]:
#True if df holds at least one missing value anywhere
df.isna().to_numpy().any()

False

In [31]:
#True if some (UserId, MovieId) pair occurs more than once in df
df.duplicated(subset=['UserId', 'MovieId']).any()

False

In [32]:
#Number of distinct users in the Qualified data
len(df['UserId'].unique())

10073

In [33]:
#Number of distinct movies in the Qualified data
len(df['MovieId'].unique())

22033

In [34]:
#Load the RatingBased UnratedSample training split from its pkl file and display it
trainingDf = pd.read_pickle(
    "../Data/pkl/1M/RatingBased/UnratedSample/Training.pkl"
)
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,5596,3959,1.0
1,2866,1564,1.0
2,1465,42,0.6
3,2982,862,0.8
4,4795,1443,0.6
...,...,...,...
758170,3387,467,0.4
758171,1371,1092,0.5
758172,5867,781,0.7
758173,8261,1433,0.6


In [35]:
#Load the RatingBased UnratedSample validation split from its pkl file and display it
validationDf = pd.read_pickle(
    "../Data/pkl/1M/RatingBased/UnratedSample/Validation.pkl"
)
validationDf

Unnamed: 0,UserId,MovieId,Rating
0,4378,764,0.7
1,2655,272,0.8
2,4185,2727,1.0
3,5218,2460,0.7
4,6690,225,0.6
...,...,...,...
126358,3161,15608,0.4
126359,2797,338,0.4
126360,5520,1308,0.8
126361,6886,179,0.8


In [36]:
#Load the RatingBased UnratedSample test split from its pkl file and display it
testDf = pd.read_pickle(
    "../Data/pkl/1M/RatingBased/UnratedSample/Test.pkl"
)
testDf

Unnamed: 0,UserId,MovieId,Rating
0,5339,726,0.7
1,6121,389,0.9
2,6344,42,0.6
3,234,3126,1.0
4,1119,253,0.5
...,...,...,...
126358,9435,617,1.0
126359,8615,1065,0.8
126360,5746,18474,0.3
126361,6935,156,0.5


In [37]:
#Stack the validation rows and the test rows into one frame, renumbering the rows from 0
totalTestDf = pd.concat(
    objs=[validationDf, testDf],
    axis=0,
    ignore_index=True,
)
totalTestDf

Unnamed: 0,UserId,MovieId,Rating
0,4378,764,0.7
1,2655,272,0.8
2,4185,2727,1.0
3,5218,2460,0.7
4,6690,225,0.6
...,...,...,...
252721,9435,617,1.0
252722,8615,1065,0.8
252723,5746,18474,0.3
252724,6935,156,0.5


In [38]:
#Representing users in the training dataset is more important than representing them in the test dataset,
#so every user should end up with more rows in trainingDf than in totalTestDf.
#For each user whose training count is below its test count, the first
#(train + test) // 2 + 1 - train of its test rows (in row order) are collected in rowList
#and dropped from totalTestDf; a later cell appends rowList to trainingDf.
#Counts are computed once with value_counts/groupby instead of re-filtering both frames per user id,
#and rows are addressed by index label, so this cell does not depend on ids being exactly
#0..userSize-1 or on totalTestDf carrying a default RangeIndex.
#totalTestDf is displayed at the end only to inspect the result.
testCounts = totalTestDf['UserId'].value_counts()
trainCounts = trainingDf['UserId'].value_counts().reindex(testCounts.index, fill_value=0)
#Users that have fewer training rows than test rows, and how many rows each must hand over
needsTransfer = trainCounts < testCounts
transferSizes = ((trainCounts + testCounts) // 2 + 1 - trainCounts)[needsTransfer]
#Position of every test row among the rows of its own user (0, 1, 2, ...)
rowRank = totalTestDf.groupby('UserId').cumcount()
#Users without a transfer size map to NaN, which compares False, so their rows stay in place
transferMask = rowRank < totalTestDf['UserId'].map(transferSizes)
#Stable sort keeps the row order inside each user, reproducing a user-by-user ascending scan
transferDf = totalTestDf.loc[transferMask, ['UserId', 'MovieId', 'Rating']].sort_values('UserId', kind='mergesort')
removeIndex = transferDf.index.tolist()
rowList = transferDf.values.tolist()
totalTestDf.drop(removeIndex, inplace=True)
totalTestDf.reset_index(drop=True, inplace=True)
totalTestDf

Unnamed: 0,UserId,MovieId,Rating
0,4378,764,0.7
1,2655,272,0.8
2,4185,2727,1.0
3,5218,2460,0.7
4,6690,225,0.6
...,...,...,...
252520,9435,617,1.0
252521,8615,1065,0.8
252522,5746,18474,0.3
252523,6935,156,0.5


In [39]:
#Number of rows collected in rowList by the preceding cell
len(rowList)

201

In [40]:
#Append the rows collected in rowList to the training dataframe so unrepresented users are covered
transferredDf = pd.DataFrame(rowList, columns=['UserId', 'MovieId', 'Rating'])
trainingDf = pd.concat([trainingDf, transferredDf], ignore_index=True)
#Store both id columns as 32-bit integers
trainingDf = trainingDf.astype({'UserId': np.int32, 'MovieId': np.int32})
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,5596,3959,1.0
1,2866,1564,1.0
2,1465,42,0.6
3,2982,862,0.8
4,4795,1443,0.6
...,...,...,...
758371,9927,101,1.0
758372,9943,1639,0.8
758373,9943,1902,0.6
758374,9968,337,1.0


In [41]:
#Distinct users present in the training data; every user has at least one row when this equals userSize
len(trainingDf['UserId'].unique())

10073

In [42]:
#Representing movies in the training dataset is more important than representing them in the test dataset,
#so every movie should end up with more rows in trainingDf than in totalTestDf.
#For each movie whose training count is below its test count, the first
#(train + test) // 2 + 1 - train of its test rows (in row order) are collected in rowList
#and dropped from totalTestDf; a later cell appends rowList to trainingDf.
#Counts are computed once with value_counts/groupby instead of re-filtering both frames per movie id,
#and rows are addressed by index label, so this cell does not depend on ids being exactly
#0..movieSize-1 or on totalTestDf carrying a default RangeIndex.
#totalTestDf is displayed at the end only to inspect the result.
testCounts = totalTestDf['MovieId'].value_counts()
trainCounts = trainingDf['MovieId'].value_counts().reindex(testCounts.index, fill_value=0)
#Movies that have fewer training rows than test rows, and how many rows each must hand over
needsTransfer = trainCounts < testCounts
transferSizes = ((trainCounts + testCounts) // 2 + 1 - trainCounts)[needsTransfer]
#Position of every test row among the rows of its own movie (0, 1, 2, ...)
rowRank = totalTestDf.groupby('MovieId').cumcount()
#Movies without a transfer size map to NaN, which compares False, so their rows stay in place
transferMask = rowRank < totalTestDf['MovieId'].map(transferSizes)
#Stable sort keeps the row order inside each movie, reproducing a movie-by-movie ascending scan
transferDf = totalTestDf.loc[transferMask, ['UserId', 'MovieId', 'Rating']].sort_values('MovieId', kind='mergesort')
removeIndex = transferDf.index.tolist()
rowList = transferDf.values.tolist()
totalTestDf.drop(removeIndex, inplace=True)
totalTestDf.reset_index(drop=True, inplace=True)
totalTestDf

Unnamed: 0,UserId,MovieId,Rating
0,4378,764,0.7
1,2655,272,0.8
2,4185,2727,1.0
3,5218,2460,0.7
4,6690,225,0.6
...,...,...,...
250193,9435,617,1.0
250194,8615,1065,0.8
250195,5746,18474,0.3
250196,6935,156,0.5


In [43]:
#Number of rows collected in rowList by the preceding cell
len(rowList)

2327

In [44]:
#Append the rows collected in rowList to the training dataframe so unrepresented movies are covered
transferredDf = pd.DataFrame(rowList, columns=['UserId', 'MovieId', 'Rating'])
trainingDf = pd.concat([trainingDf, transferredDf], ignore_index=True)
#Store both id columns as 32-bit integers
trainingDf = trainingDf.astype({'UserId': np.int32, 'MovieId': np.int32})
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,5596,3959,1.0
1,2866,1564,1.0
2,1465,42,0.6
3,2982,862,0.8
4,4795,1443,0.6
...,...,...,...
760698,10018,22011,0.4
760699,10037,22021,0.6
760700,1404,22027,0.0
760701,10037,22027,0.8


In [45]:
#Shuffle trainingDf and renumber its rows from 0.
#A fixed random_state makes the shuffle (and therefore the saved pkl file) reproducible across runs.
trainingDf = trainingDf.sample(frac=1, random_state=42).reset_index(drop=True)
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,5567,5871,0.6
1,8242,9624,0.1
2,4691,285,0.6
3,6937,2827,0.6
4,6948,4235,1.0
...,...,...,...
760698,939,838,1.0
760699,3757,806,0.4
760700,7723,5315,0.8
760701,4152,1670,0.8


In [46]:
#Distinct movies present in the training data; every movie has at least one row when this equals movieSize
len(trainingDf['MovieId'].unique())

22033

In [47]:
#Split totalTestDf in half into validation data and test data.
#A fixed random_state makes the split reproducible across runs.
validationDf, testDf = train_test_split(totalTestDf, test_size=0.5, random_state=42)
#Rebind instead of resetting in place, so the frame returned by the split is never mutated
validationDf = validationDf.reset_index(drop=True)
validationDf

Unnamed: 0,UserId,MovieId,Rating
0,7411,7484,0.8
1,2024,6413,0.5
2,9999,8864,0.8
3,1522,626,0.7
4,4208,566,0.9
...,...,...,...
125094,3440,224,0.6
125095,4940,28,0.7
125096,7697,945,0.8
125097,4064,139,0.6


In [48]:
#Renumber the rows of the test dataframe from 0 and display it
testDf = testDf.reset_index(drop=True)
testDf

Unnamed: 0,UserId,MovieId,Rating
0,2557,168,1.0
1,7470,242,0.6
2,6915,1400,0.8
3,1066,54,0.4
4,2037,159,0.4
...,...,...,...
125094,5060,2460,0.7
125095,7302,701,0.7
125096,7724,215,0.1
125097,718,1117,0.6


In [49]:
#Persist the organized training, validation and test frames as pkl files for future use
for frame, targetPath in (
    (trainingDf, "../Data/pkl/1M/Qualified/RatingBased/UnratedSample/Training.pkl"),
    (validationDf, "../Data/pkl/1M/Qualified/RatingBased/UnratedSample/Validation.pkl"),
    (testDf, "../Data/pkl/1M/Qualified/RatingBased/UnratedSample/Test.pkl"),
):
    frame.to_pickle(targetPath)

In [50]:
#Load the Qualified RatingBased UnratedSample training split back from its pkl file and display it
trainingDf = pd.read_pickle(
    "../Data/pkl/1M/Qualified/RatingBased/UnratedSample/Training.pkl"
)
trainingDf

Unnamed: 0,UserId,MovieId,Rating
0,5567,5871,0.6
1,8242,9624,0.1
2,4691,285,0.6
3,6937,2827,0.6
4,6948,4235,1.0
...,...,...,...
760698,939,838,1.0
760699,3757,806,0.4
760700,7723,5315,0.8
760701,4152,1670,0.8


In [51]:
#Load the Qualified RatingBased UnratedSample validation split back from its pkl file and display it
validationDf = pd.read_pickle(
    "../Data/pkl/1M/Qualified/RatingBased/UnratedSample/Validation.pkl"
)
validationDf

Unnamed: 0,UserId,MovieId,Rating
0,7411,7484,0.8
1,2024,6413,0.5
2,9999,8864,0.8
3,1522,626,0.7
4,4208,566,0.9
...,...,...,...
125094,3440,224,0.6
125095,4940,28,0.7
125096,7697,945,0.8
125097,4064,139,0.6


In [52]:
#Load the Qualified RatingBased UnratedSample test split back from its pkl file and display it
testDf = pd.read_pickle(
    "../Data/pkl/1M/Qualified/RatingBased/UnratedSample/Test.pkl"
)
testDf

Unnamed: 0,UserId,MovieId,Rating
0,2557,168,1.0
1,7470,242,0.6
2,6915,1400,0.8
3,1066,54,0.4
4,2037,159,0.4
...,...,...,...
125094,5060,2460,0.7
125095,7302,701,0.7
125096,7724,215,0.1
125097,718,1117,0.6


In [53]:
#Stack the training, validation and test rows into one frame, renumbering the rows from 0
df = pd.concat(
    objs=[trainingDf, validationDf, testDf],
    axis=0,
    ignore_index=True,
)
df

Unnamed: 0,UserId,MovieId,Rating
0,5567,5871,0.6
1,8242,9624,0.1
2,4691,285,0.6
3,6937,2827,0.6
4,6948,4235,1.0
...,...,...,...
1010896,5060,2460,0.7
1010897,7302,701,0.7
1010898,7724,215,0.1
1010899,718,1117,0.6


In [54]:
#True if df holds at least one missing value anywhere
df.isna().to_numpy().any()

False

In [55]:
#True if some (UserId, MovieId) pair occurs more than once in df
df.duplicated(subset=['UserId', 'MovieId']).any()

False

In [56]:
#Number of distinct users in the Qualified data
len(df['UserId'].unique())

10073

In [57]:
#Number of distinct movies in the Qualified data
len(df['MovieId'].unique())

22033