In [259]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from IPython.display import display
from sklearn.pipeline import Pipeline
import category_encoders as ce

import sys
sys.path.insert(1, '../')

from src import preprocessing as pp
from src import analysis

# Enable module reloading
%load_ext autoreload
%autoreload 2

pd.set_option('display.max_columns', None)

plt.rcParams['figure.figsize'] = (10, 6)
%config InlineBackend.figure_format='retina'
plt.rcParams.update({'font.size': 15})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [261]:
pp.ExportSimilarBooksRating

src.preprocessing.transform_column.ExportSimilarBooksRating

In [3]:
from sklearn.base import TransformerMixin
import json
from sklearn.model_selection import train_test_split


# Data preprocessing

In this notebook, we preprocess our data so that it can be used during training.

## Load datasets

In [93]:
# Read the raw tables from CSV files in ../data (paths are relative to the
# notebook's working directory).
books = pd.read_csv('../data/books.csv')
reviews = pd.read_csv('../data/reviews.csv')
# intr = pd.read_csv('../data/interactions.csv')
# `authors` is passed to the pipeline's ExportAuthorsAverageRating step later on.
authors = pd.read_csv('../data/authors.csv')

## Merge data

In [12]:
# Join every review to its book's metadata. No `how` is given, so pandas uses
# its default inner join on `book_id`.
data = books.merge(reviews, on='book_id')

In [13]:
data.head(3)

Unnamed: 0,isbn,text_reviews_count,series,country_code,language_code,popular_shelves,asin,is_ebook,average_rating,kindle_asin,similar_books,description,format,link,authors,publisher,num_pages,publication_day,isbn13,publication_month,edition_information,publication_year,url,image_url,book_id,ratings_count,work_id,title,title_without_series,user_id,review_id,rating,review_text,date_added,date_updated,read_at,started_at,n_votes,n_comments
0,,1,['147734'],US,,"[{'count': '1057', 'name': 'to-read'}, {'count...",B0056A00P4,True,4.04,B0056A00P4,"['519546', '1295074', '21407416']",This is the final tale in the bestselling auth...,,https://www.goodreads.com/book/show/12182387-t...,"[{'author_id': '50873', 'role': ''}, {'author_...",,,,,,,,https://www.goodreads.com/book/show/12182387-t...,https://s.gr-assets.com/assets/nophoto/book/11...,12182387,4,285263,"The Passion (Dark Visions, #3)","The Passion (Dark Visions, #3)",8a6085f339853bb493a8341f0d7e3bdf,fc61f4a89afd084140b9ea2090e552e2,5,Nachdem Gabriel und Lydia verschwunden sind un...,Fri Sep 09 12:32:42 -0700 2011,Wed Oct 26 11:11:47 -0700 2011,Tue Oct 25 00:00:00 -0700 2011,Sun Oct 23 00:00:00 -0700 2011,0,0
1,,2,['425995'],US,,"[{'count': '1010', 'name': 'to-read'}, {'count...",B006KLYIAG,True,3.8,B006KLYIAG,"['13400912', '13327517', '18107102', '15797097...",Life should be simple for Cassie.\nFor the sma...,,https://www.goodreads.com/book/show/20135365-h...,"[{'author_id': '5395324', 'role': ''}]",,,,,,,,https://www.goodreads.com/book/show/20135365-h...,https://s.gr-assets.com/assets/nophoto/book/11...,20135365,5,18450480,Hope's Daughter,Hope's Daughter,c7cafc5c262441aaa9fc8c816dcd20d5,9f5ee0e6211043932bcb46793222c2f6,4,I received this book from the author in exchan...,Wed Apr 04 11:09:28 -0700 2012,Thu Mar 20 11:44:09 -0700 2014,Thu Mar 20 11:44:09 -0700 2014,Wed Mar 19 00:00:00 -0700 2014,2,0
2,698143760.0,17,['493993'],US,,"[{'count': '1799', 'name': 'fantasy'}, {'count...",,True,3.8,,"['15728807', '17182499', '15673520', '16081758...",Wanted by no one.\nHunted by everyone.\nSixtee...,ebook,https://www.goodreads.com/book/show/21401181-h...,"[{'author_id': '7314532', 'role': ''}]",Viking Children's,416.0,4.0,9780698143760.0,3.0,,2014.0,https://www.goodreads.com/book/show/21401181-h...,https://images.gr-assets.com/books/1394747643m...,21401181,33,24802827,"Half Bad (Half Life, #1)","Half Bad (Half Life, #1)",88d99966e16ad22b3d824758e41bdc31,d1c5a076f2433511b239dd4745d88f99,4,Dark story about a young soon to be witch boy....,Fri Mar 24 04:36:25 -0700 2017,Mon Mar 27 13:28:45 -0700 2017,Mon Mar 27 13:28:45 -0700 2017,Fri Mar 24 04:36:26 -0700 2017,0,0


## Train/Test split

In [14]:
# Feature matrix: every merged column except the target.
drop_cols = ['rating']
X = data.drop(columns=drop_cols)

In [15]:
# Prediction target: the `rating` column of the merged frame.
y = data['rating']

In [16]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

## Create preprocessing pipeline

### Books
In this section, we will try to preprocess data from the books dataset.

Problems that need to be solved by preprocessing are:
- drop columns that do not contain any useful information for our task (these are also the columns that contain most of the missing values)
- replace the authors column with the average rating of the authors
- export shelves (by default 'favorites', 'currently-reading', 'to-read', or any other) with their number of votes
- replace missing values in the columns we are planning to use later
- normalize and scale the numeric attributes

### Reviews

In this section, we will try to preprocess data from the reviews dataset.

Problems that need to be solved by preprocessing are:
- drop columns that do not contain any useful information
- since we found out during analysis that the longest reviews contain lots of useless data, we will set a threshold for the maximum length of a review (in number of words)
- get rid of reviews that are not in English
- remove URLs from reviews
- remove other special characters

In [21]:
X_train.columns

Index(['isbn', 'text_reviews_count', 'series', 'country_code', 'language_code',
       'popular_shelves', 'asin', 'is_ebook', 'average_rating', 'kindle_asin',
       'similar_books', 'description', 'format', 'link', 'authors',
       'publisher', 'num_pages', 'publication_day', 'isbn13',
       'publication_month', 'edition_information', 'publication_year', 'url',
       'image_url', 'book_id', 'ratings_count', 'work_id', 'title',
       'title_without_series', 'user_id', 'review_id', 'review_text',
       'date_added', 'date_updated', 'read_at', 'started_at', 'n_votes',
       'n_comments'],
      dtype='object')

In [253]:
X_train.shape

(1601233, 38)

In [262]:
# Columns removed by the first pipeline step, before any feature is derived.
drop_cols1 = [
    'isbn', 'series', 'country_code', 'language_code',
    'asin', 'kindle_asin',
    'description',
    'format',
    'link',
    'publisher',
    'num_pages',
    'publication_day', 'isbn13', 'publication_month',
    'edition_information',
    'publication_year', 'url', 'image_url',
    'title',
    'title_without_series',
    'date_added', 'date_updated', 'read_at', 'started_at',
    'n_votes',
    'n_comments',
]

# Columns removed by the last pipeline step. They are still referenced by the
# export steps ('authors', 'popular_shelves'), so they can only be dropped at
# the end. This list must be defined here: the pipeline below references it,
# and leaving it commented out makes the cell fail with a NameError on a
# fresh kernel.
drop_cols2 = ['popular_shelves', 'authors']

# Shelf names passed to ExportBookShelves.
tags = ['favorites', 'currently-reading', 'to-read']

# Only used by the (currently disabled) EncodeCategories step below.
encoder = ce.OneHotEncoder()

# Steps run in the listed order. The transformers live in src.preprocessing;
# the comments below describe them only as far as their names and arguments
# show.
ppl = Pipeline([
    ('DropUnusedCols1', pp.DropColumns(drop_cols1)),
    ('SelectTopNPercentileOfBooks', pp.SelectBooksWithNPercentile('text_reviews_count', 0.9)),
    # Adds 'authors_average_rating', looked up in the `authors` table.
    ('ExportAuthorsAverageRating', pp.ExportAuthorsAverageRating('authors', 'authors_average_rating', authors)),
    # Adds 'sim_books_average_rating' from the book_id -> average_rating lookup.
    ('ExportSimilarBooksAverageRating', pp.ExportSimilarBooksRating(
        books[['book_id', 'average_rating']], 'similar_books', 'sim_books_average_rating')),
    ('ExtraxtPopularShelves', pp.ExportBookShelves('popular_shelves', tags)),
    ('EmptyValuesFilter', pp.EmptyValuesFilter(['review_text'])),
    ('TextPreprocessor', pp.TextPreprocessor('review_text')),
    # NOTE(review): 0 and 2000 are presumably the min/max review length - confirm
    # the unit (words vs. characters) against ReviewLengthFilter.
    ('ReviewLengthFilter', pp.ReviewLengthFilter('review_text', 0, 2000)),
    ('ReviewsLanguageFilter', pp.ReviewsLanguageFilter('review_text', 'en')),
    ('DropUnusedCols2', pp.DropColumns(drop_cols2)),
    # NOTE(review): category encoding is disabled; TODO confirm whether it
    # should be re-enabled or removed together with `encoder`.
    # ('EncodeCategories', pp.EncodeCategories(encoder)),
])

# Fit on the first 5000 training rows only. Pipeline.fit returns the pipeline
# itself, so `model` and `ppl` refer to the same fitted object.
model = ppl.fit(X_train.iloc[:5000])

(fit) Drop columns: ['isbn', 'series', 'country_code', 'language_code', 'asin', 'kindle_asin', 'description', 'format', 'link', 'publisher', 'num_pages', 'publication_day', 'isbn13', 'publication_month', 'edition_information', 'publication_year', 'url', 'image_url', 'title', 'title_without_series', 'date_added', 'date_updated', 'read_at', 'started_at', 'n_votes', 'n_comments']
(transform) Drop columns: ['isbn', 'series', 'country_code', 'language_code', 'asin', 'kindle_asin', 'description', 'format', 'link', 'publisher', 'num_pages', 'publication_day', 'isbn13', 'publication_month', 'edition_information', 'publication_year', 'url', 'image_url', 'title', 'title_without_series', 'date_added', 'date_updated', 'read_at', 'started_at', 'n_votes', 'n_comments']
(fit) Select books with: text_reviews_count >= 22363.0
(transform) Select books with: text_reviews_count >= 22363.0
(fit) Export authors average rating
(transform) Export authors average rating
(fit) Export similar books average ratin

In [263]:
# Run the fitted pipeline over the first 5000 training rows.
pX_train = ppl.transform(X_train.head(5000))

(transform) Drop columns: ['isbn', 'series', 'country_code', 'language_code', 'asin', 'kindle_asin', 'description', 'format', 'link', 'publisher', 'num_pages', 'publication_day', 'isbn13', 'publication_month', 'edition_information', 'publication_year', 'url', 'image_url', 'title', 'title_without_series', 'date_added', 'date_updated', 'read_at', 'started_at', 'n_votes', 'n_comments']
(transform) Select books with: text_reviews_count >= 22363.0
(transform) Export authors average rating
(transform) Export similar books average rating
(transform) ExportBookShelves, tag_col: popular_shelves, tags:['favorites', 'currently-reading', 'to-read']
(transform) Empty values filter
(transform) Text preprocessing
(transform) Review length filter
(transform) Reviews language filter
(transform) Drop columns: ['popular_shelves', 'authors']
(transform) Category encoder OneHotEncoder(cols=['similar_books', 'user_id', 'review_id', 'review_text'],
              drop_invariant=False, handle_missing='value',


In [265]:
pX_train.head()

Unnamed: 0,text_reviews_count,is_ebook,average_rating,similar_books_1,similar_books_2,similar_books_3,similar_books_4,similar_books_5,similar_books_6,similar_books_7,similar_books_8,similar_books_9,similar_books_10,similar_books_11,similar_books_12,similar_books_13,similar_books_14,similar_books_15,similar_books_16,similar_books_17,similar_books_18,similar_books_19,similar_books_20,similar_books_21,similar_books_22,similar_books_23,similar_books_24,similar_books_25,similar_books_26,similar_books_27,similar_books_28,similar_books_29,book_id,ratings_count,work_id,user_id_1,user_id_2,user_id_3,user_id_4,user_id_5,user_id_6,user_id_7,user_id_8,user_id_9,user_id_10,user_id_11,user_id_12,user_id_13,user_id_14,user_id_15,user_id_16,user_id_17,user_id_18,user_id_19,user_id_20,user_id_21,user_id_22,user_id_23,user_id_24,user_id_25,user_id_26,user_id_27,user_id_28,user_id_29,user_id_30,user_id_31,user_id_32,user_id_33,user_id_34,user_id_35,user_id_36,user_id_37,user_id_38,user_id_39,user_id_40,user_id_41,user_id_42,user_id_43,user_id_44,user_id_45,user_id_46,user_id_47,user_id_48,user_id_49,user_id_50,user_id_51,user_id_52,user_id_53,user_id_54,user_id_55,user_id_56,user_id_57,user_id_58,user_id_59,user_id_60,user_id_61,user_id_62,user_id_63,user_id_64,user_id_65,user_id_66,user_id_67,user_id_68,user_id_69,user_id_70,user_id_71,user_id_72,user_id_73,user_id_74,user_id_75,user_id_76,user_id_77,user_id_78,user_id_79,user_id_80,user_id_81,user_id_82,user_id_83,user_id_84,user_id_85,user_id_86,user_id_87,user_id_88,user_id_89,user_id_90,user_id_91,user_id_92,user_id_93,user_id_94,user_id_95,user_id_96,user_id_97,user_id_98,user_id_99,user_id_100,user_id_101,user_id_102,user_id_103,user_id_104,user_id_105,user_id_106,user_id_107,user_id_108,user_id_109,user_id_110,user_id_111,user_id_112,user_id_113,user_id_114,user_id_115,user_id_116,user_id_117,user_id_118,user_id_119,user_id_120,user_id_121,user_id_122,user_id_123,user_id_124,user_id_125,user_id_126,user_id_127,user_id_128,user
_id_129,user_id_130,user_id_131,user_id_132,user_id_133,user_id_134,user_id_135,user_id_136,user_id_137,user_id_138,user_id_139,user_id_140,user_id_141,user_id_142,user_id_143,user_id_144,user_id_145,user_id_146,user_id_147,user_id_148,user_id_149,user_id_150,user_id_151,user_id_152,user_id_153,user_id_154,user_id_155,user_id_156,user_id_157,user_id_158,user_id_159,user_id_160,user_id_161,user_id_162,user_id_163,user_id_164,user_id_165,user_id_166,user_id_167,user_id_168,user_id_169,user_id_170,user_id_171,user_id_172,user_id_173,user_id_174,user_id_175,user_id_176,user_id_177,user_id_178,user_id_179,user_id_180,user_id_181,user_id_182,user_id_183,user_id_184,user_id_185,user_id_186,user_id_187,user_id_188,user_id_189,user_id_190,user_id_191,user_id_192,user_id_193,user_id_194,user_id_195,user_id_196,user_id_197,user_id_198,user_id_199,user_id_200,user_id_201,user_id_202,user_id_203,user_id_204,user_id_205,user_id_206,user_id_207,user_id_208,user_id_209,user_id_210,user_id_211,user_id_212,user_id_213,user_id_214,user_id_215,user_id_216,user_id_217,user_id_218,user_id_219,user_id_220,user_id_221,user_id_222,user_id_223,user_id_224,user_id_225,user_id_226,user_id_227,user_id_228,user_id_229,user_id_230,user_id_231,user_id_232,user_id_233,user_id_234,user_id_235,user_id_236,user_id_237,user_id_238,user_id_239,user_id_240,user_id_241,user_id_242,user_id_243,user_id_244,user_id_245,user_id_246,user_id_247,user_id_248,user_id_249,user_id_250,user_id_251,user_id_252,user_id_253,user_id_254,user_id_255,user_id_256,user_id_257,user_id_258,user_id_259,user_id_260,user_id_261,user_id_262,user_id_263,user_id_264,user_id_265,user_id_266,user_id_267,user_id_268,user_id_269,user_id_270,user_id_271,user_id_272,user_id_273,user_id_274,user_id_275,user_id_276,user_id_277,user_id_278,user_id_279,user_id_280,user_id_281,user_id_282,user_id_283,user_id_284,user_id_285,user_id_286,user_id_287,user_id_288,user_id_289,user_id_290,user_id_291,user_id_292,user_id_293,user_id_294,user_id_295,
user_id_296,user_id_297,user_id_298,user_id_299,user_id_300,user_id_301,user_id_302,user_id_303,user_id_304,user_id_305,user_id_306,user_id_307,user_id_308,user_id_309,user_id_310,user_id_311,user_id_312,user_id_313,user_id_314,user_id_315,user_id_316,user_id_317,user_id_318,user_id_319,user_id_320,user_id_321,user_id_322,user_id_323,user_id_324,user_id_325,user_id_326,user_id_327,user_id_328,user_id_329,user_id_330,user_id_331,user_id_332,user_id_333,user_id_334,user_id_335,user_id_336,user_id_337,user_id_338,user_id_339,user_id_340,user_id_341,user_id_342,user_id_343,user_id_344,user_id_345,user_id_346,user_id_347,user_id_348,user_id_349,user_id_350,user_id_351,user_id_352,user_id_353,user_id_354,user_id_355,user_id_356,user_id_357,user_id_358,user_id_359,user_id_360,user_id_361,user_id_362,user_id_363,user_id_364,user_id_365,user_id_366,user_id_367,user_id_368,user_id_369,user_id_370,user_id_371,user_id_372,user_id_373,user_id_374,user_id_375,user_id_376,user_id_377,user_id_378,user_id_379,user_id_380,user_id_381,user_id_382,user_id_383,user_id_384,user_id_385,user_id_386,user_id_387,user_id_388,user_id_389,user_id_390,user_id_391,user_id_392,user_id_393,user_id_394,user_id_395,user_id_396,user_id_397,user_id_398,user_id_399,user_id_400,user_id_401,user_id_402,user_id_403,user_id_404,user_id_405,user_id_406,user_id_407,user_id_408,user_id_409,user_id_410,user_id_411,user_id_412,user_id_413,user_id_414,user_id_415,user_id_416,user_id_417,user_id_418,user_id_419,user_id_420,user_id_421,user_id_422,user_id_423,user_id_424,user_id_425,user_id_426,user_id_427,user_id_428,user_id_429,user_id_430,user_id_431,user_id_432,user_id_433,user_id_434,user_id_435,user_id_436,user_id_437,user_id_438,user_id_439,user_id_440,user_id_441,user_id_442,user_id_443,user_id_444,user_id_445,user_id_446,user_id_447,user_id_448,user_id_449,user_id_450,user_id_451,user_id_452,user_id_453,user_id_454,user_id_455,user_id_456,user_id_457,user_id_458,user_id_459,user_id_460,user_id_461,user_id_
462,user_id_463,user_id_464,user_id_465,user_id_466,user_id_467,user_id_468,user_id_469,user_id_470,review_id_1,review_id_2,review_id_3,review_id_4,review_id_5,review_id_6,review_id_7,review_id_8,review_id_9,review_id_10,review_id_11,review_id_12,review_id_13,review_id_14,review_id_15,review_id_16,review_id_17,review_id_18,review_id_19,review_id_20,review_id_21,review_id_22,review_id_23,review_id_24,review_id_25,review_id_26,review_id_27,review_id_28,review_id_29,review_id_30,review_id_31,review_id_32,review_id_33,review_id_34,review_id_35,review_id_36,review_id_37,review_id_38,review_id_39,review_id_40,review_id_41,review_id_42,review_id_43,review_id_44,review_id_45,review_id_46,review_id_47,review_id_48,review_id_49,review_id_50,review_id_51,review_id_52,review_id_53,review_id_54,review_id_55,review_id_56,review_id_57,review_id_58,review_id_59,review_id_60,review_id_61,review_id_62,review_id_63,review_id_64,review_id_65,review_id_66,review_id_67,review_id_68,review_id_69,review_id_70,review_id_71,review_id_72,review_id_73,review_id_74,review_id_75,review_id_76,review_id_77,review_id_78,review_id_79,review_id_80,review_id_81,review_id_82,review_id_83,review_id_84,review_id_85,review_id_86,review_id_87,review_id_88,review_id_89,review_id_90,review_id_91,review_id_92,review_id_93,review_id_94,review_id_95,review_id_96,review_id_97,review_id_98,review_id_99,review_id_100,review_id_101,review_id_102,review_id_103,review_id_104,review_id_105,review_id_106,review_id_107,review_id_108,review_id_109,review_id_110,review_id_111,review_id_112,review_id_113,review_id_114,review_id_115,review_id_116,review_id_117,review_id_118,review_id_119,review_id_120,review_id_121,review_id_122,review_id_123,review_id_124,review_id_125,review_id_126,review_id_127,review_id_128,review_id_129,review_id_130,review_id_131,review_id_132,review_id_133,review_id_134,review_id_135,review_id_136,review_id_137,review_id_138,review_id_139,review_id_140,review_id_141,review_id_142,review_id_143,review
_id_144,review_id_145,review_id_146,review_id_147,review_id_148,review_id_149,review_id_150,review_id_151,review_id_152,review_id_153,review_id_154,review_id_155,review_id_156,review_id_157,review_id_158,review_id_159,review_id_160,review_id_161,review_id_162,review_id_163,review_id_164,review_id_165,review_id_166,review_id_167,review_id_168,review_id_169,review_id_170,review_id_171,review_id_172,review_id_173,review_id_174,review_id_175,review_id_176,review_id_177,review_id_178,review_id_179,review_id_180,review_id_181,review_id_182,review_id_183,review_id_184,review_id_185,review_id_186,review_id_187,review_id_188,review_id_189,review_id_190,review_id_191,review_id_192,review_id_193,review_id_194,review_id_195,review_id_196,review_id_197,review_id_198,review_id_199,review_id_200,review_id_201,review_id_202,review_id_203,review_id_204,review_id_205,review_id_206,review_id_207,review_id_208,review_id_209,review_id_210,review_id_211,review_id_212,review_id_213,review_id_214,review_id_215,review_id_216,review_id_217,review_id_218,review_id_219,review_id_220,review_id_221,review_id_222,review_id_223,review_id_224,review_id_225,review_id_226,review_id_227,review_id_228,review_id_229,review_id_230,review_id_231,review_id_232,review_id_233,review_id_234,review_id_235,review_id_236,review_id_237,review_id_238,review_id_239,review_id_240,review_id_241,review_id_242,review_id_243,review_id_244,review_id_245,review_id_246,review_id_247,review_id_248,review_id_249,review_id_250,review_id_251,review_id_252,review_id_253,review_id_254,review_id_255,review_id_256,review_id_257,review_id_258,review_id_259,review_id_260,review_id_261,review_id_262,review_id_263,review_id_264,review_id_265,review_id_266,review_id_267,review_id_268,review_id_269,review_id_270,review_id_271,review_id_272,review_id_273,review_id_274,review_id_275,review_id_276,review_id_277,review_id_278,review_id_279,review_id_280,review_id_281,review_id_282,review_id_283,review_id_284,review_id_285,review_id_286,revi
ew_id_287,review_id_288,review_id_289,review_id_290,review_id_291,review_id_292,review_id_293,review_id_294,review_id_295,review_id_296,review_id_297,review_id_298,review_id_299,review_id_300,review_id_301,review_id_302,review_id_303,review_id_304,review_id_305,review_id_306,review_id_307,review_id_308,review_id_309,review_id_310,review_id_311,review_id_312,review_id_313,review_id_314,review_id_315,review_id_316,review_id_317,review_id_318,review_id_319,review_id_320,review_id_321,review_id_322,review_id_323,review_id_324,review_id_325,review_id_326,review_id_327,review_id_328,review_id_329,review_id_330,review_id_331,review_id_332,review_id_333,review_id_334,review_id_335,review_id_336,review_id_337,review_id_338,review_id_339,review_id_340,review_id_341,review_id_342,review_id_343,review_id_344,review_id_345,review_id_346,review_id_347,review_id_348,review_id_349,review_id_350,review_id_351,review_id_352,review_id_353,review_id_354,review_id_355,review_id_356,review_id_357,review_id_358,review_id_359,review_id_360,review_id_361,review_id_362,review_id_363,review_id_364,review_id_365,review_id_366,review_id_367,review_id_368,review_id_369,review_id_370,review_id_371,review_id_372,review_id_373,review_id_374,review_id_375,review_id_376,review_id_377,review_id_378,review_id_379,review_id_380,review_id_381,review_id_382,review_id_383,review_id_384,review_id_385,review_id_386,review_id_387,review_id_388,review_id_389,review_id_390,review_id_391,review_id_392,review_id_393,review_id_394,review_id_395,review_id_396,review_id_397,review_id_398,review_id_399,review_id_400,review_id_401,review_id_402,review_id_403,review_id_404,review_id_405,review_id_406,review_id_407,review_id_408,review_id_409,review_id_410,review_id_411,review_id_412,review_id_413,review_id_414,review_id_415,review_id_416,review_id_417,review_id_418,review_id_419,review_id_420,review_id_421,review_id_422,review_id_423,review_id_424,review_id_425,review_id_426,review_id_427,review_id_428,review_id_429,re
view_id_430,review_id_431,review_id_432,review_id_433,review_id_434,review_id_435,review_id_436,review_id_437,review_id_438,review_id_439,review_id_440,review_id_441,review_id_442,review_id_443,review_id_444,review_id_445,review_id_446,review_id_447,review_id_448,review_id_449,review_id_450,review_id_451,review_id_452,review_id_453,review_id_454,review_id_455,review_id_456,review_id_457,review_id_458,review_id_459,review_id_460,review_id_461,review_id_462,review_id_463,review_id_464,review_id_465,review_id_466,review_id_467,review_id_468,review_id_469,review_id_470,review_id_471,review_id_472,review_text_1,review_text_2,review_text_3,review_text_4,review_text_5,review_text_6,review_text_7,review_text_8,review_text_9,review_text_10,review_text_11,review_text_12,review_text_13,review_text_14,review_text_15,review_text_16,review_text_17,review_text_18,review_text_19,review_text_20,review_text_21,review_text_22,review_text_23,review_text_24,review_text_25,review_text_26,review_text_27,review_text_28,review_text_29,review_text_30,review_text_31,review_text_32,review_text_33,review_text_34,review_text_35,review_text_36,review_text_37,review_text_38,review_text_39,review_text_40,review_text_41,review_text_42,review_text_43,review_text_44,review_text_45,review_text_46,review_text_47,review_text_48,review_text_49,review_text_50,review_text_51,review_text_52,review_text_53,review_text_54,review_text_55,review_text_56,review_text_57,review_text_58,review_text_59,review_text_60,review_text_61,review_text_62,review_text_63,review_text_64,review_text_65,review_text_66,review_text_67,review_text_68,review_text_69,review_text_70,review_text_71,review_text_72,review_text_73,review_text_74,review_text_75,review_text_76,review_text_77,review_text_78,review_text_79,review_text_80,review_text_81,review_text_82,review_text_83,review_text_84,review_text_85,review_text_86,review_text_87,review_text_88,review_text_89,review_text_90,review_text_91,review_text_92,review_text_93,review_text_94
,review_text_95,review_text_96,review_text_97,review_text_98,review_text_99,review_text_100,review_text_101,review_text_102,review_text_103,review_text_104,review_text_105,review_text_106,review_text_107,review_text_108,review_text_109,review_text_110,review_text_111,review_text_112,review_text_113,review_text_114,review_text_115,review_text_116,review_text_117,review_text_118,review_text_119,review_text_120,review_text_121,review_text_122,review_text_123,review_text_124,review_text_125,review_text_126,review_text_127,review_text_128,review_text_129,review_text_130,review_text_131,review_text_132,review_text_133,review_text_134,review_text_135,review_text_136,review_text_137,review_text_138,review_text_139,review_text_140,review_text_141,review_text_142,review_text_143,review_text_144,review_text_145,review_text_146,review_text_147,review_text_148,review_text_149,review_text_150,review_text_151,review_text_152,review_text_153,review_text_154,review_text_155,review_text_156,review_text_157,review_text_158,review_text_159,review_text_160,review_text_161,review_text_162,review_text_163,review_text_164,review_text_165,review_text_166,review_text_167,review_text_168,review_text_169,review_text_170,review_text_171,review_text_172,review_text_173,review_text_174,review_text_175,review_text_176,review_text_177,review_text_178,review_text_179,review_text_180,review_text_181,review_text_182,review_text_183,review_text_184,review_text_185,review_text_186,review_text_187,review_text_188,review_text_189,review_text_190,review_text_191,review_text_192,review_text_193,review_text_194,review_text_195,review_text_196,review_text_197,review_text_198,review_text_199,review_text_200,review_text_201,review_text_202,review_text_203,review_text_204,review_text_205,review_text_206,review_text_207,review_text_208,review_text_209,review_text_210,review_text_211,review_text_212,review_text_213,review_text_214,review_text_215,review_text_216,review_text_217,review_text_218,review_text_219,revi
ew_text_220,review_text_221,review_text_222,review_text_223,review_text_224,review_text_225,review_text_226,review_text_227,review_text_228,review_text_229,review_text_230,review_text_231,review_text_232,review_text_233,review_text_234,review_text_235,review_text_236,review_text_237,review_text_238,review_text_239,review_text_240,review_text_241,review_text_242,review_text_243,review_text_244,review_text_245,review_text_246,review_text_247,review_text_248,review_text_249,review_text_250,review_text_251,review_text_252,review_text_253,review_text_254,review_text_255,review_text_256,review_text_257,review_text_258,review_text_259,review_text_260,review_text_261,review_text_262,review_text_263,review_text_264,review_text_265,review_text_266,review_text_267,review_text_268,review_text_269,review_text_270,review_text_271,review_text_272,review_text_273,review_text_274,review_text_275,review_text_276,review_text_277,review_text_278,review_text_279,review_text_280,review_text_281,review_text_282,review_text_283,review_text_284,review_text_285,review_text_286,review_text_287,review_text_288,review_text_289,review_text_290,review_text_291,review_text_292,review_text_293,review_text_294,review_text_295,review_text_296,review_text_297,review_text_298,review_text_299,review_text_300,review_text_301,review_text_302,review_text_303,review_text_304,review_text_305,review_text_306,review_text_307,review_text_308,review_text_309,review_text_310,review_text_311,review_text_312,review_text_313,review_text_314,review_text_315,review_text_316,review_text_317,review_text_318,review_text_319,review_text_320,review_text_321,review_text_322,review_text_323,review_text_324,review_text_325,review_text_326,review_text_327,review_text_328,review_text_329,review_text_330,review_text_331,review_text_332,review_text_333,review_text_334,review_text_335,review_text_336,review_text_337,review_text_338,review_text_339,review_text_340,review_text_341,review_text_342,review_text_343,review_text_344,revi
ew_text_345,review_text_346,review_text_347,review_text_348,review_text_349,review_text_350,review_text_351,review_text_352,review_text_353,review_text_354,review_text_355,review_text_356,review_text_357,review_text_358,review_text_359,review_text_360,review_text_361,review_text_362,review_text_363,review_text_364,review_text_365,review_text_366,review_text_367,review_text_368,review_text_369,review_text_370,review_text_371,review_text_372,review_text_373,review_text_374,review_text_375,review_text_376,review_text_377,review_text_378,review_text_379,review_text_380,review_text_381,review_text_382,review_text_383,review_text_384,review_text_385,review_text_386,review_text_387,review_text_388,review_text_389,review_text_390,review_text_391,review_text_392,review_text_393,review_text_394,review_text_395,review_text_396,review_text_397,review_text_398,review_text_399,review_text_400,review_text_401,review_text_402,review_text_403,review_text_404,review_text_405,review_text_406,review_text_407,review_text_408,review_text_409,review_text_410,review_text_411,review_text_412,review_text_413,review_text_414,review_text_415,review_text_416,review_text_417,review_text_418,review_text_419,review_text_420,review_text_421,review_text_422,review_text_423,review_text_424,review_text_425,review_text_426,review_text_427,review_text_428,review_text_429,review_text_430,review_text_431,review_text_432,review_text_433,review_text_434,review_text_435,review_text_436,review_text_437,review_text_438,review_text_439,review_text_440,review_text_441,review_text_442,review_text_443,review_text_444,review_text_445,review_text_446,review_text_447,review_text_448,review_text_449,review_text_450,review_text_451,review_text_452,review_text_453,review_text_454,review_text_455,review_text_456,review_text_457,review_text_458,review_text_459,review_text_460,review_text_461,review_text_462,review_text_463,review_text_464,review_text_465,review_text_466,review_text_467,review_text_468,review_text_469,revi
ew_text_470,review_text_471,review_text_472,authors_average_rating,sim_books_average_rating,favorites,currently-reading,to-read
1703631,40116,False,4.21,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22628,906322,2236198,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.21,3.876667,23667,15217,427726
1700061,32909,False,3.69,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,428263,1146155,2675454,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.64,4.084444,6604,6104,1296
2313973,68482,False,4.23,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13335037,1962813,13155899,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.09,4.142778,28751,29031,281
612566,31536,False,4.43,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11387515,255461,16319487,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.42,4.203333,6251,16522,263587
616703,31536,False,4.43,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11387515,255461,16319487,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.42,4.203333,6251,16522,263587


In [10]:
# Timestamp-like columns of the reviews table that are removed by the first step.
drop_cols1 = ['date_added', 'date_updated', 'read_at', 'started_at']

# Ordered (name, transformer) pairs for the review preprocessing pipeline.
# All transformers come from the project's `src.preprocessing` module; their
# exact semantics are not visible in this notebook, so the notes below are
# assumptions to confirm against that module.
reviews_steps = [
    # Removes the columns listed in `drop_cols1`.
    ('DropUnusedCols1', pp.DropColumns(drop_cols1)),
    # Presumably discards rows whose `review_text` is missing -- TODO confirm.
    ('EmptyValuesFilter', pp.EmptyValuesFilter(['review_text'])),
    # Cleans / normalises the raw review text.
    ('TextPreprocessor', pp.TextPreprocessor('review_text')),
    # Presumably keeps reviews whose length lies between 0 and 2000 (units and
    # inclusiveness not visible here).
    # NOTE(review): a lower bound of 0 may let texts that are empty after
    # preprocessing reach the language filter; the recorded transform run
    # failed there with "LangDetectException: No features in text" -- verify.
    ('ReviewLengthFilter', pp.ReviewLengthFilter('review_text', 0, 2000)),
    # Presumably keeps only reviews detected as English ('en').
    ('ReviewsLanguageFilter', pp.ReviewsLanguageFilter('review_text', 'en')),
]

reviews_ppl = Pipeline(steps=reviews_steps)

# sklearn's Pipeline.fit returns the pipeline itself, so `model` refers to the
# same fitted object as `reviews_ppl`.
model = reviews_ppl.fit(reviews)

(fit) Drop columns: ['date_added', 'date_updated', 'read_at', 'started_at']
(transform) Drop columns: ['date_added', 'date_updated', 'read_at', 'started_at']
(fit) Empty values filter
(transform) Empty values filter
(fit) Text preprocessing
(transform) Text preprocessing
(fit) Review length filter
(transform) Review length filter
(fit) Reviews language filter


In [11]:
# Run the fitted review pipeline over the raw reviews to get the cleaned frame.
# NOTE(review): the recorded run of this cell stopped in the language-filter
# step with "LangDetectException: No features in text" -- presumably some
# review texts contain nothing the detector can use after preprocessing;
# confirm and guard against it in `pp.ReviewsLanguageFilter`.
# NOTE(review): the logged output of the preceding `fit` call shows it already
# ran `transform` on every step except the last, so this call repeats that work.
reviews_transformed = reviews_ppl.transform(reviews)

(transform) Drop columns: ['date_added', 'date_updated', 'read_at', 'started_at']
(transform) Empty values filter
(transform) Text preprocessing
(transform) Review length filter
(transform) Reviews language filter


LangDetectException: No features in text.

In [None]:
# Preview the first three preprocessed reviews as a quick sanity check.
reviews_transformed.head(3)