In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow
from collections import defaultdict

In [26]:
df = pd.read_csv("/content/book_data.csv")
df.head()

Unnamed: 0,book_authors,book_desc,book_edition,book_format,book_isbn,book_pages,book_rating,book_rating_count,book_review_count,book_title,genres,image_url
0,Suzanne Collins,Winning will make you famous. Losing means cer...,,Hardcover,9780440000000.0,374 pages,4.33,5519135,160706,The Hunger Games,Young Adult|Fiction|Science Fiction|Dystopia|F...,https://images.gr-assets.com/books/1447303603l...
1,J.K. Rowling|Mary GrandPré,There is a door at the end of a silent corrido...,US Edition,Paperback,9780440000000.0,870 pages,4.48,2041594,33264,Harry Potter and the Order of the Phoenix,Fantasy|Young Adult|Fiction,https://images.gr-assets.com/books/1255614970l...
2,Harper Lee,The unforgettable novel of a childhood in a sl...,50th Anniversary,Paperback,9780060000000.0,324 pages,4.27,3745197,79450,To Kill a Mockingbird,Classics|Fiction|Historical|Historical Fiction...,https://images.gr-assets.com/books/1361975680l...
3,Jane Austen|Anna Quindlen|Mrs. Oliphant|George...,«È cosa ormai risaputa che a uno scapolo in po...,"Modern Library Classics, USA / CAN",Paperback,9780680000000.0,279 pages,4.25,2453620,54322,Pride and Prejudice,Classics|Fiction|Romance,https://images.gr-assets.com/books/1320399351l...
4,Stephenie Meyer,About three things I was absolutely positive.F...,,Paperback,9780320000000.0,498 pages,3.58,4281268,97991,Twilight,Young Adult|Fantasy|Romance|Paranormal|Vampire...,https://images.gr-assets.com/books/1361039443l...


In [27]:
df.dropna(subset=["genres", "book_desc"], inplace=True)

In [28]:
data = df.copy()

Calculating the frequency of each genre

In [29]:
def genre_count(x):
    try:
        return len(x.split('|'))
    except:
        return 0

data['genre_count'] = data['genres'].map(lambda x: genre_count(x))

make a genre columns into a list of all genres

In [30]:
def genre_listing(x):
    try:
        lst = [genre for genre in x.split("|")]
        return lst
    except:
        return []

data['genre_list'] = data['genres'].map(lambda x: genre_listing(x))

In [31]:
genre_dict = defaultdict(int)
for index, row in data.iterrows():
  book_genres = row["genre_list"]
  if type(book_genres) == list:
    for genre in book_genres:
      genre_dict[genre] += 1

In [32]:
len(genre_dict)

864

In [33]:
genre_pd = pd.DataFrame.from_records(sorted(genre_dict.items(), key=lambda x:x[1], reverse=True),
                                     columns=['genre', 'count'])
genre_pd[:50].head()

Unnamed: 0,genre,count
0,Fiction,25736
1,Fantasy,23583
2,Romance,18636
3,Young Adult,11251
4,Historical,10789


In [36]:
def determine_fiction(x):
    lower_list = [genre.lower() for genre in x]
    if 'fiction' in lower_list:
        return 'fiction'
    elif 'nonfiction' in lower_list:
        return 'nonfiction'
    else:
        return 'others'
data['label'] = data['genre_list'].apply(determine_fiction)

In [37]:
data.label.value_counts()

fiction       25736
others        16843
nonfiction     7598
Name: label, dtype: int64

In [38]:
data

Unnamed: 0,book_authors,book_desc,book_edition,book_format,book_isbn,book_pages,book_rating,book_rating_count,book_review_count,book_title,genres,image_url,genre_count,genre_list,label
0,Suzanne Collins,Winning will make you famous. Losing means cer...,,Hardcover,9.78044E+12,374 pages,4.33,5519135,160706,The Hunger Games,Young Adult|Fiction|Science Fiction|Dystopia|F...,https://images.gr-assets.com/books/1447303603l...,6,"[Young Adult, Fiction, Science Fiction, Dystop...",fiction
1,J.K. Rowling|Mary GrandPré,There is a door at the end of a silent corrido...,US Edition,Paperback,9.78044E+12,870 pages,4.48,2041594,33264,Harry Potter and the Order of the Phoenix,Fantasy|Young Adult|Fiction,https://images.gr-assets.com/books/1255614970l...,3,"[Fantasy, Young Adult, Fiction]",fiction
2,Harper Lee,The unforgettable novel of a childhood in a sl...,50th Anniversary,Paperback,9.78006E+12,324 pages,4.27,3745197,79450,To Kill a Mockingbird,Classics|Fiction|Historical|Historical Fiction...,https://images.gr-assets.com/books/1361975680l...,6,"[Classics, Fiction, Historical, Historical Fic...",fiction
3,Jane Austen|Anna Quindlen|Mrs. Oliphant|George...,«È cosa ormai risaputa che a uno scapolo in po...,"Modern Library Classics, USA / CAN",Paperback,9.78068E+12,279 pages,4.25,2453620,54322,Pride and Prejudice,Classics|Fiction|Romance,https://images.gr-assets.com/books/1320399351l...,3,"[Classics, Fiction, Romance]",fiction
4,Stephenie Meyer,About three things I was absolutely positive.F...,,Paperback,9.78032E+12,498 pages,3.58,4281268,97991,Twilight,Young Adult|Fantasy|Romance|Paranormal|Vampire...,https://images.gr-assets.com/books/1361039443l...,8,"[Young Adult, Fantasy, Romance, Paranormal, Va...",fiction
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54295,Avi Steinberg,Avi Steinberg is stumped. After defecting from...,,Hardcover,9.78039E+12,399 pages,3.51,3717,661,Running the Books: The Adventures of an Accide...,Nonfiction|Autobiography|Memoir|Biography|Writ...,https://images.gr-assets.com/books/1320533033l...,7,"[Nonfiction, Autobiography, Memoir, Biography,...",nonfiction
54296,Howard Megdal,"In this fearless and half-crazy story, Howard ...",,Hardcover,9.78161E+12,256 pages,3.37,27,9,Taking the Field: A Fan's Quest to Run the Tea...,Sports|Baseball|Sports and Games|Sports|Nonfic...,https://images.gr-assets.com/books/1312074392l...,5,"[Sports, Baseball, Sports and Games, Sports, N...",nonfiction
54297,Howard Megdal,From the icons of the game to the players who ...,,Hardcover,9.78006E+12,256 pages,3.97,34,5,"The Baseball Talmud: Koufax, Greenberg, and th...",Nonfiction|Sports and Games|Sports,https://images.gr-assets.com/books/1348841629l...,3,"[Nonfiction, Sports and Games, Sports]",nonfiction
54299,Mimi Baird|Eve Claxton,"Soon to be a major motion picture, from Brad P...",,Hardcover,9.7808E+12,272 pages,3.82,867,187,He Wanted the Moon: The Madness and Medical Ge...,Nonfiction|Autobiography|Memoir|Biography|Psyc...,https://images.gr-assets.com/books/1403192135l...,9,"[Nonfiction, Autobiography, Memoir, Biography,...",nonfiction


In [None]:
pip install langdetect

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/981.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m972.8/981.5 kB[0m [31m30.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993224 sha256=d137d466522edb84b53e103421a6ab285f178ed282e749744a883fc4ae38a7d0
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9


In [None]:
df.columns

Index(['book_authors', 'book_desc', 'book_edition', 'book_format', 'book_isbn',
       'book_pages', 'book_rating', 'book_rating_count', 'book_review_count',
       'book_title', 'genres', 'image_url'],
      dtype='object')

Preprocessing data


In [None]:
df.dropna(subset=["book_desc"], inplace=True)

In [None]:
df.dropna(subset=["genres"], inplace=True)

In [None]:
df

Unnamed: 0,book_authors,book_desc,book_edition,book_format,book_isbn,book_pages,book_rating,book_rating_count,book_review_count,book_title,genres,image_url
0,Suzanne Collins,Winning will make you famous. Losing means cer...,,Hardcover,9.78044E+12,374 pages,4.33,5519135,160706,The Hunger Games,Young Adult|Fiction|Science Fiction|Dystopia|F...,https://images.gr-assets.com/books/1447303603l...
1,J.K. Rowling|Mary GrandPré,There is a door at the end of a silent corrido...,US Edition,Paperback,9.78044E+12,870 pages,4.48,2041594,33264,Harry Potter and the Order of the Phoenix,Fantasy|Young Adult|Fiction,https://images.gr-assets.com/books/1255614970l...
2,Harper Lee,The unforgettable novel of a childhood in a sl...,50th Anniversary,Paperback,9.78006E+12,324 pages,4.27,3745197,79450,To Kill a Mockingbird,Classics|Fiction|Historical|Historical Fiction...,https://images.gr-assets.com/books/1361975680l...
3,Jane Austen|Anna Quindlen|Mrs. Oliphant|George...,«È cosa ormai risaputa che a uno scapolo in po...,"Modern Library Classics, USA / CAN",Paperback,9.78068E+12,279 pages,4.25,2453620,54322,Pride and Prejudice,Classics|Fiction|Romance,https://images.gr-assets.com/books/1320399351l...
4,Stephenie Meyer,About three things I was absolutely positive.F...,,Paperback,9.78032E+12,498 pages,3.58,4281268,97991,Twilight,Young Adult|Fantasy|Romance|Paranormal|Vampire...,https://images.gr-assets.com/books/1361039443l...
...,...,...,...,...,...,...,...,...,...,...,...,...
27050,Nigel McCrery,Memories haunt Violet Chambers. Taking tea wit...,,Paperback,9.78185E+12,389 pages,3.70,651,99,Core of Evil,Mystery|Fiction|Mystery|Crime|European Literat...,https://images.gr-assets.com/books/1348064439l...
27051,Winston Graham,"Cornwall, 1818-1820The stories of the Poldark ...",,Paperback,9.78033E+12,688 pages,4.30,2864,257,Bella Poldark,Historical|Historical Fiction|Fiction|Historic...,https://images.gr-assets.com/books/1385276287l...
27052,Jack Canfield|Mark Victor Hansen|Amy Newmark|A...,"Teenage years are tough, but this book will he...",,Paperback,9.78194E+12,384 pages,4.13,121,11,Chicken Soup for the Soul: Just for Teenagers:...,Nonfiction|Inspirational,https://images.gr-assets.com/books/1348577234l...
27053,Thomas E. Sniegoski,DESTINED FOR HEAVEN OR HELL?Aaron's senior yea...,,Paperback,9.78144E+12,541 pages,4.09,7882,265,Aerie and Reckoning,Paranormal|Angels|Young Adult|Fantasy|Fantasy|...,https://images.gr-assets.com/books/1282266612l...


In [None]:
from langdetect import detect

def is_english(text):
    try:
        lang = detect(text)
        if lang == 'en':
            return True
        else:
            return False
    except:
        return False

In [None]:
df['is_english'] = df['book_desc'].apply(lambda x: is_english(x))
df = df[df['is_english']]
df = df.drop(columns=['is_english'])

In [None]:
def detect_fiction(text):
  text = text.lower()
  words = text.split("|")
  if "fiction" in words:
    return "Fiction"
  if "nonfiction" in words:
    return "Non-Fiction"
  else:
    return "Other"

In [None]:
df["genres"] = df["genres"].apply(detect_fiction)

In [None]:
df.genres.value_counts()

Fiction        12174
Other           7153
Non-Fiction     3179
Name: genres, dtype: int64

In [None]:
df[df["genres"] == "Other"]

Unnamed: 0,book_authors,book_desc,book_edition,book_format,book_isbn,book_pages,book_rating,book_rating_count,book_review_count,book_title,genres,image_url
27,Cassandra Clare|Franca Fritz|Heinrich Koop|Fra...,When fifteen-year-old Clary Fray heads out to ...,,Hardcover,9.78142E+12,485 pages,4.11,1383479,55446,City of Bones,Other,https://images.gr-assets.com/books/1432730315l...
83,Richelle Mead,Only a true best friend can protect you from y...,,Paperback,,332 pages,4.13,491540,20760,Vampire Academy,Other,https://images.gr-assets.com/books/1381459853l...
104,Cassandra Clare,In a time when Shadowhunters are barely winnin...,First Edition,Hardcover,9.78142E+12,481 pages,4.33,572963,26716,Clockwork Angel,Other,https://images.gr-assets.com/books/1454962884l...
116,Eric Carle,"THE all-time classic picture book, from genera...",,Board book,9.78024E+12,26 pages,4.29,338885,6637,The Very Hungry Caterpillar,Other,https://images.gr-assets.com/books/1517550266l...
128,Becca Fitzpatrick,A SACRED OATHA FALLEN ANGELA FORBIDDEN LOVERom...,First Edition,Hardcover,9.78142E+12,391 pages,3.99,503810,23115,"Hush, Hush",Other,https://images.gr-assets.com/books/1358261334l...
...,...,...,...,...,...,...,...,...,...,...,...,...
27031,Barnabas Miller,"ARE YOU READY TO ROCK?Dear Sammy,The truth is ...",,Paperback,9.7814E+12,288 pages,3.14,37,10,Rock God: The Legend of BJ Levine,Other,https://images.gr-assets.com/books/1327885495l...
27036,Dana Marie Bell,"The better to make you mine, my dear…When it c...",,ebook,9.78162E+12,85 pages,3.84,1357,56,Mr. Red Riding Hoode,Other,https://images.gr-assets.com/books/1374619628l...
27039,Vina Jackson,"\r\r\r\nIn the manner of Fifty Shades of Grey,...",,Paperback,9.78141E+12,334 pages,2.92,5015,605,Eighty Days Yellow,Other,https://images.gr-assets.com/books/1358273545l...
27053,Thomas E. Sniegoski,DESTINED FOR HEAVEN OR HELL?Aaron's senior yea...,,Paperback,9.78144E+12,541 pages,4.09,7882,265,Aerie and Reckoning,Other,https://images.gr-assets.com/books/1282266612l...


In [None]:
df.genre_and_votes.value_counts()

Fiction                                                                                                                         29133
Non-Fiction                                                                                                                      8175
childrens 1user                                                                                                                    42
romance 1user                                                                                                                      37
childrens 2                                                                                                                        37
                                                                                                                                ...  
romance-paranormal romance 2, romance 2                                                                                             1
new adult 84, romance 64, contemporary 29, sports-sports 24   

In [None]:
data = open('/content/data.txt').read()
data

'When you’re looking for something special,\nJust rush on out\nMove from coast to coast. You don’t need tickets\nDon’t need luggage either3min with you, around the world\nTo the unseen West of Eden\nThe way to a dazzling Heaven,\nHere’s the signLet’s get out\nYou feel the world is calling?\nGimme just three minutes\nEven if we face the boring\nRhythm and soul beat it\nRhythm and soul, we are\nTo a dazzling Heaven,\nTo the unseen West of Eden,\nWe have a definite purpose for our journey\nHere’s the signLet’s get out\nYou feel the world is calling?\nGimme just three minutes\nEven if we face the boring\nRhythm and soul beat it\nRhythm and soul, we areNo matter where in the world you are tomorrow,\nI can come meet you in just 3min\nI’ll find you\nI feel you so far away, close and nearLet’s get out\nYou feel the world is calling?\nGimme just three minutes\nEven if we face the boring\nRhythm and soul beat itLet’s get out\nYou hear? The world is calling\nGimme just three minutes\nEven if we f

In [None]:
corpus = data.lower().split("\n")
corpus = list(set(corpus))

In [None]:
corpus

['am02:00, still unable to sleep, we cross over midnight with this kinda',
 'the evening stars still won’t light; even my impatience seems faint — where is the moon?',
 'my feelings sayin’, “i don’t wanna go home just yet”.',
 'just rush on out',
 'unable to say that i want to be by your side, at 02:00 am',
 'dancing a capricious, clumsy dance.',
 'my heartbeat is accelerating just a bit… though i’m sure you’d laugh that i’m still not over it.',
 'true, true, true feelings',
 'but you always passed away.',
 'to the two of us, uncaring and endlessly whimsical, morning is calling!',
 'i can come meet you in just 3min',
 'the kind, pouring rain',
 'even if we face the boring',
 'if you saw me like this you’d laugh, right?',
 'pretending not to notice and laughing',
 'are still my heart; yes, they are a refrain',
 'am00:00, only by expressing emotion, having expectations, do we get this surfacing',
 'you don’t know how i really i feel.',
 'a calling voice, never receiving a response, is dr

In [None]:
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import *
from tensorflow.keras.layers import *


In [None]:
import numpy as np

In [None]:
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]

    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]

    input_sequences.append(n_gram_sequence)

In [None]:
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(tf.keras.utils.pad_sequences(input_sequences,
                       maxlen = max_sequence_len, padding='pre'))

In [None]:
predictors, label = input_sequences[:,:-1],input_sequences[:,-1]

In [None]:
maxlen = 1024
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 128

In [None]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True))
model.add(GRU(units=1024, return_sequences=True))
model.add(Dense(vocab_size))

model.compile(optimizer=Adam(), loss=SparseCategoricalCrossentropy(from_logits=True))

history = model.fit(x_train, y_train, epochs=50, verbose=1)

NameError: ignored

In [None]:
history = model.fit(predictors, label, epochs= 100, verbose=1)

Epoch 1/100


ValueError: ignored