In [1]:
import pandas as pd
import freeman as fm
from unidecode import unidecode

In [2]:
# Load the Goodreads export; the path is relative to the notebook's working directory.
data = pd.read_csv("Goodreads_BestBooksEver_1-10000.csv")
# Show the available column names before deciding which ones to keep.
data.columns

Index(['url', 'bookTitle', 'bookImage', 'bookAuthors', 'bookDesc',
       'bookRating', 'ratingCount', 'reviewCount', 'bookPages', 'bookGenres',
       'bookISBN', 'recommendations'],
      dtype='object')

In [3]:
# Discard the link/image/ISBN columns; they are not used by any later cell.
data = data.drop(columns=["url", "bookImage", "bookISBN"])

In [4]:
# Keep only the first 500 rows so the all-pairs genre graph built later stays small.
# Take an explicit copy: later cells assign new columns to `data`, and doing that
# on a slice of the original frame triggers SettingWithCopyWarning / ambiguous
# view-vs-copy behaviour.
data = data.iloc[:500].copy()

In [5]:
# Turn each "|"-separated genre string into a list of entries. Values are passed
# through str() first, so a missing value becomes the one-element list ["nan"].
data["bookGenres"] = data["bookGenres"].map(lambda raw: str(raw).split("|"))

In [6]:
# Display the frame to check the columns that remain and the parsed genre lists.
data

Unnamed: 0,bookTitle,bookAuthors,bookDesc,bookRating,ratingCount,reviewCount,bookPages,bookGenres,recommendations
0,The Hunger Games,Suzanne Collins,"Could you survive on your own in the wild, wit...",4.32,6717635,176054,374 pages,"[Young Adult/31,498, Fiction/17,878, Science F...","['Divergent (Divergent, #1)|https://www.goodre..."
1,Harry Potter and the Order of the Phoenix,"J.K. Rowling,Mary GrandPré",There is a door at the end of a silent corrido...,4.50,2668409,45724,870 pages,"[Fantasy/1,797, Young Adult/15,961, Fiction/14...",['Harry Potter and the Cursed Child: Parts One...
2,To Kill a Mockingbird,Harper Lee,The unforgettable novel of a childhood in a sl...,4.28,4772918,95595,324 pages,"[Classics/47,203, Fiction/23,575, Historical-H...",['The Great Gatsby|https://www.goodreads.com/b...
3,Pride and Prejudice,"Jane Austen,Anna Quindlen",Alternate cover edition of ISBN 9780679783268S...,4.27,3206070,74020,279 pages,"[Classics/52,699, Fiction/15,730, Romance/12,8...",['Jane Eyre|https://www.goodreads.com/book/sho...
4,Twilight,Stephenie Meyer,About three things I was absolutely positive.F...,3.61,5231000,107619,501 pages,"[Young Adult/19,982, Fantasy/19,312, Romance/1...","['The Hunger Games (The Hunger Games, #1)|http..."
...,...,...,...,...,...,...,...,...,...
495,Brideshead Revisited,Evelyn Waugh,The most nostalgic and reflective of Evelyn Wa...,4.00,96150,4928,351 pages,"[Classics/2,798, Fiction/2,385, Historical-His...",['Maurice|https://www.goodreads.com/book/show/...
496,Wizard's First Rule,Terry Goodkind,In the aftermath of the brutal murder of his f...,4.11,235318,6738,836 pages,"[Fantasy/9,688, Fiction/1,016, Fantasy-Epic Fa...","['The Eye of the World (The Wheel of Time, #1)..."
497,Demian: Die Geschichte von Emil Sinclairs Jugend,"Hermann Hesse,Thomas Mann",Wie alle Hauptwerke Hermann Hesses hat auch de...,4.13,79507,4558,194 pages,"[Fiction/1,340, Classics/1,105, Philosophy/372...",['The Ones Who Walk Away from Omelas|https://w...
498,The God Delusion,Richard Dawkins,A preeminent scientist - and the world's most ...,3.89,243989,8525,374 pages,"[Nonfiction/5,481, Religion/2,898, Science/2,4...",['God Is Not Great: How Religion Poisons Every...


In [7]:
# The genre name is the text before the first "/" of the first entry in bookGenres
# (presumably the entry with the most votes -- the column name suggests so).
data["highestVotedGenre"] = data["bookGenres"].map(
    lambda genres: str(genres[0].split("/")[0])
)
# Force titles to str and remove double quotes from them.
data["bookTitle"] = data["bookTitle"].map(
    lambda title: str(title).replace("\"", "")
)

# Rows whose genre was missing ended up with the literal string "nan"; drop them.
has_genre = data["highestVotedGenre"] != "nan"
data = data[has_genre]

In [8]:
# Two-column (title, top genre) frame used to build the graph.
# The index is renumbered 0..n-1 because rows were filtered out of `data` earlier;
# chaining reset_index avoids mutating the selected frame in place.
network = data[["bookTitle", "highestVotedGenre"]].reset_index(drop=True)
network

Unnamed: 0,bookTitle,highestVotedGenre
0,The Hunger Games,Young Adult
1,Harry Potter and the Order of the Phoenix,Fantasy
2,To Kill a Mockingbird,Classics
3,Pride and Prejudice,Classics
4,Twilight,Young Adult
...,...,...
495,Brideshead Revisited,Classics
496,Wizard's First Rule,Fantasy
497,Demian: Die Geschichte von Emil Sinclairs Jugend,Fiction
498,The God Delusion,Nonfiction


In [9]:
# Build the graph inputs from the (title, top-genre) pairs:
#   nodes -> unique ASCII-transliterated titles, in first-seen order
#   dic   -> genre -> list of unique titles with that genre, in first-seen order
# Dicts are used as ordered sets so de-duplication is O(1) per title instead of
# scanning a list on every row.
titles_seen = {}
titles_by_genre = {}

# Iterate over the values directly so this does not depend on the frame's index labels.
for raw_title, raw_genre in zip(network["bookTitle"], network["highestVotedGenre"]):
    # unidecode may transliterate typographic quotes into ASCII '"'. Titles are
    # written between double quotes in the GML file, so strip any '"' again
    # after transliteration to keep the quoted ids well-formed.
    title = unidecode(raw_title).replace("\"", "")
    genre = unidecode(raw_genre)

    titles_seen[title] = None
    titles_by_genre.setdefault(genre, {})[title] = None

nodes = list(titles_seen)
dic = {genre: list(titles) for genre, titles in titles_by_genre.items()}

dic.keys()

dict_keys(['Young Adult', 'Fantasy', 'Classics', 'Historical-Historical Fiction', 'Childrens', 'Fiction', 'Plays', 'Science Fiction', 'Horror', 'Nonfiction', 'Romance', 'Mystery', 'Childrens-Picture Books', 'Sequential Art-Graphic Novels', 'Poetry', 'History', 'Sequential Art-Comics', 'Philosophy', 'Travel', 'Religion', 'Fairies-Fae', 'Science', 'Christian Fiction'])

In [11]:
# Write the graph as an undirected GML file: one node per title, and one edge
# for every pair of titles that share the same genre.
with open("books_genres.gml", "w+") as rede:
    rede.write("graph [\n")
    rede.write("    directed 0\n")

    # Node records, in the order the titles were collected.
    for title in nodes:
        rede.write(
            "    node [\n"
            f"        id \"{title}\" \n"
            "    ]\n"
        )

    # Edge records: each title is paired with every title that follows it in
    # its genre list, so every unordered pair is written exactly once.
    for members in dic.values():
        for position, source in enumerate(members):
            for target in members[position + 1:]:
                rede.write(
                    "    edge [\n"
                    f"        source \"{source}\" \n"
                    f"        target \"{target}\" \n"
                    "    ]\n"
                )

    rede.write("]\n")

In [13]:
# Load the GML file written by the previous cell into a freeman graph.
g = fm.load("books_genres.gml")
# NOTE(review): presumably gives every node a text label derived from its id --
# confirm against the freeman documentation.
g.label_nodes()
# Set the 'labpos' attribute of every node to 'hover'
# (presumably: show the label only on mouse-over -- confirm).
for n in g.nodes:
    g.nodes[n]['labpos'] = 'hover'
# NOTE(review): presumably repositions nodes with a Kamada-Kawai layout -- confirm.
g.move('kamada_kawai')
# Render the graph in the notebook.
g.draw()