# Genre Identification

This tool identifies to which genre the given chapter belongs to, making it convenient to the user to navigate to the chapter according to his / her mood.

In [1]:
#import necessary modules

import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\24Shreyaskumar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\24Shreyaskumar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Train the model

In [2]:
#load the dataset
data = pd.read_csv(r"C:\Users\24Shreyaskumar\Documents\Bit Manupilators - Coriolis Hackathon\ReadEZ\dataset\data.csv")
data

Unnamed: 0,index,title,genre,summary
0,0,Drowned Wednesday,fantasy,Drowned Wednesday is the first Trustee among ...
1,1,The Lost Hero,fantasy,"As the book opens, Jason awakens on a school ..."
2,2,The Eyes of the Overworld,fantasy,Cugel is easily persuaded by the merchant Fia...
3,3,Magic's Promise,fantasy,The book opens with Herald-Mage Vanyel return...
4,4,Taran Wanderer,fantasy,Taran and Gurgi have returned to Caer Dallben...
...,...,...,...,...
4652,4652,Hounded,fantasy,"Atticus O’Sullivan, last of the Druids, lives ..."
4653,4653,Charlie and the Chocolate Factory,fantasy,Charlie Bucket's wonderful adventure begins wh...
4654,4654,Red Rising,fantasy,"""I live for the dream that my children will be..."
4655,4655,Frostbite,fantasy,"Rose loves Dimitri, Dimitri might love Tasha, ..."


In [3]:
df = pd.DataFrame(data)
data = df[['summary', 'genre']]
data

Unnamed: 0,summary,genre
0,Drowned Wednesday is the first Trustee among ...,fantasy
1,"As the book opens, Jason awakens on a school ...",fantasy
2,Cugel is easily persuaded by the merchant Fia...,fantasy
3,The book opens with Herald-Mage Vanyel return...,fantasy
4,Taran and Gurgi have returned to Caer Dallben...,fantasy
...,...,...
4652,"Atticus O’Sullivan, last of the Druids, lives ...",fantasy
4653,Charlie Bucket's wonderful adventure begins wh...,fantasy
4654,"""I live for the dream that my children will be...",fantasy
4655,"Rose loves Dimitri, Dimitri might love Tasha, ...",fantasy


In [4]:
#preprocess the data

def preprocess(summary):
    words = nltk.word_tokenize(summary.lower())
    stop_words = set(stopwords.words("english"))
    words = [word for word in words if word.isalpha() and word not in stop_words]
    return " ".join(words)

In [5]:
corpus = [preprocess(summary) for summary in data['summary']]
labels = [genre for genre in data['genre']]

In [6]:
vectorizer = TfidfVectorizer()
x = vectorizer.fit_transform(corpus)

In [7]:
classifier = LinearSVC()
classifier.fit(x, labels)
classifier.score(x, labels)

0.9783122181662014

# Identify Genre using the above Model.

In [8]:
#function that returns the genre of the provided inpnut.

def classify_genre(content):
    preprocessed_content = preprocess(content)
    vectorized_content = vectorizer.transform([preprocessed_content])
    genre = classifier.predict(vectorized_content)[0]
    return genre

In [9]:
def get_content():
    return "Shiva returned to the temple steps and immersed himself in the serene atmosphere. In that moment, he experienced a profound encounter—the long-awaited meeting with a soul from a past life, a moment of rare fortune despite divine hindrances—the entrance of 'she' into his life." 

In [10]:
content = get_content()
genre = classify_genre(content)
genre

'thriller'