In [59]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sweetviz as sv
import seaborn as sns
from textblob import TextBlob
import math
from sklearn.feature_extraction.text import TfidfVectorizer
from IPython.display import display

In [2]:
train_data = pd.read_csv('Datasets\Religious_text_train.csv')
train_data = train_data.rename(columns={'Unnamed: 0': 'Chapters'})
train_data.head(5)

Unnamed: 0,Chapters,foolishness,hath,wholesome,takest,feelings,anger,vaivaswata,matrix,kindled,...,erred,thinkest,modern,reigned,sparingly,visual,thoughts,illumines,attire,explains
0,Buddhism_Ch1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Buddhism_Ch2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Buddhism_Ch3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Buddhism_Ch4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Buddhism_Ch5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Training Data Split By Book (with Columns Maintained)

In [25]:
buddhism = train_data[train_data['Chapters'].str.contains('Buddhism')]
tao_te_ching = train_data[train_data['Chapters'].str.contains('TaoTeChing')]
upanishad = train_data[train_data['Chapters'].str.contains('Upanishad')]
yoga_sutra = train_data[train_data['Chapters'].str.contains('YogaSutra')]
book_proverb = train_data[train_data['Chapters'].str.contains('Proverb')]
book_of_ecclesiastes = train_data[train_data['Chapters'].str.contains('Ecclesiastes')]
book_of_eccleasiasticus = train_data[train_data['Chapters'].str.contains('Eccleasiasticus')]
book_of_wisdom = train_data[train_data['Chapters'].str.contains('Wisdom')]


In [26]:
book_names = ['Buddhism', 'TaoTeChing', 'Upanishad', 'YogaSutra', 'Proverb', 'Ecclesiastes', 'Eccleasiasticus', 'Wisdom']
book_dfs = [buddhism,tao_te_ching, upanishad, yoga_sutra, book_proverb, book_of_ecclesiastes, book_of_eccleasiasticus, book_of_wisdom]
dataframes = dict(zip(book_names, book_dfs))

pat = '|'.join(book_names)
by_book = train_data['Chapters'].str.extract('(' + pat + ')', expand = False)
books = train_data.groupby(by_book).sum().reset_index().rename(columns={'Chapters':'Books'}).set_index('Books')
books.head(5)

Unnamed: 0_level_0,foolishness,hath,wholesome,takest,feelings,anger,vaivaswata,matrix,kindled,convict,...,erred,thinkest,modern,reigned,sparingly,visual,thoughts,illumines,attire,explains
Books,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Buddhism,0,0,0,0,19,0,0,0,0,0,...,0,0,0,0,0,0,9,0,0,0
Eccleasiasticus,0,189,3,1,0,14,0,0,3,0,...,0,0,0,1,1,0,7,0,2,0
Ecclesiastes,0,46,0,0,0,5,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
Proverb,2,65,0,0,0,11,0,0,0,0,...,0,0,0,0,0,0,8,0,1,0
TaoTeChing,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Sentiment Analysis on DataFrames

In [30]:
def find_top_words(df:pd.DataFrame, count: int = 20) -> pd.DataFrame:
    """ Given data frame find the {count} most common words (column values)"""
    df = df.sum(axis=0).drop('Chapters').squeeze()
    df = pd.DataFrame({'count': df})
    return df.sort_values(by='count', ascending=False).head(count)

def get_text_blob(df:pd.DataFrame) -> TextBlob:
    """ Given a dataframe, creates a text blob of its columns"""
    df = df.loc[:, (df != 0).any(axis=0)]
    return TextBlob(" ".join(df.columns))

def get_word_pos_pairs(df:pd.DataFrame) -> list:
    """ Given dataframe, returns all words (columns) and associated part of speech"""
    return get_text_blob(df).tags

def get_pos_tag_list(df:pd.DataFrame) -> list:
    """ Returns list of all unique parts of speech found in all words (columns) of a dataframe"""
    word_tags = get_word_pos_pairs(df)
    return list(set([tag[1] for tag in word_tags]))

def get_data_for_pos(df:pd.DataFrame, pos:str) -> pd.DataFrame:
    """Given a dataframe and part of speech, subsets dataframe to only include columns matching the pos"""
    word_tags = get_word_pos_pairs(df)
    chosen_pos = [tag[0] for tag in word_tags if tag[1] == pos]
    return df.loc[:,df.columns.isin(chosen_pos)]

def unique_words_frame(df: pd.DataFrame) -> pd.DataFrame:
    """ Removes any columns with 0 as a sum (meaning word not actually in book)"""
    df = df.loc[:, (df != 0).any(axis=0)]
    if 'Chapters' in df.columns:
        df = df.drop(columns='Chapters')
    return df


### Crude TF-IDF Analysis

In [35]:
book_blobs = [get_text_blob(book) for book in dataframes.values()]

def tf(word:str, df: pd.DataFrame) -> float:
    """ Generate term refrequency of a word in a dataframe's columns"""
    return df.get(word).sum() /df.shape[1]

def n_containing(word: str, blobs: list) -> int:
    """Return number of books in list containing specific word"""
    return sum(1 for blob in blobs if word in blob)

def idf(word: str, blobs: list):
    """ Calculate inverse doc frequency of word in list of documents"""
    x = n_containing(word, blobs)
    return math.log(len(blobs) / (x if x else 1))

def tf_idf(word: str, df: pd.DataFrame, blobs = book_blobs) -> float:
    return tf(word, df) * idf(word, blobs)

def find_top_tf_idf(df: pd.DataFrame, count: int = 20) -> pd.DataFrame:
    """ Given data frame find the {count} words (column values) with the highest tf idf values"""
    df = unique_words_frame(df)
    df_tf_idfs = {word: tf_idf(word, df) for word in df.columns}
    to_dict = dict(sorted(df_tf_idfs.items(), key = lambda item: item[1], reverse = True))
    return pd.Series(to_dict).rename('tf_idf').to_frame().head(count)

def in_both(df: pd.DataFrame, amount: int = 20):
    """ Given a dataframe finds any words in common between
        the top {amount} of words by tf idf and sheer count"""
    tf = find_top_tf_idf(df, amount)
    count = find_top_words(df, amount)
    return tf[tf.index.isin(count.index)]

def only_in_count(df:pd.DataFrame, amount: int = 20) -> pd.DataFrame:
    """ Given a dataframe, finds top amount of words
    by tf idf and sheer count and returns the words
    present in top count but not tf idf """
    tf = find_top_tf_idf(df)
    count = find_top_words(df)
    return count[count.index.isin(tf.index) == False]

def only_in_tf_idf(df:pd.DataFrame, amount: int = 20) -> pd.DataFrame:
    """ Given a dataframe, finds top amount of words
    by tf idf and sheer count and returns the words
    present in top tf idf but not count"""
    
    tf = find_top_tf_idf(df, amount)
    count = find_top_words(df, amount)
    return tf[tf.index.isin(count.index[:amount]) == False]

def tf_idf_comparison(df_1: pd.DataFrame, df_2: pd.DataFrame, amount: int = 20) -> pd.DataFrame:
    """Given two dataframes, finds top {amount} of words by tf idf and compares the two.
       Returns a tuple of three values:
               - first = in both
               - second = only in first df
               - third = only in second df
    """
    tf_1 = find_top_tf_idf(df_1)
    tf_2 = find_top_tf_idf(df_2)
    return (tf_1[tf_1.index.isin(tf_2.index[:amount])],
            tf_1[tf_1.index.isin(tf_2.index[:amount]) == False],
            tf_2[tf_2.index.isin(tf_1.index[:amount]) == False]
           )

def top_words_comparison(df_1: pd.DataFrame, df_2: pd.DataFrame, amount: int = 20) -> pd.DataFrame:
    """Given two dataframes, finds top {amount} of words by count and compares the two.
       Returns a tuple of three values:
               - first = in both
               - second = only in first df
               - third = only in second df
    """
    words_1 = find_top_words(df_1)
    words_2 = find_top_words(df_2)
    return (words_1[words_1.index.isin(words_2.index[:amount])],
            words_1[words_1.index.isin(words_2.index[:amount]) == False],
            words_2[words_2.index.isin(words_1.index[:amount]) == False]
           )


# Group By Religion

To begin, we group by religions of each of the books, in order to compare the shared keywords relative to TF-IDF frequency AND by most common keywords, in order to display similarities:


In [61]:
# Group 1:
# book_proverb
# book_of_ecclesiastes
# book_of_eccleasiasticus
# book_of_wisdom

print("\033[1m" + "Comparing TF-IDF of Proverbs AND Ecclesiastes:" + "\033[0m")
display(tf_idf_comparison(book_proverb, book_of_ecclesiastes)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(book_proverb, book_of_ecclesiastes)[0]

[1mComparing TF-IDF of Proverbs AND Ecclesiastes:[0m


Unnamed: 0,tf_idf
wicked,0.032602
shall,0.026556
hath,0.015619
thy,0.011264
fool,0.011053
folly,0.007087


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
shall,389
man,176
thy,165
thou,93
wise,71
hath,65
heart,64
evil,54
wisdom,53
good,46


In [62]:
print("\033[1m" + "Comparing TF-IDF of Proverbs AND Eccleasiasticus:" + "\033[0m")
display(tf_idf_comparison(book_proverb, book_of_eccleasiasticus)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(book_proverb, book_of_eccleasiasticus)[0]

[1mComparing TF-IDF of Proverbs AND Eccleasiasticus:[0m


Unnamed: 0,tf_idf
wicked,0.032602
shall,0.026556
hath,0.015619
thy,0.011264
shalt,0.009922
justice,0.008859
thee,0.008678
woman,0.007796


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
shall,389
man,176
thy,165
thou,93
lord,85
hath,65
heart,64
thee,59
wisdom,53
soul,47


In [64]:
print("\033[1m" + "Comparing TF-IDF of Proverbs AND Wisdom:" + "\033[0m")
display(tf_idf_comparison(book_proverb, book_of_wisdom)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(book_proverb, book_of_wisdom)[0]

[1mComparing TF-IDF of Proverbs AND Wisdom:[0m


Unnamed: 0,tf_idf
wicked,0.032602
shall,0.026556
hath,0.015619
thy,0.011264
justice,0.008859
thee,0.008678
judgment,0.006379


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
shall,389
man,176
thy,165
thou,93
wicked,92
lord,85
hath,65
thee,59
wisdom,53
good,46


In [65]:
print("\033[1m" + "Comparing TF-IDF of Ecclesiastes AND Eccleasiasticus:" + "\033[0m")
display(tf_idf_comparison(book_of_ecclesiastes, book_of_eccleasiasticus)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(book_of_ecclesiastes, book_of_eccleasiasticus)[0]

[1mComparing TF-IDF of Ecclesiastes AND Eccleasiasticus:[0m


Unnamed: 0,tf_idf
hath,0.022289
shall,0.012252
wicked,0.00786
god,0.006332
thy,0.005919


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
shall,89
man,71
things,46
hath,46
god,46
thy,43
heart,27
thou,26
wisdom,26
good,22


In [66]:
print("\033[1m" + "Comparing TF-IDF of Ecclesiastes AND Wisdom:" + "\033[0m")
display(tf_idf_comparison(book_of_ecclesiastes, book_of_wisdom)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(book_of_ecclesiastes, book_of_wisdom)[0]

[1mComparing TF-IDF of Ecclesiastes AND Wisdom:[0m


Unnamed: 0,tf_idf
hath,0.022289
shall,0.012252
wicked,0.00786
god,0.006332
thy,0.005919


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
shall,89
man,71
things,46
hath,46
god,46
thy,43
thou,26
wisdom,26
good,22
men,22


In [67]:
print("\033[1m" + "Comparing TF-IDF of Eccleasiasticus AND Wisdom:" + "\033[0m")
display(tf_idf_comparison(book_of_eccleasiasticus, book_of_wisdom)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(book_of_eccleasiasticus, book_of_wisdom)[0]

[1mComparing TF-IDF of Eccleasiasticus AND Wisdom:[0m


Unnamed: 0,tf_idf
hath,0.02966
shall,0.022649
thee,0.016329
thy,0.014713
wicked,0.00972
god,0.008605
justice,0.006943
enemies,0.006249


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
shall,508
thy,330
man,232
thou,230
god,193
hath,189
thee,170
lord,152
things,120
upon,115


### As shown, each of the comparisons share **at least** 5 keywords with top TF-IDF, and **at least** 10 common keywords within their top 20 common keywords, showing a clear grouping between these four books.

### If we try to compare any of the other four bibles with any of the 4 books above, the correlation won't be as strong:

## Group 2 - Buddhism

In [77]:
print("\033[1m" + "Comparing TF-IDF of Buddhism AND Proverbs:" + "\033[0m")
display(tf_idf_comparison(buddhism, book_proverb)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(buddhism, book_proverb)[0]

[1mComparing TF-IDF of Buddhism AND Proverbs:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
way,55


In [69]:
print("\033[1m" + "Comparing TF-IDF of Buddhism AND Ecclesiastes:" + "\033[0m")
display(tf_idf_comparison(buddhism, book_of_ecclesiastes)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(buddhism, book_of_ecclesiastes)[0]

[1mComparing TF-IDF of Buddhism AND Ecclesiastes:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
one,75


In [70]:
print("\033[1m" + "Comparing TF-IDF of Buddhism AND Eccleasiasticus:" + "\033[0m")
display(tf_idf_comparison(buddhism, book_of_eccleasiasticus)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(buddhism, book_of_eccleasiasticus)[0]

[1mComparing TF-IDF of Buddhism AND Eccleasiasticus:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
one,75


In [78]:
print("\033[1m" + "Comparing TF-IDF of Buddhism AND Wisdom:" + "\033[0m")
display(tf_idf_comparison(buddhism, book_of_wisdom)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(buddhism, book_of_wisdom)[0]

[1mComparing TF-IDF of Buddhism AND Wisdom:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count


## Group 3 - Taoism

In [74]:
print("\033[1m" + "Comparing TF-IDF of Tao Te Ching AND Proverbs:" + "\033[0m")
display(tf_idf_comparison(tao_te_ching, book_proverb)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(tao_te_ching, book_proverb)[0]

[1mComparing TF-IDF of Tao Te Ching AND Proverbs:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
things,56
way,24


In [75]:
print("\033[1m" + "Comparing TF-IDF of Tao Te Ching AND Ecclesiastes:" + "\033[0m")
display(tf_idf_comparison(tao_te_ching, book_of_ecclesiastes)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(tao_te_ching, book_of_ecclesiastes)[0]

[1mComparing TF-IDF of Tao Te Ching AND Ecclesiastes:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
things,56
one,51
men,45


In [76]:
print("\033[1m" + "Comparing TF-IDF of Buddhism AND Eccleasiasticus:" + "\033[0m")
display(tf_idf_comparison(tao_te_ching, book_of_eccleasiasticus)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(tao_te_ching, book_of_eccleasiasticus)[0]

[1mComparing TF-IDF of Buddhism AND Eccleasiasticus:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
things,56
one,51
men,45


In [79]:
print("\033[1m" + "Comparing TF-IDF of Buddhism AND Wisdom:" + "\033[0m")
display(tf_idf_comparison(tao_te_ching, book_of_wisdom)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(tao_te_ching, book_of_wisdom)[0]

[1mComparing TF-IDF of Buddhism AND Wisdom:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
things,56
men,45
therefore,40


## Group 4 - Hinduism

In [80]:
print("\033[1m" + "Comparing TF-IDF of Upanishad AND Proverbs:" + "\033[0m")
display(tf_idf_comparison(upanishad, book_proverb)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(upanishad, book_proverb)[0]

[1mComparing TF-IDF of Upanishad AND Proverbs:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
man,63
heart,31
wise,30


In [81]:
print("\033[1m" + "Comparing TF-IDF of Upanishad AND Ecclesiastes:" + "\033[0m")
display(tf_idf_comparison(upanishad, book_of_ecclesiastes)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(upanishad, book_of_ecclesiastes)[0]

[1mComparing TF-IDF of Upanishad AND Ecclesiastes:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
one,100
man,63
heart,31
god,31
wise,30


In [82]:
print("\033[1m" + "Comparing TF-IDF of Upanishad AND Eccleasiasticus:" + "\033[0m")
display(tf_idf_comparison(upanishad, book_of_eccleasiasticus)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(upanishad, book_of_eccleasiasticus)[0]

[1mComparing TF-IDF of Upanishad AND Eccleasiasticus:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
one,100
man,63
heart,31
god,31


In [83]:
print("\033[1m" + "Comparing TF-IDF of Upanishad AND Wisdom:" + "\033[0m")
display(tf_idf_comparison(upanishad, book_of_wisdom)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(upanishad, book_of_wisdom)[0]

[1mComparing TF-IDF of Upanishad AND Wisdom:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
man,63
therefore,32
god,31


## Group 5 - Yoga Sutra

In [84]:
print("\033[1m" + "Comparing TF-IDF of Yoga Sutra AND Proverbs:" + "\033[0m")
display(tf_idf_comparison(yoga_sutra, book_proverb)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(yoga_sutra, book_proverb)[0]

[1mComparing TF-IDF of Yoga Sutra AND Proverbs:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
man,239
soul,94
things,88


In [85]:
print("\033[1m" + "Comparing TF-IDF of Yoga Sutra AND Ecclesiastes:" + "\033[0m")
display(tf_idf_comparison(yoga_sutra, book_of_ecclesiastes)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(yoga_sutra, book_of_ecclesiastes)[0]

[1mComparing TF-IDF of Yoga Sutra AND Ecclesiastes:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
man,239
one,108
things,88


In [86]:
print("\033[1m" + "Comparing TF-IDF of Yoga Sutra AND Eccleasiasticus:" + "\033[0m")
display(tf_idf_comparison(yoga_sutra, book_of_eccleasiasticus)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(yoga_sutra, book_of_eccleasiasticus)[0]

[1mComparing TF-IDF of Yoga Sutra AND Eccleasiasticus:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
man,239
one,108
soul,94
things,88


In [87]:
print("\033[1m" + "Comparing TF-IDF of Yoga Sutra AND Wisdom:" + "\033[0m")
display(tf_idf_comparison(yoga_sutra, book_of_wisdom)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(yoga_sutra, book_of_wisdom)[0]

[1mComparing TF-IDF of Yoga Sutra AND Wisdom:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
man,239
life,148
things,88


### As shown in these comparisons, not a single shared tf_idf was shown, and at most 5 similar top word counts were shared.

## Since the religions of Yoga Sutra, Hinduism, and Buddhism share the same origin, Asia, we figured grouping them by REGIONS instead of RELIGIONS would provide us different, but important results. 
## This could also give us insight into a smaller amount of groups.
### It is important to note that Yoga Sutra shares the Hindu system of Dualism, and the religion Buddhism agrees with many Hindu fundamentals such as karma, dharma, moksha, and reincarnation.

### Group Middle East: Proverbs, Ecclesiastes, Eccleasiasticus, Wisdom
Comparisons made above

### Group Asia: Upanishad (Hinduism), Yoga Sutra, Buddhism

In [89]:
print("\033[1m" + "Comparing TF-IDF of Upanishad AND Yoga Sutra:" + "\033[0m")
display(tf_idf_comparison(upanishad, yoga_sutra)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(upanishad, yoga_sutra)[0]

[1mComparing TF-IDF of Upanishad AND Yoga Sutra:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
one,100
self,79
mind,71
man,63
must,46


In [90]:
print("\033[1m" + "Comparing TF-IDF of Upanishad AND Buddhism:" + "\033[0m")
display(tf_idf_comparison(upanishad, buddhism)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(upanishad, buddhism)[0]

[1mComparing TF-IDF of Upanishad AND Buddhism:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
one,100
mind,71


In [91]:
print("\033[1m" + "Comparing TF-IDF of Yoga Sutra AND Buddhism:" + "\033[0m")
display(tf_idf_comparison(yoga_sutra, buddhism)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(yoga_sutra, buddhism)[0]

[1mComparing TF-IDF of Yoga Sutra AND Buddhism:[0m


Unnamed: 0,tf_idf
consciousness,0.03646


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
consciousness,122
one,108
mind,98
body,52


## Although only a max. of 1 common TF_IDF keyword was found, the most common keywords that are shared among this group are "consciousness," "one," "mind," "body," and "self," which leads us to believe that these are very self-centered religions.

## The book of Taoism, Tao Te Ching, appears to have no big similarities with any of the other books:

In [92]:
print("\033[1m" + "Comparing TF-IDF of Tao Te Ching AND Buddhism:" + "\033[0m")
display(tf_idf_comparison(tao_te_ching, buddhism)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(tao_te_ching, buddhism)[0]

[1mComparing TF-IDF of Tao Te Ching AND Buddhism:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
one,51
way,24


In [93]:
print("\033[1m" + "Comparing TF-IDF of Tao Te Ching AND Proverbs:" + "\033[0m")
display(tf_idf_comparison(tao_te_ching, book_proverb)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(tao_te_ching, book_proverb)[0]

[1mComparing TF-IDF of Tao Te Ching AND Proverbs:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
things,56
way,24


In [94]:
print("\033[1m" + "Comparing TF-IDF of Tao Te Ching AND Eccleasiasticus:" + "\033[0m")
display(tf_idf_comparison(tao_te_ching, book_of_eccleasiasticus)[0])
print("\033[1m" + "Now, comparing by similar top word counts:" + "\033[0m")
top_words_comparison(tao_te_ching, book_of_eccleasiasticus)[0]

[1mComparing TF-IDF of Tao Te Ching AND Eccleasiasticus:[0m


Unnamed: 0,tf_idf


[1mNow, comparing by similar top word counts:[0m


Unnamed: 0,count
things,56
one,51
men,45


## The keywords that Tao Te Ching shares with other books are meaningless keywords.