## Importing necessary libraries

In [1]:
#importing necessary libraries
import pandas as pd
import re #for reg expression manipulation
import os #for file handling and OS procedures
import glob #to find all the pathnames matching a specified pattern 

#for natural language processing and catching the eng words by stem
from nltk.stem.snowball import SnowballStemmer
import nltk

#for tracking time-stats
import time

## identifying current working directory

In [2]:
#identifying current working directory
curr_work_dir= os.getcwd()
curr_work_dir

'C:\\Users\\ravie'

## defining location of the song-directories

In [3]:
#defining location of the directories in which the song files are present
filepath1= curr_work_dir + '\McCutchionAndBhowmick_Songs'
filepath2= curr_work_dir + '\Weslyan_Songs'

#making a list of the two directories
filepath_list=[filepath1, filepath2]
filepath_list

['C:\\Users\\ravie\\McCutchionAndBhowmick_Songs',
 'C:\\Users\\ravie\\Weslyan_Songs']

## using the words_alpha.txt dictionary
##### from https://github.com/dwyl/englishwords

In [4]:
#defining a separate function to load dictionary of words 
#using the words_alpha dictionary text file suggested in the Assignment pdf

def load_words():
    '''This function loads words from the words_alpha.txt
    and returns the set of all unique words loaded.'''
    
    #opening the .txt file in read mode and utf-8 encoding
    with open('words_alpha.txt', mode='r', encoding='utf-8') as word_file:
        valid_words = set(word_file.read().split())

    return valid_words


## The mapper function

In [5]:
#definig the mapper function
def mapper_func(file_name_loc, contents):
    '''This is the mapper function that takes as input 
    the file location and the contents and then performs the following
    tasks on the content of the file:
    1) printing the raw file-content
    2) replacing '-' and '_' with space
    3) stripping white spaces and non-alpha characters 
    4) converting the string to smaller case
    5) splitting string to list of words
    6) using SnowballStemmer to weed out english words
    7) rejoining the list of stem_checked_words as string to run 
        dictionary check
    8) running through the string and identifying non-eng words using the 
        dictionary of words defined earlier
    9) keeping only the non-english words filtered
    10) returning the final non-english string
    '''
    
    #1) printing the raw file-content
    print("orginal file contents")
    print("-"*50)
    print(contents)
    print("-"*50)
    
    ##cleaning string- replacing '-' and '_' with space
    #stripping the spaces at start and end and converting the string to lowercase
    temp1= re.sub('-',' ', contents)
    temp1= re.sub('_',' ', contents)
    temp1 = re.sub(r'[^\w\s_]+', ' ', temp1).strip().lower()
    
#     ##
#     print("after cleaning-Round 1")
#     print("-"*50)
#     print(temp1)
#     print("-"*50)
    
    ##breaking the string into a list of words
    content_list1= temp1.split()
    print("The String as List of words after cleaning")
    print("-"*50)
    print(content_list1)
    print("-"*50)
    
    ##Using snowball stemmer to identify stem of all english words
    #if non-english, it returns the full word
    snow_stemmer = SnowballStemmer(language='english')

    
    #looping on each word and doing stem check 
    #keeping the word if it doesn't change after stem-check
    #this means that there is a high-probability that the word is non-english
    #we will be furthe cleaning this list
    after_stem_check=[]
    for words in content_list1:
        x = snow_stemmer.stem(words)
        if x== words:
            after_stem_check.append(x)
    
    print("words having same stem in english or non-english")
    print("-"*50)
    print(after_stem_check)
    print("-"*50)
    
    
    #rejoining the list as a string
    new_str= ' '.join(after_stem_check)
#     print("The string after english words stem check")
#     print("-"*50)
#     print(new_str)
#     print("-"*50)
    
    #loading our dictionary of words
    english_words = load_words()
    
    #filtering out non-english words 
    #keeping only the non-emglish words
    non_eng_str_updated=" ".join(w for w in nltk.wordpunct_tokenize(new_str) \
         if w.lower() not in english_words)

    print("-"*50)
    print("The string with all non-eng words identified")
    print("-"*50)
    print(non_eng_str_updated)
    print("-"*50)
    
    #dict1= dict(file_name_loc= non_eng_str_updated )
    
    #returning the final non-eng-string
    return(non_eng_str_updated)

# The reducer Function

In [6]:
def reducer_func(non_eng_string):
    ''' This function acts as the reducer function for this assignment.
    It takes as input a string and splits it by spaces to create a list 
    and then count the number of words in the string.
    
    This function then returns the count of total number of words in 
    the string'''
    #creating list of words from the received non_eng_string
    list_non_eng_words= non_eng_string.split()
    
    #counting length of the list created
    total_count= len(list_non_eng_words)
    #print(total_count)
    
    #returning the total_count as output
    return(total_count)

### Calling mapper to generate intermediate space.



In [7]:
#looping on each filepaths

#creating a dictionary variable to be used as intermediate space
#this dictionary will hold key value pairs for file_name_loc as the key 
#and non_eng_str returned from the mapper as value.

final_dict= {}

start = time.time()
#looping on each filepaths of the 2 directories of songs
for path in filepath_list:
    
    #looping on each file's location within each directory
    for file_name_loc in glob.glob(os.path.join(path, '*.txt')):
            #print(filename)
            #opening the file in read mode to read content
            with open(file_name_loc, mode='r', encoding='utf-8') as f:
                contents = f.read()
            
            #calling the mapper function 
            #storing the returned non-eng-string into non_eng_str
            non_eng_str= mapper_func(file_name_loc, contents)
            #print(non_eng_str_dict)
            
            #appending received string as value with the key as 
            #file_name_loc to the final_dict dictionary
            final_dict[file_name_loc]=non_eng_str 
            
end = time.time()

print("The time of execution of mapper program is :", end-start)


orginal file contents
--------------------------------------------------
Name: Behula Bhasan
Translation Source: McCutchion and Bhowmick

Text:
Victory to Manasa, Jagat Gauri, Bishahari
Supremely beautiful on eight snake heads.
Her royal throne and royal bed are both upon the snakes --
She takes her seat especially on the Mangala python.
Angry and full of poison, she is moving on her way;
The wicked merchant Chand Sadagar looks at her askance
From the corner of his eyes, and twisting his beard
He takes a club of hetal wood and puts it on his shoulder.
And Bishahari got to hear about it straight away --
In a fury she snatched away the six sons of Chand,
And six dead sons meant six young widows.
Never in his life had he offered so much as a tiny flower,
Yet the singers are singing in honey-sweet tones,
And everybody on the battlefield worships
The one-eyed fish-head goddess!

Look at the precious youngest son named Lakhindar!
Lets go to Chanpainagar and arrange for his marriage.
The merc

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
behula bhasan mccutchion bhowmick manasa gauri bishahari mangala chand sadagar hetal bishahari chand lakhindar chanpainagar amulya champainagar behula janardhan lakhindar chand sadagar lakhindar kalini lakhindar lakhindar chandra kalini lakhindar lakhindar nera sadagar manasa benana vegitarian champatala beltala gadaghat gadadhar ganguri rui phalaria gada gada manasa manasa gada gadaghat baidyaghat chingridaha boaldaha tamlukghat nitai tamluk dhona tamluk nitai kanchra behula behula behula manasa chand sadagar jata
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Attacking Emancipated Women
Translation Source: McCutchion and Bhowmick

Text:
We all got together and installed a mechanical husking machine in the village. The women got their independence, telling th

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
mccutchion bhowmick pitri
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Engrejer Pat
Translation Source: McCutchion and Bhowmick

Text:
Harbar Ray, Garbar Singh, Chhuchhika, Puchinga, Haro
Khan with all his bag and baggage, Bachha Korar Khan, Gunju
Khan Hazari, made the Tuslika into Pathans. They harrassed
Ramprasad Babu and put Nitai Singh Daraga into prison. 
Chingu Babu hid himself. Being harrassed, like a rosagalla
leaving his house, went to see the temples of gods said to be at 
Burdwan.

Than Singh, Chhuchikka, Puchinga, Haro Khan with all 
his bag and baggage, Bachha Korar Khan, Gunju Khan Hazari,
made the Tuslika into Pathans.

They did not care for the Babus of the Maghs (corrupt)
They are taking ras in pots, walking on the sandy roads. Being
harrass

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
mccutchion bhowmick indrapuri mustn chitrakar amdabad
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Gazi Pat
Translation Source: McCutchion and Bhowmick

Text:
The victory of Gazi sahib, the incarnation of kindness --
Riding on the back of the tiger he has been the best rider.
There is no definite description of his form,
But he does have a topee and a long beard. 
And he has at least the following forms,
Saisatol, Gazi, and Badai Manic.
He's known as Lord Naryan among the Hindus and 
Pir among the Muslims.
He has appeared to receive worship from both the communities.

The tigers are big in size and their teeth look
like long white radishes.
They can grind stones into dust.
In his hand there is a club of the sal tree.
His beard is full and his face is very be

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
mccutchion bhowmick saisatol badai naryan terracotta sirni sudrhan arjuna sirni sirni sirni astana sirni satya narayan goda ramjan
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Jagannath Pat
Translation Source: McCutchion and Bhowmick

Text:
Men go to Orissa and buy rice to eat --
Now open your eyes and see Lord Jagannath:
On the right Balaram, in the middle his sister,
and on her left Kalachand himself.

Here is Lord Kalachand, the best of all,
With the power of his own name he has saved
the living creatures
Twelve houses round walled like red cloud
And the drums are beating in front of the Lion gate.
If you pour water on the tulsi plant, you will
have a place of virtue,
Your family will be saved and you will enter on great happiness.
At Rohini Kundal a crow

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
mccutchion bhowmick orissa balaram kalachand kalachand rohini kundal baikuntha kashinath ekadasi parvati dhenki narada muni prasad sada gauri prasad jata durga maheswar gauri katwa chaitanya nitai hari hari jagai madhai chaitanya chanda rathajatra asharh balaram hari hari kalachand
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Krishna Lila
Translation Source: McCutchion and Bhowmick

Text:
Krishnaji was playing his flute beneath the tree,
And the heart of Radha forgot her housework and flew away.
Kanai appeared on the path with the flute in his hand,
At the sight of him, Radha bent her head low.

"At the sight of me you have covered up 
the treasure of many kings!
Nobody called me, Radhi, but here I am --
Now tell me, Radhi, how will you get away?"
"You are m

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
mccutchion bhowmick krishnaji radha kanai radha radhi radhi kanai ayan ghosh ve jamuna jamuna brisha bhanu jamuna kajal radha kanai radha radha radha
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Ras Lila
Translation Source: McCutchion and Bhowmick

Text:
On the orders of Krishna, Shyama played
the flute beneath the tree.
And when they heard it, all the Braja gopis
could not hold in their hearts.
Some dance, some sing, and some call out, "Hari"
The devotees are rolling about for sheer joy.
The Lord has tied on his turban, and on it
fixed a peacock's feather,
Round the top wound necklaces of various kinds of
Braja jasmin flowers,
And on top of all the jasmin flowers placed a bakul flower.
Golden anklets ring out on his feet.

The Lord went silently to Bidur's 

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
mccutchion bhowmick shyama braja hari braja bakul bidur durjyodhan bidur ve bidur bidur bidur vaisnava bidur hari mathura kangsa yama paban gobinda garur dwaraka mahadeva hari panchanan tripurari kangsa devaki devaki devaki rai patua govinda bapu kangsa bhadra govinda devaki narayana vasuki patal jamuna kalindi narayana devaki kangsa durga himalay gokul putana jasoda sakhi hari putana ve putana birat putana aghasur bakasur kesari pramila shandasur damodar josoda balaram josoda saket kanai basu jamal arjuna janardan arjuna radhika jamuna govinda kadam radhika kanai radha sakhi sakhi barai barai radha narayan nidhuban ayan ghosh srimati murali kalidaha thakurali radha jasoda braja garur garur patal madan mohan brindaban brindaban radha akrur gokul
--------------------------------------------------
orginal file contents
------------------------

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
mccutchion bhowmick harbar garbarh madan moka rahati ripot sipot kalpot mejor sejor daktar kalektar rahati dandikini podbasan sadagarghat balur ghatanimko bilat tipu nimko chandrasekhar ghosh bata bagri nitai bata daymal harinbarim kishor chatraganj naldanga patharghat
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Srimanta Masan
Translation Source: McCutchion and Bhowmick

Text:
Durga, Durga, Mother Tara, luxuriator in sorrows.
Unconquerable Kali of the South, the daughter of Nagendra!
Ten-armed Mother Chandi on guard in ten directions --
Her third eye glitters brightly in the center of her forehead,
And (her children) Lakshmi, Saraswati, Kartik and Ganesh.
The lion, the asura, Jaya and Bijaya, accompany the Mother.

One day she showed her kindness to Kalketu

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
srimanta masan mccutchion bhowmick durga durga nagendra saraswati kartik ganesh asura jaya bijaya kalketu kalketu gujarat phullara dhanapati srimanta khullana srimanta durga durga srimanta durga durga magara ratnamala ratnamala salban srimanta salban salban srimanta salban jala bhagavati srimanta srimanta durga srimanta salban durga durga
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Steamer Song
Translation Source: McCutchion and Bhowmick

Text:
The steamer for Kakdwip
Carries men across.
It was filled up with passengers at the Petua Ghat
In the year 1341
On the 24th day of Jyestha --
It was a Thursday.

In Contai subdivision there had been music for a wedding:
From the village of Buia Balasi came the bride and bridegroom.

Taking on thirty seven cows,
The s

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
swadhinata mccutchion bhowmick 1947
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Bin Laden - 11th September
Translation Source: Wesleyan University
Artist: Hazra Chitrakar

Text:
The US plane attacks—11th September

The plane attacks on US are a weird incident. George Bush tells Laden what a mystery is this?  There can be no greater harm than what you have done. Oh God tell me how I can erase this sorrow! The weird incident of the plane attacks.
Laden tells all his men—listen. My nemesis is approaching. I can’t stay here any more.
I am being blamed by everyone. Oh Khoda tell me why.
The plane attacks on US are a weird incident.
George Bush appeals to other Westerners to help him catch Laden. Some say it’s Ramji Singh, others think of Abu Salem. Yet others th

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
hazra chitrakar durga 10 bhabani jaya bijoya laksmi kartik saraswati ganesha asura kalketu dalim kalketu gujarat khullana srimanta durga durga srimanta kamini shalibahan srimanta kamini srimanta srimnata kalidaha srimanta durga kailash durga 18 hazra chitrakar naya pingla midnapur
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Chandi Mangal
Translation Source: Wesleyan University
Artist:  Chitrakar

Text:
Durga, Durga, Tara oh mother, the remover of distress
Hard to vanquish Dakshina Kali, the daughter of the king of mountains (Himalaya)
Lakshmi and Saraswati are on the left. Kartik, Ganesh, the lion, the Demon, Jaya and Bijoya (the two friends—sakhi) are with the mother.
One day mother Durga was very pleased. She showed the jewels under the pomegranate tree.


--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
chitrakar durga durga dakshina saraswati kartik ganesh jaya bijoya sakhi durga kalketu dalim gujarat 14 srimanta khullana srimanta durga bhabani magra srimanta kamini kamini ganesha srimanta srimanta ratnamala dhamsa shalibahan srimanta srimanta shalibahan kalidaha kamini bhagabati srimanta srimanta durga 18 srimanta shalibhan meena chitrakar naya pingla midnapur
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Communal Harmony
Translation Source: Wesleyan University
Artist: Monimala Chitrakar

Text:
We are the human race—children of the same mother.

Some are Hindus, some Muslims all from the same mother.

We are the human race—children of the same mother.

Some are Hindus, some Muslims all from the same mother

Adam mother conceived and Habil and Kabil were bo

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
gujrat swarna chitrakar gujarat gujarat khoda gujarat khoda khoda khoda khoda khoda khoda khoda khoda khoda
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Krishna Lila
Translation Source: Wesleyan University
Artist: Jamuna Chitrakar

Text:
As soon as Krishna started playing on his flute under a tree, all the women of Braja were affected.

Some danced, some sang, some called to Hari. And Radhika the dreamy one rolled on the ground.

Radha went to bring water with a rolling gait, her sari covering her head. Krishna showed up on the banks. Radha pulled her veil closer when she saw him. You are my nephew by marriage, I happen to be your aunt. How can you tease me so, shameless Kanai?

Let Ayan Ghosh (Radha’s husband) come back; I’ll break your head and tear your h

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
11 lutfa chitrakar 5
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Bin Laden – 11 September (Laden Story)
Translation Source: Wesleyan University
Artist: Mayna Chitrakar

Text:
11th September, the story of American Center’s destruction.

O the strange story of the plane attack in America. Repeat.
Everybody was shocked, saying—we haven’t heard something like this ever.
A fighter plane came and hit a 110 story building and destroyed it. All who were there perished, burning alive none of them survived.
O the strange story of the plane attack in America.
Many people worked in the building. They were killed in the fire, with god’s name on their lips. What sins had they committed o god, you tell us.
O the strange story of the plane attack in America.
Mothers cry, a

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
11 swarna chitrakar 11 wtc wtc sujoy ajay wtc ajay 15th sonarpur ajay 11th sujoy
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Manasha Mangal
Translation Source: Wesleyan University
Artist: Jamuna Chitrakar

Text:
Jai Manasa, the fairest one, Jai Bishahari (the remover of poison), Manasa, the virgin, who was born in a lotus. Her ornamental bed was made of snakes; so was her throne. Mangala, seated on the back of Barari. The merchant (Chand Sadagar) threatens and roars, twisting his beard, with a hental (hard wood) stick on his shoulder. “If fate allows me to get access to you (Manasa) I’ll chop you to pieces.”

Manasa heard the abuse herself and conspired to kill the six sons of Chand. Their wives became widows, all six of them. They couldn’t have any childre

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
manasha jamuna chitrakar manasa bishahari manasa mangala barari chand sadagar hental manasa manasa chand lakhindar nichhani nagar behula amulya benani janardan lakhindar chand lakhindar kalinat nagini lakhindar lakhindar 2 5 chand panta behula manasa sindur behula lakhindar
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Sita, Ramayana
Translation Source: Wesleyan University
Artist: Swarna Chitrakar

Text:
Ram Prince of Ayodhya and hero of the Ramayana, he is depicted as a human hero and as an avatara or incarnation of Vishnu, usually counted as the seventh incarnation in a list of ten. He married Sita and was to assume the throne but, due to palace intrigue, was exiled to the forest, where he was accompanied by his wife and younger brother Lakshmana. There the

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
ramayana swarna chitrakar ayodhya ramayana lakshmana ravana janaki lakshman lakhman panchabati lakhsman lakhsman shurpanakha lakshman ravana ravana marich panchabati lakshman marich chandramukhi lakkshman lakkshman laksman sumitra
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Santal Origins
Translation Source: Wesleyan University
Artist: Lutfa Chitrakar

Text:
In the beginning, with the blessings of Jagannath, Balaram, and Subhadra the crab, turtle, snake, and fish went to Hades to collect soil. Then three cows were produced by Mother Earth.

The cows were Ayen Gai, Rayengai, and Kapila Gai. From their saliva, two birds came. From the eggs of the birds, two humans were born, a man and a woman—Pilchu Haram and Pilchu Buri.

They had seven boys. Pilchu Haram an

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
chitrakar muri gadaburo
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Satya Pir
Translation Source: Wesleyan University
Artist: Jamuna Chitrakar

Text:
Where are you o Satya Pir; I’m in your hands. Who else but you can save us from shame?
You are Narayan for the Hindus, Pir for the Momins
Satya Pir says—I’ll take the name Satya (truth) and establish my identity to the king of Sindhu (Sind?)
having thought thus Satya Pir went to the kingdom disguised as mendicant (fakir).
He tells the king—you’ll have a child by my blessings if you worship me.
The king asked—what are the things that are required to worship you? The fakir said—let me tell you first.
You must give me a house and tie a cow and her calf there.
The king and queen promised to do it. Some time later 

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
satya jamuna chitrakar satya narayan satya satya sindhu satya sinni ve sinni satya sinni sanatan mondol
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Satya Pir
Translation Source: Wesleyan University
Artist: Rani Chitrakar

Text:
Arise oh Baba—Saheb Satya pir.
Where are you—Satya pir, I have taken shelter in you.
Who else will cover our shame?
Arise oh Baba. Arise oh Baba, oh Saheb Satya Pir.
Baba has clogs on his feet; shackles round his waist, with a stick in his hand.Arise oh Baba—Saheb Satya pir.
Where are you—Satya pir, I have taken shelter in you.
Who else will cover our shame?

Arise oh Baba.
Baba, you are Narayan for the Hindus, Pir for the Muslims. You are great, having sinni from both communities.

Arise oh Baba—Saheb Satya pir.
Where are you—Satya 

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
satya chitrakar saheb satya satya saheb satya saheb satya satya narayan sinni saheb satya satya saheb satya satya sindhu saheb satya satya satyapir sindhu saheb satya satya saheb satya satya saheb satya satya saheb satya satya saheb satya satya saheb satya satya saheb satya satya shinni saheb satya satya satya saheb satya satya shinni saheb satya satya saheb satya satya astana shinni saheb satya satya sanatan mandal saheb satya satya saheb satya satya saheb satya satya saheb satya satya narayan saheb satya satya
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: The Abduction of Sita
Translation Source: Wesleyan University
Artist: Hazra Chitrakar

Text:
Ram  was married with all the rituals. He had to go to the forest to honour his father’s promise. Ram went ahead

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
hazra chitrakar janaki lakshman lakhman panchabati lakhsman lakhsman shurpanakha lakshman ravana ravana marich panchabati lakshman marich chandramukhi lakkshman lakkshman lakkshman sumitra laksshman ravana laksmi ravana ravana lanka jatayu dasharath ravana jatayu ravana
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: The Girl Child
Translation Source: Wesleyan University
Artist: Rani Chitrakar

Text:
Listen to me all of you, listen carefully.
Let me talk about girl children. Pay attention please.

Parents are sad at the birth of girl children. O uncontrollable mind, girl children are neglected. Listen to me all of you, listen carefully.

Why should the little flowers be neglected, close to her mother’s heart?
Listen to me all of you, listen carefully.

A little

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
swarna chitrakar matangini hajra
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Tsunami
Translation Source: Wesleyan University
Artist: Hazra Chitrakar

Text:
Tsunami the destruction of everything took so many lives, took so many lives.
So many people drowned; there is no record of how many died.
So many houses went under water, nobody knows how many.
Tsunami, the destruction, took so many lives, took so many lives.

Dayal, it took lives.
Many journalists were trying to get news. They came in groups to help and ended up as saviors, providing courage.
Tsunami the destruction of everything took so many lives, took so many lives.
Birds and beasts cried and said oh Khoda how can we survive?
Tsunami the destruction of everything took so many lives, took so many liv

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
snehalata chitrakar khela lanka 12
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: The Victimization of Women
Translation Source: Wesleyan University
Artist: Rani Chitrakar

Text:
Listen to me everyone, listen carefully. Let me speak about the murder of women.
Scientists have invented such a machine that everything inside a pregnant woman can be seen. If it is a baby girl they suck her out and kill her. Parents are unhappy if a girl child is born. They don’t send her to school, but to the kitchen.

Women work beside their husbands on the fields. They come back home and do all the housework. There is no peace in a woman’s life. So many women are killed for dowry demands. Oh destiny, women are put on the funeral pyre with their husbands.

Women fight for freedom.

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
swarna chitrakar kalipada ranaghat nadia kalipada haripada durga kalipada bardhaman durga baruipur ushadidi rajen baruipur kalipada rajen 25 000 rajen 5000 kalipada usha rajen durga 5000 kalipada durga
--------------------------------------------------
orginal file contents
--------------------------------------------------
Name: Wedding of the fish
Translation Source: Wesleyan University
Artist: Meena Chitrakar

Text:
Tangra fish says—I’ll be the ear ring Rangila. Oh Rangila, I am going to arrange the marriage of the fishes today.
Pankal fish says—I’ll be the necklace Rangila. Oh Rangila, I am going to arrange the marriage of the fishes today.
Chanda fish (pomfret) says—I’ll be the nose ring Rangila. Oh Rangila, I am going to arrange the marriage of the fishes today.
The eel says—I’ll be the waist band Rangila. Oh Rangila, I am going to arr

--------------------------------------------------
The string with all non-eng words identified
--------------------------------------------------
rukmini chitrakar rangila daria rangila daria katla rangila rangila daria taroi shehnai rangila rangila daria shol rangila rangila daria magur rangila rangila daria singi rangila rangila daria chanda rangila rangila daria rangila rangila daria rangila rangila daria rangila rangila daria boyal rangila rangila daria
--------------------------------------------------
The time of execution of mapper program is : 9.994345903396606


## Defining and showcasing intermediate Space
    #### -generated as ouput of each of the mapper calls

In [8]:
#defining intermediate space as the collection of key value pairs in the
#final_dict dictionary
intermediate_space= final_dict
print("intermediate_space\n",intermediate_space )

intermediate_space
 {'C:\\Users\\ravie\\McCutchionAndBhowmick_Songs\\BehulaBhasan.txt': 'behula bhasan mccutchion bhowmick manasa gauri bishahari mangala chand sadagar hetal bishahari chand lakhindar chanpainagar amulya champainagar behula janardhan lakhindar chand sadagar lakhindar kalini lakhindar lakhindar chandra kalini lakhindar lakhindar nera sadagar manasa benana vegitarian champatala beltala gadaghat gadadhar ganguri rui phalaria gada gada manasa manasa gada gadaghat baidyaghat chingridaha boaldaha tamlukghat nitai tamluk dhona tamluk nitai kanchra behula behula behula manasa chand sadagar jata', 'C:\\Users\\ravie\\McCutchionAndBhowmick_Songs\\Emancipated.txt': 'mccutchion bhowmick pitri', 'C:\\Users\\ravie\\McCutchionAndBhowmick_Songs\\EngrejerPat.txt': 'mccutchion bhowmick harbar garbar chhuchhika puchinga haro bachha korar gunju hazari tuslika ramprasad nitai daraga chingu rosagalla burdwan chhuchikka puchinga haro bachha korar gunju hazari tuslika chingubabu ramprasad phans

## Calling reducer using the intermediate space contents as input

#### using reducer output to create the final output with file and non-eng-word-count as key, value pairs of output_dict dictionary

In [9]:


#defining output dictionary that will store the key value pairs
#for file_names (folder wise) as key and total non_eng_word_count as value
output_dict= {}
start1 = time.time()
#looping over each of the key and value pairs in the itermediate space:
for key, value in intermediate_space.items():
    
    file_name= key.replace('C:\\Users\\ravie\\', '')
    non_eng_word_count= reducer_func(value)
    output_dict[file_name]= non_eng_word_count
    
end1 = time.time()

print("The time of execution of reducer program is :", end1-start1)

The time of execution of reducer program is : 0.0


# showcasing final output

In [10]:
output_dict

{'McCutchionAndBhowmick_Songs\\BehulaBhasan.txt': 65,
 'McCutchionAndBhowmick_Songs\\Emancipated.txt': 3,
 'McCutchionAndBhowmick_Songs\\EngrejerPat.txt': 29,
 'McCutchionAndBhowmick_Songs\\FrenchRevolution.txt': 6,
 'McCutchionAndBhowmick_Songs\\GaziPat.txt': 18,
 'McCutchionAndBhowmick_Songs\\JagannathPat.txt': 38,
 'McCutchionAndBhowmick_Songs\\KrishnaLila.txt': 23,
 'McCutchionAndBhowmick_Songs\\RasLila.txt': 109,
 'McCutchionAndBhowmick_Songs\\SahibPat.txt': 35,
 'McCutchionAndBhowmick_Songs\\SrimantaMasan.txt': 44,
 'McCutchionAndBhowmick_Songs\\SteamerSong.txt': 13,
 'McCutchionAndBhowmick_Songs\\SwadhinataSong.txt': 4,
 'Weslyan_Songs\\BinLaden.txt': 7,
 'Weslyan_Songs\\ChandiMangal_v1.txt': 38,
 'Weslyan_Songs\\ChandiMangal_v2.txt': 47,
 'Weslyan_Songs\\CommunalHarmony.txt': 8,
 'Weslyan_Songs\\GujratRiots.txt': 16,
 'Weslyan_Songs\\KrishnaLila.txt': 22,
 'Weslyan_Songs\\LadenStory.txt': 4,
 'Weslyan_Songs\\LadenStory_01a.txt': 5,
 'Weslyan_Songs\\LadenStory_02a.txt': 15,
 'We

### formatting the output as a table

In [11]:
# formatting the output as a table
print ("{:<60} {:<10} ".format('FILE_NAME', 'COUNT_NON_ENG_'))
 
# print each data item.
for key, value in output_dict.items():
    file= key
    count= value
    print ("{:<60} {:<10}".format(file, count))

FILE_NAME                                                    COUNT_NON_ENG_ 
McCutchionAndBhowmick_Songs\BehulaBhasan.txt                 65        
McCutchionAndBhowmick_Songs\Emancipated.txt                  3         
McCutchionAndBhowmick_Songs\EngrejerPat.txt                  29        
McCutchionAndBhowmick_Songs\FrenchRevolution.txt             6         
McCutchionAndBhowmick_Songs\GaziPat.txt                      18        
McCutchionAndBhowmick_Songs\JagannathPat.txt                 38        
McCutchionAndBhowmick_Songs\KrishnaLila.txt                  23        
McCutchionAndBhowmick_Songs\RasLila.txt                      109       
McCutchionAndBhowmick_Songs\SahibPat.txt                     35        
McCutchionAndBhowmick_Songs\SrimantaMasan.txt                44        
McCutchionAndBhowmick_Songs\SteamerSong.txt                  13        
McCutchionAndBhowmick_Songs\SwadhinataSong.txt               4         
Weslyan_Songs\BinLaden.txt                                 

## exporting the output to a .csv file as ouput

In [12]:
#exporting the dictionary to a .csv file as ouput

#converting the dictionary to a dataframe first
df = pd.DataFrame.from_dict(output_dict, orient="index")

#saving the dataframe as .csv file
df.to_csv('output.csv')