# **Map Reduce On Titanic Database**

### This notebook demonstrates MapReduce on the Titanic passenger collection stored in MongoDB. Each cell below states the question it answers and the map/reduce functions used to answer it.

In [1]:
from pymongo import MongoClient
# The MongoClient itself is created in the next cell, with an explicit host
# and port, so no default-configured client is built here.

In [2]:
# Connect to the MongoDB server on localhost and bind the database and the
# collection that the cells below query.
client = MongoClient(host='localhost', port=27017)
db = client['MiniProject']
titanic = db['Titanic']

In [3]:
# Fetch a single document so the field names used by the map functions
# (Pclass, Embarked, Fare, Survived, Sex, Age, ...) can be seen at a glance.
a = titanic.find_one()
a

{'_id': ObjectId('5f59bcc667c280168a424a25'),
 'PassengerId': 2,
 'Survived': 1,
 'Pclass': 1,
 'Name': 'Cumings, Mrs. John Bradley (Florence Briggs Thayer)',
 'Sex': 'female',
 'Age': 38,
 'SibSp': 1,
 'Parch': 0,
 'Ticket': 'PC 17599',
 'Fare': 71.2833,
 'Cabin': 'C85',
 'Embarked': 'C'}

In [4]:
# bson.code.Code wraps the JavaScript source of the map and reduce functions so they can be passed to map_reduce
from bson.code import Code

In [5]:
# Number of passengers in each ticket class (Pclass).
# NOTE(review): Collection.map_reduce was removed in PyMongo 4 - confirm the
# PyMongo version this notebook is expected to run against.

# map: emit one {count: 1} per document, keyed by the ticket class.
mapFunc = Code("function(){\
                emit(this.Pclass,{count: 1});\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 1.0, 'value': {'count': 216.0}}
{'_id': 2.0, 'value': {'count': 184.0}}
{'_id': 3.0, 'value': {'count': 491.0}}


In [6]:
# Number of passengers per port of embarkation (Embarked).
# Documents without an Embarked value end up grouped under a null key.

# map: emit one {count: 1} per document, keyed by the embarkation port.
mapFunc = Code("function(){\
                emit(this.Embarked,{count: 1});\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': None, 'value': {'count': 2.0}}
{'_id': 'C', 'value': {'count': 168.0}}
{'_id': 'Q', 'value': {'count': 77.0}}
{'_id': 'S', 'value': {'count': 644.0}}


In [7]:
# Total fare collected from the passengers of each ticket class (Pclass).
# The result field is called 'count' but holds the sum of the fares.

# map: emit the passenger's fare, keyed by ticket class. 'key' and 'val' are
# declared with var so they stay local to the function instead of becoming
# implicit JavaScript globals.
mapFunc = Code("function(){\
                    var key = this.Pclass;\
                    var val = this.Fare;\
                    emit(key,{count: val});\
                    }")
# reduce: add up the 'count' field (a fare) of every value emitted for a key
# and return the sum in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                    var sum = 0;\
                    values.forEach(function(value){\
                        sum += value['count']});\
                    return {count: sum}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 1.0, 'value': {'count': 18177.412499999984}}
{'_id': 2.0, 'value': {'count': 3801.8416999999995}}
{'_id': 3.0, 'value': {'count': 6714.695100000003}}


In [8]:
# Total fare collected from the passengers of each port of embarkation.
# The result field is called 'count' but holds the sum of the fares.

# map: emit the passenger's fare, keyed by embarkation port. 'key' and 'val'
# are declared with var so they stay local to the function instead of becoming
# implicit JavaScript globals.
mapFunc = Code("function(){\
                    var key = this.Embarked;\
                    var val = this.Fare;\
                    emit(key,{count: val});}")
# reduce: add up the 'count' field (a fare) of every value emitted for a key
# and return the sum in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                    var sum = 0;\
                    values.forEach(function(value){\
                        sum += value['count']});\
                    return {count: sum}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': None, 'value': {'count': 160.0}}
{'_id': 'C', 'value': {'count': 10072.2962}}
{'_id': 'Q', 'value': {'count': 1022.2543000000001}}
{'_id': 'S', 'value': {'count': 17439.398799999963}}


In [9]:
# Total fare summed separately over the deceased (Survived = 0) and the
# survivors (Survived = 1).
# The result field is called 'count' but holds the sum of the fares.

# map: emit the passenger's fare, keyed by the Survived flag. 'key' and 'val'
# are declared with var so they stay local to the function instead of becoming
# implicit JavaScript globals.
mapFunc = Code("function(){\
                    var key = this.Survived;\
                    var val = this.Fare;\
                    emit(key,{count: val});}")
# reduce: add up the 'count' field (a fare) of every value emitted for a key
# and return the sum in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                    var sum = 0;\
                    values.forEach(function(value){\
                        sum += value['count']});\
                    return {count: sum}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 0.0, 'value': {'count': 12142.719899999987}}
{'_id': 1.0, 'value': {'count': 16551.2294}}


In [10]:
# Number of deceased (Survived = 0) and surviving (Survived = 1) passengers.

# map: emit one {count: 1} per document, keyed by the Survived flag.
mapFunc = Code("function(){\
                emit(this.Survived,{count: 1});\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 0.0, 'value': {'count': 549.0}}
{'_id': 1.0, 'value': {'count': 342.0}}


In [12]:
# Number of survivors, grouped by gender.

# map: only documents with Survived == 1 emit, one {count: 1} each, keyed by
# Sex. The key is read straight from the document; no helper variable is
# assigned, so nothing leaks into the JavaScript global scope.
mapFunc = Code("function(){\
                if(this.Survived == 1){\
                emit(this.Sex,{count: 1});}\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 'female', 'value': {'count': 233.0}}
{'_id': 'male', 'value': {'count': 109.0}}


In [13]:
# Number of deceased passengers, grouped by gender.

# map: only documents with Survived == 0 emit, one {count: 1} each, keyed by
# Sex. The key is read straight from the document; no helper variable is
# assigned, so nothing leaks into the JavaScript global scope.
mapFunc = Code("function(){\
                if(this.Survived == 0){\
                emit(this.Sex,{count: 1});}\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 'female', 'value': {'count': 81.0}}
{'_id': 'male', 'value': {'count': 468.0}}


In [18]:
# Number of children (Age below 18), grouped by gender.

# map: documents with Age < 18 emit one {count: 1} each, keyed by Sex.
# - 'key' is declared with var so it stays local to the function.
# - In JavaScript null < 18 is true (null coerces to 0), so a null Age is
#   excluded explicitly; a missing Age field already fails the comparison.
mapFunc = Code("function(){\
                var key = this.Sex;\
                if(this.Age != null && this.Age < 18){\
                emit(key,{count: 1});}\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 'female', 'value': {'count': 55.0}}
{'_id': 'male', 'value': {'count': 58.0}}


In [23]:
# Number of deceased (key 0) and surviving (key 1) boys, i.e. male passengers
# with Age below 18.

# map: male documents with Age < 18 emit one {count: 1} each, keyed by the
# Survived flag.
# - 'key' is declared with var so it stays local to the function.
# - In JavaScript null < 18 is true (null coerces to 0), so a null Age is
#   excluded explicitly; a missing Age field already fails the comparison.
mapFunc = Code("function(){\
                var key = this.Survived;\
                if(this.Age != null && this.Age < 18 && this.Sex == 'male'){\
                emit(key,{count: 1});}\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 0.0, 'value': {'count': 35.0}}
{'_id': 1.0, 'value': {'count': 23.0}}


In [24]:
# Number of deceased (key 0) and surviving (key 1) girls, i.e. female
# passengers with Age below 18.

# map: female documents with Age < 18 emit one {count: 1} each, keyed by the
# Survived flag.
# - 'key' is declared with var so it stays local to the function.
# - In JavaScript null < 18 is true (null coerces to 0), so a null Age is
#   excluded explicitly; a missing Age field already fails the comparison.
mapFunc = Code("function(){\
                var key = this.Survived;\
                if(this.Age != null && this.Age < 18 && this.Sex == 'female'){\
                emit(key,{count: 1});}\
                }")
# reduce: add up the 'count' field of every value emitted for a key and return
# the total in the same {count: ...} shape that map emits.
reduceFunc = Code("function(key, values){\
                        var total = 0;\
                        values.forEach(function(val){\
                            total += val['count']});\
                        return {count: total}};")
# The output is written to the 'myresults' collection (reused by every cell).
res = titanic.map_reduce(mapFunc, reduceFunc, 'myresults')
# Sort by key so the printed order is explicit rather than whatever order the
# cursor happens to return.
for doc in res.find().sort('_id'):
    print(doc)

{'_id': 0.0, 'value': {'count': 17.0}}
{'_id': 1.0, 'value': {'count': 38.0}}
