# 1. main()

In [1]:
import pymongo
import pandas as pd

### 1. connecting to client

In [2]:
# Connect to the MongoDB server at localhost, port 27017, and show the client.
client = pymongo.MongoClient(host='localhost:27017')
print(client)

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)


### 2. set database and collection name

In [15]:
# Names of the database and collection that the following cells work with.
databaseName, collectionName = 'bankDB', 'customer'

### 3. list available databases

In [21]:
# Fetch the database names known to the server, then print them.
available_databases = client.list_database_names()
print(available_databases)

['ECommerce', 'admin', 'bankDB', 'config', 'local', 'pymongo_db', 'restaurants']


### 4. set db

In [22]:
# Get a handle on the database named by `databaseName` and show it.
db = client.get_database(databaseName)
print(db)

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'bankDB')


### 5. list all collections in the database

In [23]:
# Fetch the collection names in the selected database, then print them.
available_collections = db.list_collection_names()
print(available_collections)

['customer']


### 6. set collection

In [24]:
# Get a handle on the collection named by `collectionName` and show it.
collection = db.get_collection(collectionName)
print(collection)

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'bankDB'), 'customer')


# 2. Loading data from JSON to mongoDB

In [25]:
# NOTE: machine-specific absolute path; point it at your own copy of the file.
fileLocation = r'C:\Users\Administrator\Downloads\bank_edited.json'

# Parse the JSON file into a pandas DataFrame.
bank_df = pd.read_json(path_or_buf=fileLocation)

In [9]:
# Show the loaded DataFrame (the cell's last expression is rendered as output).
bank_df

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,technician,married,tertiary,no,825,no,no,cellular,17,nov,977,3,-1,0,unknown,yes
45207,71,retired,divorced,primary,no,1729,no,no,cellular,17,nov,456,2,-1,0,unknown,yes
45208,72,retired,married,secondary,no,5715,no,no,cellular,17,nov,1127,5,184,3,success,yes
45209,57,blue-collar,married,secondary,no,668,no,no,telephone,17,nov,508,4,-1,0,unknown,no


In [26]:
# Turn every DataFrame row into a dict so each row becomes one MongoDB document.
records = bank_df.to_dict(orient='records')

# Insert only into an empty collection: re-running this cell (or the whole
# notebook) must not add a second copy of every document, which would skew
# all the counts and rates computed below.
existing_count = collection.count_documents({})
if existing_count:
    print(f"Collection already holds {existing_count} documents; skipping insert.")
elif records:
    # insert_many rejects an empty list, hence the `records` check.
    collection.insert_many(records)

# 3. Questions

## 3.1 Give marketing success rate (No. of people subscribed / total no. of entries) 

In [27]:
# Count documents with aggregation pipelines. `$count` emits no document at all
# when its input is empty, so default to 0 instead of indexing the result with
# [0] (which raised IndexError on an empty collection or when nothing matched).
total_result = next(collection.aggregate([{'$count': 'totalEntries'}]), {})
totalEntries = total_result.get('totalEntries', 0)

subscribed_result = next(
    collection.aggregate([{"$match": {"y": "yes"}}, {'$count': 'countSubscribed'}]),
    {},
)
countSubscribed = subscribed_result.get('countSubscribed', 0)

# Guard the division: an empty collection has no meaningful rate.
if totalEntries:
    print(f"Market success rate = {round(countSubscribed*100/totalEntries, 3)}%")
else:
    print("Empty database")

Market success rate = 11.698%


In [12]:
# The same success-rate calculation written for the mongo shell.
# $facet runs both counting pipelines inside a single aggregate call.

# db.customer.aggregate([
#     {
#         $facet: {
#             totalCount: [{$count: 'total'}],
#             subscribedCount: [{$match: {y: 'yes'}}, {$count: 'subscribed'}]
#         }
#     },
#     {$project: {
#         totalCount: {$arrayElemAt:['$totalCount.total', 0]}, 
#         subscribedCount: {$arrayElemAt:['$subscribedCount.subscribed', 0]},
#         successRate: {$divide: [{$arrayElemAt:['$subscribedCount.subscribed', 0]}, {$arrayElemAt:['$totalCount.total', 0]}]}
#     }}
# ])

## 3.2 Give marketing failure rate

In [28]:
# Failure rate = customers who did NOT subscribe (y == 'no') / all customers.
# $facet computes both counts in one pass; each facet yields a one-element
# array, so $arrayElemAt pulls the number out before dividing.
# The not-subscribed count is labelled `notSubscribedCount` (it was previously
# reported under the misleading key `subscribedCount`).
failure_pipeline = [
    {
        '$facet': {
            'totalCount': [{'$count': 'total'}],
            'notSubscribedCount': [{'$match': {'y': 'no'}}, {'$count': 'notSubscribed'}]
        }
    },
    {
        '$project': {
            'totalCount': {'$arrayElemAt': ['$totalCount.total', 0]},
            'notSubscribedCount': {'$arrayElemAt': ['$notSubscribedCount.notSubscribed', 0]},
            'failureRate': {
                '$divide': [
                    {'$arrayElemAt': ['$notSubscribedCount.notSubscribed', 0]},
                    {'$arrayElemAt': ['$totalCount.total', 0]}
                ]
            }
        }
    }
]

result = collection.aggregate(failure_pipeline)

print(list(result))

[{'totalCount': 45211, 'subscribedCount': 39922, 'failureRate': 0.8830151954170445}]


## 3.3 Give the maximum, mean, median, minimum age of the average targeted customer.

In [50]:
# Age statistics over the whole collection.
# The accumulators run directly inside $group instead of first $push-ing every
# age into one array: that intermediate document grows with the collection and
# can hit MongoDB's 16 MB document limit. `_id: None` groups all documents
# together (the previous string 'null' only worked because any constant does).
# NOTE: $median as a $group accumulator needs MongoDB 7.0+, the same version
# that the expression form of $median requires.
list(
    db.customer.aggregate(
        [
            {'$group': {
                '_id': None,
                'avgAge': {'$avg': '$age'},
                'minAge': {'$min': '$age'},
                'maxAge': {'$max': '$age'},
                'medianAge': {'$median': {'input': '$age', 'method': 'approximate'}}
            }},
            # Drop the constant grouping key from the output.
            {'$project': {'_id': 0}},
        ]
    )
)

[{'avgAge': 40.93621021432837, 'minAge': 18, 'maxAge': 95, 'medianAge': 39.0}]

## 3.4 Check if age matters in marketing subscription for deposit

In [29]:
# Number of subscribers (y == 'yes') per age, most frequent age first.
# $sortByCount groups by the expression, counts into `count`, and sorts
# descending by that count.
subscribers_by_age_pipeline = [
    {'$match': {'y': 'yes'}},
    {'$sortByCount': '$age'},
]
list(collection.aggregate(subscribers_by_age_pipeline))

[{'_id': 32, 'count': 221},
 {'_id': 30, 'count': 217},
 {'_id': 33, 'count': 210},
 {'_id': 35, 'count': 209},
 {'_id': 31, 'count': 206},
 {'_id': 34, 'count': 198},
 {'_id': 36, 'count': 195},
 {'_id': 29, 'count': 171},
 {'_id': 37, 'count': 170},
 {'_id': 28, 'count': 162},
 {'_id': 38, 'count': 144},
 {'_id': 39, 'count': 143},
 {'_id': 27, 'count': 141},
 {'_id': 26, 'count': 134},
 {'_id': 41, 'count': 120},
 {'_id': 46, 'count': 118},
 {'_id': 40, 'count': 116},
 {'_id': 47, 'count': 113},
 {'_id': 25, 'count': 113},
 {'_id': 42, 'count': 111},
 {'_id': 45, 'count': 106},
 {'_id': 43, 'count': 103},
 {'_id': 49, 'count': 101},
 {'_id': 60, 'count': 98},
 {'_id': 44, 'count': 93},
 {'_id': 59, 'count': 88},
 {'_id': 53, 'count': 85},
 {'_id': 52, 'count': 85},
 {'_id': 54, 'count': 84},
 {'_id': 48, 'count': 82},
 {'_id': 57, 'count': 78},
 {'_id': 51, 'count': 77},
 {'_id': 55, 'count': 76},
 {'_id': 58, 'count': 72},
 {'_id': 50, 'count': 72},
 {'_id': 24, 'count': 68},
 {'_i

## 3.5 Check if marital status mattered for a subscription to deposit

In [30]:
# Number of subscribers (y == 'yes') per marital status, largest group first.
subscribers_by_marital_pipeline = [
    {'$match': {'y': 'yes'}},
    {'$sortByCount': '$marital'},
]
list(collection.aggregate(subscribers_by_marital_pipeline))

[{'_id': 'married', 'count': 2755},
 {'_id': 'single', 'count': 1912},
 {'_id': 'divorced', 'count': 622}]

## 3.6 Check if age and marital status together mattered for a subscription to deposit scheme

In [31]:
# Number of subscribers (y == 'yes') for every (marital status, age) pair,
# largest group first.
only_subscribers = {'$match': {'y': 'yes'}}
count_per_marital_age = {
    '$group': {
        '_id': {'marital': '$marital', 'age': '$age'},
        'count': {'$sum': 1},
    }
}
largest_first = {'$sort': {'count': -1}}

list(db.customer.aggregate([only_subscribers, count_per_marital_age, largest_first]))

[{'_id': {'marital': 'single', 'age': 30}, 'count': 151},
 {'_id': {'marital': 'single', 'age': 28}, 'count': 138},
 {'_id': {'marital': 'single', 'age': 29}, 'count': 133},
 {'_id': {'marital': 'single', 'age': 32}, 'count': 124},
 {'_id': {'marital': 'single', 'age': 26}, 'count': 121},
 {'_id': {'marital': 'married', 'age': 34}, 'count': 118},
 {'_id': {'marital': 'single', 'age': 31}, 'count': 111},
 {'_id': {'marital': 'single', 'age': 27}, 'count': 110},
 {'_id': {'marital': 'married', 'age': 35}, 'count': 101},
 {'_id': {'marital': 'married', 'age': 36}, 'count': 100},
 {'_id': {'marital': 'single', 'age': 25}, 'count': 99},
 {'_id': {'marital': 'married', 'age': 37}, 'count': 98},
 {'_id': {'marital': 'single', 'age': 33}, 'count': 97},
 {'_id': {'marital': 'married', 'age': 33}, 'count': 97},
 {'_id': {'marital': 'married', 'age': 39}, 'count': 87},
 {'_id': {'marital': 'married', 'age': 32}, 'count': 87},
 {'_id': {'marital': 'married', 'age': 38}, 'count': 86},
 {'_id': {'ma

## 3.7 Find All Records Where Housing Loan is Approved

In [35]:
# Every customer document whose `housing` field is 'yes'.
housing_loan_filter = {'housing': 'yes'}
list(db.customer.find(filter=housing_loan_filter))

[{'_id': ObjectId('66c561a994610b691202b35c'),
  'age': 58,
  'job': 'management',
  'marital': 'married',
  'education': 'tertiary',
  'default': 'no',
  'balance': 2143,
  'housing': 'yes',
  'loan': 'no',
  'contact': 'unknown',
  'day': 5,
  'month': 'may',
  'duration': 261,
  'campaign': 1,
  'pdays': -1,
  'previous': 0,
  'poutcome': 'unknown',
  'y': 'no'},
 {'_id': ObjectId('66c561a994610b691202b35d'),
  'age': 44,
  'job': 'technician',
  'marital': 'single',
  'education': 'secondary',
  'default': 'no',
  'balance': 29,
  'housing': 'yes',
  'loan': 'no',
  'contact': 'unknown',
  'day': 5,
  'month': 'may',
  'duration': 151,
  'campaign': 1,
  'pdays': -1,
  'previous': 0,
  'poutcome': 'unknown',
  'y': 'no'},
 {'_id': ObjectId('66c561a994610b691202b35e'),
  'age': 33,
  'job': 'entrepreneur',
  'marital': 'married',
  'education': 'secondary',
  'default': 'no',
  'balance': 2,
  'housing': 'yes',
  'loan': 'yes',
  'contact': 'unknown',
  'day': 5,
  'month': 'may',
 

## 3.8 Find Records with High Account Balance

In [58]:
# "High balance" is read here as the ten largest balances in the collection:
# sort by balance descending, keep only the identifying fields, take the top 10.
by_balance_desc = {'$sort': {'balance': -1}}
keep_summary_fields = {'$project': {'_id': 1, 'age': 1, 'job': 1, 'balance': 1}}
top_ten = {'$limit': 10}

list(db.customer.aggregate([by_balance_desc, keep_summary_fields, top_ten]))

[{'_id': ObjectId('66c561aa94610b6912034f91'),
  'age': 51,
  'job': 'management',
  'balance': 102127},
 {'_id': ObjectId('66c561a994610b69120319cf'),
  'age': 59,
  'job': 'management',
  'balance': 98417},
 {'_id': ObjectId('66c561aa94610b6912035cdd'),
  'age': 84,
  'job': 'retired',
  'balance': 81204},
 {'_id': ObjectId('66c561aa94610b691203599a'),
  'age': 84,
  'job': 'retired',
  'balance': 81204},
 {'_id': ObjectId('66c561aa94610b6912035639'),
  'age': 60,
  'job': 'retired',
  'balance': 71188},
 {'_id': ObjectId('66c561a994610b69120300a5'),
  'age': 56,
  'job': 'management',
  'balance': 66721},
 {'_id': ObjectId('66c561a994610b6912030624'),
  'age': 52,
  'job': 'blue-collar',
  'balance': 66653},
 {'_id': ObjectId('66c561a994610b691202ff38'),
  'age': 59,
  'job': 'admin.',
  'balance': 64343},
 {'_id': ObjectId('66c561aa94610b69120354fa'),
  'age': 32,
  'job': 'entrepreneur',
  'balance': 59649},
 {'_id': ObjectId('66c561a994610b691202e5da'),
  'age': 56,
  'job': 'blu

## 3.9 Count Records by Job Type

In [32]:
# Number of customers per job type, most common job first.
list(collection.aggregate([{'$sortByCount': '$job'}]))

[{'_id': 'blue-collar', 'count': 9732},
 {'_id': 'management', 'count': 9458},
 {'_id': 'technician', 'count': 7597},
 {'_id': 'admin.', 'count': 5171},
 {'_id': 'services', 'count': 4154},
 {'_id': 'retired', 'count': 2264},
 {'_id': 'self-employed', 'count': 1579},
 {'_id': 'entrepreneur', 'count': 1487},
 {'_id': 'unemployed', 'count': 1303},
 {'_id': 'housemaid', 'count': 1240},
 {'_id': 'student', 'count': 938},
 {'_id': 'unknown', 'count': 288}]

## 3.10 Count Married Individuals with Secondary Education

In [59]:
# Count customers who are married AND have secondary education.
# The $group key is a constant document, so all matches fall into one group.
married_secondary = {'education': 'secondary', 'marital': 'married'}
married_secondary_pipeline = [
    {'$match': married_secondary},
    {'$group': {
        '_id': {'marital': 'married', 'education': 'secondary'},
        'count': {'$sum': 1},
    }},
    {'$sort': {'count': -1}},
]
list(db.customer.aggregate(married_secondary_pipeline))

[{'_id': {'marital': 'married', 'education': 'secondary'}, 'count': 13770}]

## 3.11 Calculate the Average Balance for Each Job Type

In [None]:
# Average account balance per job type, highest average first.
# (The cell previously referenced an undefined name `collections` and had an
# empty pipeline, so it could not run.)
list(
    collection.aggregate([
        {'$group': {
            '_id': '$job',
            'avgBalance': {'$avg': '$balance'},
            # Group size, so averages over tiny groups can be spotted.
            'count': {'$sum': 1}
        }},
        {'$sort': {'avgBalance': -1}}
    ])
)

## 3.12 Find the Most Common Education Level Among Those with Loans

In [None]:
# Among customers with a personal loan (loan == 'yes'), rank education levels
# by frequency and keep only the most common one.
top_education_pipeline = [
    {'$match': {'loan': 'yes'}},
    {'$sortByCount': '$education'},
    {'$limit': 1},
]
list(collection.aggregate(top_education_pipeline))

## 3.13 Find Individuals with Multiple Campaign Contacts

In [63]:
# Number of customers contacted more than once in this campaign.
# count_documents counts on the server; the previous len(list(find(...)))
# transferred every matching document to the client just to count them.
collection.count_documents({'campaign': {'$gt': 1}})

27667

## 3.14 [DUPLICATE] Determine success rate of campaign

## 3.15 Identify Anomalies in Account Balances

In [None]:
# Flag balances lying more than Z_THRESHOLD sample standard deviations from
# the mean. (The previous draft was not valid Python: `null`, a set literal
# {'$balance'}, and a $lookup nested inside the $group stage.)
Z_THRESHOLD = 3

# Step 1: mean and sample standard deviation of all balances.
balance_stats = next(
    db.customer.aggregate([
        {'$group': {
            '_id': None,
            'avgBalance': {'$avg': '$balance'},
            'stdDevBalance': {'$stdDevSamp': '$balance'}
        }}
    ]),
    None,
)

# Step 2: fetch the documents outside mean +/- Z_THRESHOLD * stdDev.
if balance_stats is None or not balance_stats['stdDevBalance']:
    # Empty collection, a single document ($stdDevSamp is null) or zero spread:
    # nothing can be called an outlier.
    anomalies = []
else:
    spread = Z_THRESHOLD * balance_stats['stdDevBalance']
    lower_bound = balance_stats['avgBalance'] - spread
    upper_bound = balance_stats['avgBalance'] + spread
    anomalies = list(
        db.customer.find(
            {'$or': [
                {'balance': {'$lt': lower_bound}},
                {'balance': {'$gt': upper_bound}}
            ]},
            {'age': 1, 'job': 1, 'balance': 1},
        ).sort('balance', pymongo.DESCENDING)
    )

print(f"{len(anomalies)} anomalous balances found")

# Show only the most extreme cases rather than dumping the full list.
anomalies[:10]

## 3.16 Analyze Seasonal Patterns
###### Seasonality is a pattern that repeats at a regular interval.
###### e.g. demand for umbrellas rises every monsoon season; more loosely, a pandemic is said to occur about every 100 years

In [70]:
# Per month: number of calls and average call duration, ordered by call volume
# (ties broken by the longer average duration).
monthly_summary = {
    '$group': {
        '_id': '$month',
        'averageDuration': {'$avg': '$duration'},
        'totalCalls': {'$sum': 1},
    }
}
busiest_first = {'$sort': {'totalCalls': -1, 'averageDuration': -1}}

list(db.customer.aggregate([monthly_summary, busiest_first]))

[{'_id': 'may', 'averageDuration': 260.92227226500074, 'totalCalls': 13766},
 {'_id': 'jul', 'averageDuration': 267.9753444525018, 'totalCalls': 6895},
 {'_id': 'aug', 'averageDuration': 232.40211301424685, 'totalCalls': 6247},
 {'_id': 'jun', 'averageDuration': 243.0878112712975, 'totalCalls': 5341},
 {'_id': 'nov', 'averageDuration': 253.14861460957178, 'totalCalls': 3970},
 {'_id': 'apr', 'averageDuration': 298.09890859481584, 'totalCalls': 2932},
 {'_id': 'feb', 'averageDuration': 248.29822574556437, 'totalCalls': 2649},
 {'_id': 'jan', 'averageDuration': 268.2202423378475, 'totalCalls': 1403},
 {'_id': 'oct', 'averageDuration': 288.30216802168025, 'totalCalls': 738},
 {'_id': 'sep', 'averageDuration': 292.25215889464596, 'totalCalls': 579},
 {'_id': 'mar', 'averageDuration': 244.40041928721175, 'totalCalls': 477},
 {'_id': 'dec', 'averageDuration': 329.32710280373834, 'totalCalls': 214}]

## 3.17 Determine Correlation Between Loan Status and Balance

In [72]:
# Average balance and group size for customers with and without a personal loan.
balance_by_loan_pipeline = [
    {'$group': {
        '_id': '$loan',
        'avgBalance': {'$avg': '$balance'},
        'count': {'$sum': 1},
    }},
]
list(db.customer.aggregate(balance_by_loan_pipeline))

[{'_id': 'no', 'avgBalance': 1474.4536307846288, 'count': 37967},
 {'_id': 'yes', 'avgBalance': 774.3099116510215, 'count': 7244}]

## 3.18 Identify the Most Common Day for Successful Campaigns

In [74]:
# The three days of the month with the most successful contacts (y == 'yes').
top_days_pipeline = [
    {'$match': {'y': 'yes'}},
    {'$sortByCount': '$day'},
    {'$limit': 3},
]
list(db.customer.aggregate(top_days_pipeline))

[{'_id': 30, 'count': 271},
 {'_id': 12, 'count': 244},
 {'_id': 13, 'count': 241}]

## 3.19 Find the Youngest Person with the Highest Account Balance

In [95]:
# below both methods are working correctly

list(
    # db.customer.aggregate([
    #     {'$sort': {'age':1, 'balance':-1}},
    #     {'$limit': 1}
    # ])
    
    db.customer.find().sort({'age':1, 'balance':-1}).limit(1)
)

[{'_id': ObjectId('66c561aa94610b691203527c'),
  'age': 18,
  'job': 'student',
  'marital': 'single',
  'education': 'primary',
  'default': 'no',
  'balance': 1944,
  'housing': 'no',
  'loan': 'no',
  'contact': 'telephone',
  'day': 10,
  'month': 'aug',
  'duration': 122,
  'campaign': 3,
  'pdays': -1,
  'previous': 0,
  'poutcome': 'unknown',
  'y': 'no'}]

## 3.20 Identify Customers with Consistently Low Balances

In [102]:
# Customers whose balance is at most 100, reduced to _id, age and balance.
low_balance_filter = {'balance': {'$lte': 100}}
summary_fields = {'_id': 1, 'age': 1, 'balance': 1}

list(db.customer.find(low_balance_filter, summary_fields))

[{'_id': ObjectId('66c561a994610b691202b35d'), 'age': 44, 'balance': 29},
 {'_id': ObjectId('66c561a994610b691202b35e'), 'age': 33, 'balance': 2},
 {'_id': ObjectId('66c561a994610b691202b360'), 'age': 33, 'balance': 1},
 {'_id': ObjectId('66c561a994610b691202b363'), 'age': 42, 'balance': 2},
 {'_id': ObjectId('66c561a994610b691202b368'), 'age': 53, 'balance': 6},
 {'_id': ObjectId('66c561a994610b691202b369'), 'age': 58, 'balance': 71},
 {'_id': ObjectId('66c561a994610b691202b36c'), 'age': 45, 'balance': 13},
 {'_id': ObjectId('66c561a994610b691202b36d'), 'age': 57, 'balance': 52},
 {'_id': ObjectId('66c561a994610b691202b36e'), 'age': 60, 'balance': 60},
 {'_id': ObjectId('66c561a994610b691202b36f'), 'age': 33, 'balance': 0},
 {'_id': ObjectId('66c561a994610b691202b372'), 'age': 32, 'balance': 23},
 {'_id': ObjectId('66c561a994610b691202b373'), 'age': 25, 'balance': 50},
 {'_id': ObjectId('66c561a994610b691202b374'), 'age': 40, 'balance': 0},
 {'_id': ObjectId('66c561a994610b691202b375'

## 3.21 Analyze the Relationship Between Campaign Duration and Outcome

In [103]:
# Compound ascending index on (duration, y); create_index returns its name.
db.customer.create_index([('duration', pymongo.ASCENDING), ('y', pymongo.ASCENDING)])

'duration_1_y_1'

In [108]:
# Average, longest and shortest call duration for each outcome `y`.
# Note the group key is a one-element array ([<y>]), so `_id` comes back as a list.
duration_by_outcome = {
    '$group': {
        '_id': ['$y'],
        'avgDuration': {'$avg': '$duration'},
        'maxDuration': {'$max': '$duration'},
        'minDuration': {'$min': '$duration'},
    }
}
keep_all_stats = {
    '$project': {'_id': 1, 'avgDuration': 1, 'maxDuration': 1, 'minDuration': 1}
}

list(db.customer.aggregate([duration_by_outcome, keep_all_stats]))

[{'_id': ['no'],
  'avgDuration': 221.18280647262162,
  'maxDuration': 4918,
  'minDuration': 0},
 {'_id': ['yes'],
  'avgDuration': 537.2945736434109,
  'maxDuration': 3881,
  'minDuration': 8}]

## 3.22 Detect Loan Trends Among Different Job Categories

In [109]:
# Compound ascending index on (job, loan); create_index returns its name.
db.customer.create_index([('job', pymongo.ASCENDING), ('loan', pymongo.ASCENDING)])

'job_1_loan_1'

In [117]:
# For customers holding a personal loan: per job type, the average balance and
# the number of borrowers, with the largest borrower group first.
borrowers_only = {'$match': {'loan': 'yes'}}
summary_per_job = {
    '$group': {
        '_id': '$job',
        'avgBalance': {'$avg': '$balance'},
        'count': {'$sum': 1},
    }
}
most_borrowers_first = {'$sort': {'count': -1}}

list(db.customer.aggregate([borrowers_only, summary_per_job, most_borrowers_first]))

[{'_id': 'blue-collar', 'avgBalance': 691.5700712589073, 'count': 1684},
 {'_id': 'technician', 'avgBalance': 745.4598930481284, 'count': 1309},
 {'_id': 'management', 'avgBalance': 965.7350359138069, 'count': 1253},
 {'_id': 'admin.', 'avgBalance': 693.7941473259334, 'count': 991},
 {'_id': 'services', 'avgBalance': 613.4425837320574, 'count': 836},
 {'_id': 'entrepreneur', 'avgBalance': 1185.7780898876404, 'count': 356},
 {'_id': 'retired', 'avgBalance': 887.0388349514564, 'count': 309},
 {'_id': 'self-employed', 'avgBalance': 745.1397379912664, 'count': 229},
 {'_id': 'housemaid', 'avgBalance': 793.9407894736842, 'count': 152},
 {'_id': 'unemployed', 'avgBalance': 509.5045871559633, 'count': 109},
 {'_id': 'student', 'avgBalance': 1101.8333333333333, 'count': 12},
 {'_id': 'unknown', 'avgBalance': 482.5, 'count': 4}]

In [None]:
# Insight: people with loans tend to have lower-profile jobs.

## 3.23 Identify the Most Common Outcomes for Married Individuals

In [121]:
# Outcome (`y`) counts restricted to married customers.
married_outcome_pipeline = [
    {'$match': {'marital': 'married'}},
    {'$group': {'_id': '$y', 'count': {'$sum': 1}}},
]
list(db.customer.aggregate(married_outcome_pipeline))

# Insight: among married individuals the outcome is 'yes' for 2755 and 'no' for 24459.

[{'_id': 'no', 'count': 24459}, {'_id': 'yes', 'count': 2755}]

## 3.24 Find the Distribution of Account Balances Across Different Education Levels

In [123]:
# Average balance and number of customers for each education level.
balance_by_education_pipeline = [
    {'$group': {
        '_id': '$education',
        'avgBalance': {'$avg': '$balance'},
        'count': {'$sum': 1},
    }},
]
list(db.customer.aggregate(balance_by_education_pipeline))

[{'_id': 'primary', 'avgBalance': 1250.9499343161583, 'count': 6851},
 {'_id': 'tertiary', 'avgBalance': 1758.4164348545223, 'count': 13301},
 {'_id': 'unknown', 'avgBalance': 1526.7544426494346, 'count': 1857},
 {'_id': 'secondary', 'avgBalance': 1154.880786139126, 'count': 23202}]

## 3.25 [DUPLICATE] How are account balances distributed among different education levels?

## 3.26 Find ranking of Customers based on Account Balance.