In [1]:
import pandas as pd

In [2]:
FILE_PATH = 'data_sets/bank.csv' 
bank_df = pd.read_csv(FILE_PATH, sep=";")

In [3]:
bank_df

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,33,services,married,secondary,no,-333,yes,no,cellular,30,jul,329,5,-1,0,unknown,no
4517,57,self-employed,married,tertiary,yes,-3313,yes,yes,unknown,9,may,153,1,-1,0,unknown,no
4518,57,technician,married,secondary,no,295,no,no,cellular,19,aug,151,11,-1,0,unknown,no
4519,28,blue-collar,married,secondary,no,1137,no,no,cellular,6,feb,129,4,211,3,other,no


## 1) How many campaign available in this dataset?

In [4]:
campaign = bank_df['campaign'].unique()
available_campaigns = len(campaign)
print("There are {} campaigns available.".format(available_campaigns))

There are 32 campaigns available.


## 2) How many users do we have with housing and personal loan?

In [5]:
condition = (bank_df['housing'] == 'yes') & (bank_df['loan'] == 'yes')
user_count = len(bank_df.loc[condition])
print("There {} users, who have both housing and personal loan.".format(user_count))

There 406 users, who have both housing and personal loan.


## 3) How many person do we have whose age is 60+ ?

In [6]:
condition = bank_df['age'] > 60
user_count = bank_df.loc[condition, 'age'].count()
print("There {} users, who are 60+.".format(user_count))

There 127 users, who are 60+.


## 4) In which month we have targeted most of the customer?

In [7]:
data_existing_months = bank_df['month'].unique()
max_customer = 0
month_of_max_customer = ''
for month in  data_existing_months:
    condition = bank_df['month'] == month
    customers_count_per_month = len(bank_df.loc[condition])
    if customers_count_per_month > max_customer:
        max_customer = customers_count_per_month
        month_of_max_customer = month
print("On the month of `{}` most of customers had targeted, and there are {} customers are targeted".format(month_of_max_customer.upper(), max_customer))          

On the month of `MAY` most of customers had targeted, and there are 1398 customers are targeted


## 5) Which mode of call is giving you more result?

In [8]:
# getting customer count by passed status/mode
def getCountOfCustomersByStatus(status):
    global bank_df
    unique_status_fields = bank_df[status].unique()
    max_customers = 0
    maximum_customers_targetted_state = unique_status_fields[0]
    for each_unique_status in  unique_status_fields:
        condition = bank_df[status] == each_unique_status
        customers_count_per_status = len(bank_df.loc[condition])
        if customers_count_per_status > max_customers:
            max_customers = customers_count_per_status
            maximum_customers_targetted_state = each_unique_status
    return {'max_customers': max_customers, 'state_value': maximum_customers_targetted_state}  

In [9]:
# arranging counted values by modes
def getAnalyzedDictionaryForModes(modes):
    dict = {}
    for mode in modes:
        dict[mode] = getCountOfCustomersByStatus(mode)
    return dict        

In [10]:
modes = ['age', 'job', 'marital', 'contact']
analyzed_values = getAnalyzedDictionaryForModes(modes)

# finding max customer count and its mode 
max_customers = 0
mode = ''
mode_status = ''
for each_result_state in analyzed_values.keys():
    if analyzed_values[each_result_state]['max_customers'] > max_customers:
        max_customers = analyzed_values[each_result_state]['max_customers']
        mode = each_result_state
        mode_status = analyzed_values[each_result_state]['state_value']

            
print("More results on following mode of `{mode}`\'s `{mode_status}` is more result and its no of customers are targetted is `{no_customers}`".format(mode=mode, no_customers=max_customers, mode_status=mode_status))
    

More results on following mode of `contact`'s `cellular` is more result and its no of customers are targetted is `2896`


In [11]:
# varify above result by comparing other modes' customer values
analyzed_values

{'age': {'max_customers': 231, 'state_value': 34},
 'job': {'max_customers': 969, 'state_value': 'management'},
 'marital': {'max_customers': 2797, 'state_value': 'married'},
 'contact': {'max_customers': 2896, 'state_value': 'cellular'}}

## 6) How many entrepreures do we have in this list?

In [12]:
condition = bank_df['job'] == 'entrepreneur'
count_of_entrepreneur = len(bank_df.loc[condition])
print("There are {} entrepreneurs targetted.".format(count_of_entrepreneur))

There are 168 entrepreneurs targetted.


## 7) How many customer do we have with negative balance?

In [13]:
condition =  bank_df['balance'] < 0
negative_balanced_count = len(bank_df.loc[condition])
print("There are {} customers, whose balance amount is negative.".format(negative_balanced_count))

There are 366 customers, whose balance amount is negative.


## 8) Prepare a group of data based on education level

In [14]:
unique_education_levels = bank_df['education'].unique()
unique_education_levels

array(['primary', 'secondary', 'tertiary', 'unknown'], dtype=object)

In [15]:
dict = {}
for level in unique_education_levels:
    cond = bank_df['education'] == level
    count = bank_df.loc[cond,'age'].count()
    data = bank_df.loc[cond]
    dict[level] = {
        'count': count,
        'data' : data
    }
dict        

{'primary': {'count': 678,
  'data':       age            job   marital education default  balance housing loan  \
  0      30     unemployed   married   primary      no     1787      no   no   
  9      43       services   married   primary      no      -88     yes  yes   
  18     25    blue-collar    single   primary      no     -221     yes   no   
  26     55    blue-collar   married   primary      no      627     yes   no   
  36     78        retired  divorced   primary      no      229      no   no   
  ...   ...            ...       ...       ...     ...      ...     ...  ...   
  4480   23    blue-collar   married   primary      no     1158     yes   no   
  4485   53    blue-collar   married   primary      no      238     yes   no   
  4486   37    blue-collar   married   primary      no      378     yes   no   
  4499   45    blue-collar  divorced   primary      no      942      no   no   
  4503   60  self-employed   married   primary      no      362      no  yes   
  
  