In [2]:
library(tidyverse)

Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang
Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.1.1       v purrr   0.3.2  
v tibble  2.1.1       v dplyr   0.8.0.1
v tidyr   0.8.3       v stringr 1.4.0  
v readr   1.3.1       v forcats 0.4.0  
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()


In [3]:
getwd()

In [4]:
setwd("Dados")

In [5]:
bank_data <- read.table("bank-full.csv",
                       header = TRUE, sep = ";")

In [6]:
head(bank_data)

dim(bank_data)

age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
35,management,married,tertiary,no,231,yes,no,unknown,5,may,139,1,-1,0,unknown,no


In [7]:
#Salvando este head

head <- head(bank_data)

In [8]:
# Fazendo o mesmo passo anterior com tidyverse

head <- read.table("bank-full.csv", header = TRUE, sep = ";") %>% head()

head

age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
35,management,married,tertiary,no,231,yes,no,unknown,5,may,139,1,-1,0,unknown,no


In [10]:
# Complicando

# Selecionando apenas uma parte do dataset

bank <- bank_data %>% select(default, loan, y)

head(bank)

default,loan,y
no,no,no
no,no,no
no,yes,no
no,no,no
no,no,no
no,no,no


In [11]:
# Selecionando colunas por intervalos

bank <- bank_data %>% select(default:contact)

head(bank)

default,balance,housing,loan,contact
no,2143,yes,no,unknown
no,29,yes,no,unknown
no,2,yes,yes,unknown
no,1506,yes,no,unknown
no,1,no,no,unknown
no,231,yes,no,unknown


In [12]:
#Removendo colunas

bank <- bank %>% select(-contact)

head(bank)

default,balance,housing,loan
no,2143,yes,no
no,29,yes,no
no,2,yes,yes
no,1506,yes,no
no,1,no,no
no,231,yes,no


In [13]:
# Reordenando colunas - basta fazer o selct na ordem que voce quer

names(bank_data)

bank <- bank_data %>% select(loan, housing, default, previous, y)

head(bank)

loan,housing,default,previous,y
no,yes,no,0,no
no,yes,no,0,no
yes,yes,no,0,no
no,yes,no,0,no
no,no,no,0,no
no,yes,no,0,no


In [14]:
# Se eu quiser escolhrer uma coluna para ser a primeira, e todo o resto vem depois - util na modelagem 

bank_data <- bank_data %>% select(y, everything())

head(bank_data)

y,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
no,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown
no,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown
no,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown
no,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown
no,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown
no,35,management,married,tertiary,no,231,yes,no,unknown,5,may,139,1,-1,0,unknown


In [15]:
# Para adiconar colunas - criar variáveis!

bank_data <- bank_data %>% mutate(
    Married_House = ifelse(
    (marital == "married" & housing == "yes")
    , "yes", "no")
) %>% select(everything(), Married_House)

head(bank_data)

y,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,Married_House
no,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,yes
no,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
no,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,yes
no,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,yes
no,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
no,35,management,married,tertiary,no,231,yes,no,unknown,5,may,139,1,-1,0,unknown,yes


In [16]:
# Para adicionar muitas colunas ao emsmo tempo - útil para a criação de variáveis em modelos

bank_data <- bank_data %>% mutate(
    balance_flag = ifelse(balance >= 1000, "1", "0")
    , default_n_loan = ifelse( (default == "yes" & loan == "yes") , "yes", "no"))

head(bank_data)

y,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,Married_House,balance_flag,default_n_loan
no,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,yes,1,no
no,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no,0,no
no,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,yes,0,no
no,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,yes,1,no
no,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no,0,no
no,35,management,married,tertiary,no,231,yes,no,unknown,5,may,139,1,-1,0,unknown,yes,0,no


In [18]:
summary(bank$balance)

Length  Class   Mode 
     0   NULL   NULL 

In [20]:
# Multiplas alteracoes de uma vez

bank_data <- bank_data %>% mutate(
    neg_balance = ifelse(balance < 0, 1, 0)
    ) %>% rename(
    balance_1k = "balance_flag"
    , default_loan = "default_n_loan"
    )

head(bank_data)

y,age,job,marital,education,default,balance,housing,loan,contact,...,month,duration,campaign,pdays,previous,poutcome,Married_House,balance_1k,default_loan,neg_balance
no,58,management,married,tertiary,no,2143,yes,no,unknown,...,may,261,1,-1,0,unknown,yes,1,no,0
no,44,technician,single,secondary,no,29,yes,no,unknown,...,may,151,1,-1,0,unknown,no,0,no,0
no,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,...,may,76,1,-1,0,unknown,yes,0,no,0
no,47,blue-collar,married,unknown,no,1506,yes,no,unknown,...,may,92,1,-1,0,unknown,yes,1,no,0
no,33,unknown,single,unknown,no,1,no,no,unknown,...,may,198,1,-1,0,unknown,no,0,no,0
no,35,management,married,tertiary,no,231,yes,no,unknown,...,may,139,1,-1,0,unknown,yes,0,no,0


In [21]:
 # Filtrando dados - selecionando apenas os clientes em casados e com casa

bank2 <- bank_data %>% filter(Married_House == "yes") %>% select(-marital, -housing, -Married_House)

head(bank2)

y,age,job,education,default,balance,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,balance_1k,default_loan,neg_balance
no,58,management,tertiary,no,2143,no,unknown,5,may,261,1,-1,0,unknown,1,no,0
no,33,entrepreneur,secondary,no,2,yes,unknown,5,may,76,1,-1,0,unknown,0,no,0
no,47,blue-collar,unknown,no,1506,no,unknown,5,may,92,1,-1,0,unknown,1,no,0
no,35,management,tertiary,no,231,no,unknown,5,may,139,1,-1,0,unknown,0,no,0
no,58,retired,primary,no,121,no,unknown,5,may,50,1,-1,0,unknown,0,no,0
no,53,technician,secondary,no,6,no,unknown,5,may,517,1,-1,0,unknown,0,no,0
