# 딥러닝 따라하기2_분류

* 목적 : 무작정 따라하면서 코드 눈과 손으로 익히기

# 1.환경준비

* 라이브러리 Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# 2.Classification : mobile

## (1) 데이터 전처리

### 1) 데이터 준비

In [2]:
path = "https://raw.githubusercontent.com/DA4BAM/dataset/master/mobile_churn_simple.csv"
data = pd.read_csv(path)
data['CHURN'] = data['CHURN'].map({'STAY':0, 'LEAVE':1})
data.head()

Unnamed: 0,INCOME,OVERAGE,LEFTOVER,HOUSE,HANDSET_PRICE,OVER_15MINS_CALLS_PER_MONTH,AVERAGE_CALL_DURATION,CHURN
0,31953,0,6,313378,161,0,4,0
1,36147,0,13,800586,244,0,6,0
2,27273,230,0,305049,201,16,15,0
3,120070,38,33,788235,780,3,2,1
4,29215,208,85,224784,241,21,1,0


|	구분	|	변수 명	|	내용	|	type	|	비고	|
|	----	|	----	|	----	|	----	|	----	|
|	**Target**	|	**CHURN**	|	이탈여부	|	범주	| 0,1	|
|	feature	|	INCOME	|	소득수준(달러)	|	숫자	|		|
|	feature	|	OVERAGE	|	월평균 초과사용시간(분)	|	숫자	| |
|	feature	|	LEFTOVER	|	월평균 잔여시간(%)	|	숫자	| 	|
|	feature	|	HOUSE	|	집가격(달러)	|	숫자	|	|
|	feature	|	HANDSET_PRICE	|	휴대폰가격(달러)	|	숫자	|		|
|	feature	|	OVER_15MINS_CALLS_PER_MONTH	|	월평균 장기통화 횟수	|	숫자	| 		|
|	feature	|	AVERAGE_CALL_DURATION	|	평균통화시간(분)	|	숫자	|		|

In [3]:
target = 'CHURN'
x = data.drop(target, axis=1)
y = data.loc[:, target]

### 2) 가변수화

### 3) 데이터분할

In [4]:
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=.2, random_state = 20)

### 4) Scaling

In [5]:
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

## (2) 모델링
* 필요한 함수들 불러오기
* 모델 선언
* 학습
* 예측
* 성능 검증

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from keras.backend import clear_session

### 1) 모델 선언

In [7]:
nfeatures = x_train.shape[1] #num of columns
nfeatures

7

In [8]:
# 메모리 정리
clear_session()

# Sequential 타입 모델 선언
model = Sequential([ Dense(1, input_shape = (nfeatures,), activation = 'sigmoid') ])

# 모델요약
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 8         
                                                                 
Total params: 8
Trainable params: 8
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.compile(optimizer='adam', loss='binary_crossentropy')

### 2) 학습

In [10]:
model.fit(x_train, y_train)



<keras.callbacks.History at 0x7fa9385ce370>

### 3) 예측

In [13]:
pred = model.predict(x_val)
pred = np.where(pred>= 0.5, 1, 0)



### 4) 검증
만든 모델은 얼마나 정확한지 검증해 봅시다.



In [14]:
print(confusion_matrix(y_val, pred))
print('-'*50)
print(classification_report(y_val, pred))

[[1094  944]
 [ 882 1080]]
--------------------------------------------------
              precision    recall  f1-score   support

           0       0.55      0.54      0.55      2038
           1       0.53      0.55      0.54      1962

    accuracy                           0.54      4000
   macro avg       0.54      0.54      0.54      4000
weighted avg       0.54      0.54      0.54      4000



# 3.Classification : 대학원 지원

## (1) 데이터 전처리

### 1) 데이터 준비

In [15]:
path = "https://raw.githubusercontent.com/DA4BAM/dataset/master/Graduate_apply.csv"
data = pd.read_csv(path)
data.head()

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [16]:
target = 'admit'
x = data.drop(target, axis=1)
y = data.loc[:, target]

### 2) 가변수화

In [17]:
cat_cols = ['rank']
x = pd.get_dummies(x, columns = cat_cols, drop_first = True)

### 3) 데이터분할

In [18]:
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=.2, random_state = 20)

### 4) Scaling

In [19]:
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

## (2) 모델링
* 필요한 함수들 불러오기
* 모델 선언
* 학습
* 예측
* 성능 검증

In [20]:
from keras.models import Sequential
from keras.layers import Dense
from keras.backend import clear_session

### 1) 모델 선언

In [21]:
nfeatures = x_train.shape[1] #num of columns
nfeatures

5

In [22]:
# 메모리 정리
clear_session()

# Sequential 타입 모델 선언
model = Sequential([ Dense(1, input_shape = (nfeatures,), activation = 'sigmoid') ])

# 모델요약
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 6         
                                                                 
Total params: 6
Trainable params: 6
Non-trainable params: 0
_________________________________________________________________


In [23]:
model.compile(optimizer='adam', loss='binary_crossentropy')

### 2) 학습

In [24]:
model.fit(x_train, y_train)



<keras.callbacks.History at 0x7fa9c5bddd00>

### 3) 예측

In [25]:
pred = model.predict(x_val)
pred = np.where(pred>= 0.5, 1, 0)



### 4) 검증
만든 모델은 얼마나 정확한지 검증해 봅시다.



In [26]:
print(confusion_matrix(y_val, pred))
print('-'*50)
print(classification_report(y_val, pred))

[[36 19]
 [23  2]]
--------------------------------------------------
              precision    recall  f1-score   support

           0       0.61      0.65      0.63        55
           1       0.10      0.08      0.09        25

    accuracy                           0.48        80
   macro avg       0.35      0.37      0.36        80
weighted avg       0.45      0.47      0.46        80

