<a href="https://colab.research.google.com/github/Stacy067/NLP-and-Image-Classification-for-Behavioral-Finance/blob/main/Photo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 이미지 파일 다운로드

### 부정 이미지

In [None]:
import pandas as pd 
import urllib.request
from urllib.request import Request, urlopen   
import urllib.parse
from urllib.parse import quote

In [None]:
# Load the article table for the negative class. The CSV is cp949-encoded
# (a Korean Windows code page), so the encoding has to be passed explicitly.
df = pd.read_csv(
    "C:/Users/admin/RNN/text_n.csv",
    encoding='cp949',
)

FileNotFoundError: ignored

In [None]:
# Display the positional index range of the image-URL column, i.e. how many rows it holds.
range(df['img_url'].shape[0])

In [None]:
# Download every image listed in `df` into the negative-class folder.
# Each file is saved as "<date>_n_<row position>.jpg".
import urllib.error

NEGATIVE_IMAGE_DIR = "D:/Dropbox/image/negative/"
failed_downloads = []  # (row position, url) pairs that could not be fetched

# Walk the rows that are actually present instead of a hard-coded count: a fixed
# 95362 raises IndexError on a shorter table and silently skips rows on a longer one.
# Iterating the two columns directly also avoids building a row Series per image.
for m, (date_value, img_link) in enumerate(zip(df['date'], df['img_url'])):
    date_img = str(int(date_value))
    target_path = NEGATIVE_IMAGE_DIR + date_img + "_n_" + str(m) + ".jpg"
    try:
        urllib.request.urlretrieve(img_link, target_path)
    except (urllib.error.URLError, ValueError) as err:
        # A dead link or malformed URL should not abort the remaining downloads;
        # remember it so it can be inspected or retried afterwards.
        failed_downloads.append((m, img_link))

print(f"{len(failed_downloads)} of {len(df)} downloads failed")

### 긍정 이미지

In [None]:
import pandas as pd

# Rebind `df` to the article table for the positive class (cp949-encoded CSV).
df = pd.read_csv(
    "C:/Users/admin/RNN/text_p.csv",
    encoding='cp949',
)

In [None]:
# Display the positional index range of the image-URL column, i.e. how many rows it holds.
range(df['img_url'].shape[0])

In [None]:
# Download every image listed in `df` (the positive table at this point) into the
# positive-class folder. Each file is saved as "<date>_p_<row position>.jpg".
#
# The original cell was a copy of the negative download and still wrote to
# ".../negative/" with the "_n_" tag, so positive images overwrote the negative
# ones under identical file names.
# NOTE(review): assumes the positive images belong in a sibling "positive/" folder
# that already exists — confirm the directory name.
import urllib.error

POSITIVE_IMAGE_DIR = "D:/Dropbox/image/positive/"
failed_downloads = []  # (row position, url) pairs that could not be fetched

# Walk the rows that are actually present instead of a hard-coded count: a fixed
# 95362 raises IndexError on a shorter table and silently skips rows on a longer one.
for m, (date_value, img_link) in enumerate(zip(df['date'], df['img_url'])):
    date_img = str(int(date_value))
    target_path = POSITIVE_IMAGE_DIR + date_img + "_p_" + str(m) + ".jpg"
    try:
        urllib.request.urlretrieve(img_link, target_path)
    except (urllib.error.URLError, ValueError) as err:
        # A dead link or malformed URL should not abort the remaining downloads;
        # remember it so it can be inspected or retried afterwards.
        failed_downloads.append((m, img_link))

print(f"{len(failed_downloads)} of {len(df)} downloads failed")

## 모델 전처리

In [None]:
import urllib.request
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from IPython.display import Image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Training directory: the root folder handed to flow_from_directory for both the
# training and validation subsets. The logged output reports two classes, so it
# presumably contains one sub-folder per class — TODO confirm the layout.
TRAINING_DIR = "/content/drive/MyDrive/Colab/BehavioralFinance/sent/"

In [None]:
# Augmentation settings applied to every image drawn from this generator.
# Rule of thumb from the author: shrink these ranges for uniform images such as
# ID photos and widen them for general photos; the right values depend on the data.
augmentation_options = {
    'rescale': 1. / 255,        # multiply every pixel value by 1/255
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
    'validation_split': 0.2,    # fraction of files reserved for subset='validation'
}
training_datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Stream the training subset of TRAINING_DIR in batches of 32 images,
# each resized to 150x150, with one binary label per image.
training_generator = training_datagen.flow_from_directory(
    directory=TRAINING_DIR,
    subset='training',
    class_mode='binary',
    target_size=(150, 150),
    batch_size=32,
)

Found 27301 images belonging to 2 classes.


In [None]:
# Validation images should be scored as-is. Drawing them from `training_datagen`
# applied the random rotation/shift/shear/zoom/flip augmentation to the validation
# subset too, which distorts val_loss — the metric the checkpoint callback monitors.
# A separate generator that only rescales is used instead. It must keep the same
# validation_split (0.2) as `training_datagen` so both generators partition the
# files identically and the two subsets stay disjoint.
validation_datagen = ImageDataGenerator(
    rescale=1. / 255,
    validation_split=0.2,
)

# Stream the validation subset in batches of 32 images resized to 150x150,
# with one binary label per image.
validation_generator = validation_datagen.flow_from_directory(
    TRAINING_DIR,
    batch_size=32,
    target_size=(150, 150),
    class_mode='binary',
    subset='validation',
)

Found 6825 images belonging to 2 classes.


## CNN

In [None]:
# Baseline CNN: four Conv2D + MaxPooling2D stages followed by a dense classifier head.
conv_stage_filters = [64, 64, 128, 128]

cnn_layers = []
for stage, n_filters in enumerate(conv_stage_filters):
    if stage == 0:
        # Only the first layer declares the input shape: 150x150 images with 3 channels.
        cnn_layers.append(
            Conv2D(n_filters, (3, 3), activation='relu', input_shape=(150, 150, 3))
        )
    else:
        cnn_layers.append(Conv2D(n_filters, (3, 3), activation='relu'))
    cnn_layers.append(MaxPooling2D(2, 2))

cnn_layers += [
    # Flatten the 2D feature maps into a 1D vector for the dense layers.
    Flatten(),
    # Dropout to reduce overfitting.
    Dropout(0.5),
    Dense(512, activation='relu'),
    # Binary classification: a single sigmoid unit outputs one probability.
    Dense(1, activation='sigmoid'),
]

model = Sequential(cnn_layers)

In [None]:
model.summary() # author's note: the parameter count reflects the amount of computation

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 4, 4, 512)         14714688  
                                                                 
 flatten_3 (Flatten)         (None, 8192)              0         
                                                                 
 dropout_3 (Dropout)         (None, 8192)              0         
                                                                 
 dense_8 (Dense)             (None, 512)               4194816   
                                                                 
 dense_9 (Dense)             (None, 128)               65664     
                                                                 
 dense_10 (Dense)            (None, 1)                 129       
                                                                 
Total params: 18,975,297
Trainable params: 4,260,609
N

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy reported as 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Checkpoint callback: whenever val_loss improves, overwrite the file with the
# current weights (weights only, not the full model) and log the save.
checkpoint_path = "tmp_checkpoint.ckpt"
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Training length, in epochs.
epochs=25

In [None]:
# Train the CNN before restoring weights. This section previously jumped straight
# to load_weights, so on a fresh kernel the checkpoint file did not exist, the
# `checkpoint` callback was never used, and `history` (needed by the plot in the
# next cell) was undefined. The call mirrors the transfer-learning section.
history = model.fit(
    training_generator,
    validation_data=validation_generator,
    epochs=epochs,
    callbacks=[checkpoint],
)

# Roll the model back to the weights saved at the lowest val_loss.
model.load_weights(checkpoint_path)

In [None]:
# Plot training accuracy and loss per epoch for the CNN.
acc_values = history.history['acc']
loss_values = history.history['loss']

# Build the x-axis from the recorded history rather than from `epochs`: if fewer
# epochs were recorded than requested, np.arange(1, epochs+1) no longer matches
# the y-values and plt.plot raises a shape-mismatch error.
epoch_axis = np.arange(1, len(acc_values) + 1)

plt.figure(figsize=(12, 9))
plt.plot(epoch_axis, acc_values)
plt.plot(epoch_axis, loss_values)
plt.title('Acc / Loss', fontsize=20)
plt.xlabel('Epochs')
plt.ylabel('Acc / Loss')
plt.legend(['acc', 'loss'], fontsize=15)
plt.show()

## 전이학습

In [None]:
# Load VGG16 with its ImageNet-trained weights. include_top=False leaves out
# VGG16's own classifier layers so that a new head can be attached for transfer learning.
transfer_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)
# Freeze the pretrained weights so they are not updated during training.
transfer_model.trainable = False

In [None]:
# Transfer-learning model: the frozen VGG16 base followed by a small trainable head.
classifier_head = [
    Flatten(),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(64, activation='relu'),
    # Single sigmoid unit for the binary label.
    Dense(1, activation='sigmoid'),
]

# The pretrained base goes first so that its feature maps feed the head.
model = Sequential([transfer_model] + classifier_head)

In [None]:
model.summary() # author's note: the parameter count reflects the amount of computation

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 4, 4, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 8192)              0         
                                                                 
 dropout (Dropout)           (None, 8192)              0         
                                                                 
 dense (Dense)               (None, 32)                262176    
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 14,979,041
Trainable params: 264,353
Non-t

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy reported as 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Training length, in epochs; passed to model.fit below.
epochs=25

In [None]:
# Checkpoint callback: whenever val_loss improves, overwrite the file with the
# current weights (weights only, not the full model) and log the save.
checkpoint_path = "tmp_checkpoint.ckpt"
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Train on the training generator, scoring against the validation generator after
# each epoch; the checkpoint callback saves the weights whenever val_loss improves.
fit_options = {
    'validation_data': validation_generator,
    'epochs': epochs,
    'callbacks': [checkpoint],
}
history = model.fit(training_generator, **fit_options)

Epoch 1/25
Epoch 1: val_loss improved from inf to 0.67603, saving model to tmp_checkpoint.ckpt
Epoch 2/25
Epoch 2: val_loss improved from 0.67603 to 0.67040, saving model to tmp_checkpoint.ckpt
Epoch 3/25
Epoch 3: val_loss did not improve from 0.67040
Epoch 4/25
Epoch 4: val_loss did not improve from 0.67040
Epoch 5/25
Epoch 5: val_loss did not improve from 0.67040
Epoch 6/25
Epoch 6: val_loss did not improve from 0.67040
Epoch 7/25
Epoch 7: val_loss improved from 0.67040 to 0.66496, saving model to tmp_checkpoint.ckpt
Epoch 8/25
Epoch 8: val_loss did not improve from 0.66496
Epoch 9/25
Epoch 9: val_loss did not improve from 0.66496
Epoch 10/25
Epoch 10: val_loss did not improve from 0.66496
Epoch 11/25
Epoch 11: val_loss did not improve from 0.66496
Epoch 12/25
Epoch 12: val_loss improved from 0.66496 to 0.66482, saving model to tmp_checkpoint.ckpt
Epoch 13/25
Epoch 13: val_loss did not improve from 0.66482
Epoch 14/25

In [None]:
# Restore the weights written by the checkpoint callback, which saves only when
# val_loss improves, so this is the best epoch rather than the final one.
model.load_weights(checkpoint_path)

In [None]:
import matplotlib.pyplot as plt

# Plot training accuracy and loss per epoch for the transfer-learning model.
acc_values = history.history['acc']
loss_values = history.history['loss']

# Build the x-axis from the recorded history rather than from `epochs`: if fewer
# epochs were recorded than requested, np.arange(1, epochs+1) no longer matches
# the y-values and plt.plot raises a shape-mismatch error.
epoch_axis = np.arange(1, len(acc_values) + 1)

plt.figure(figsize=(12, 9))
plt.plot(epoch_axis, acc_values)
plt.plot(epoch_axis, loss_values)
plt.title('Acc / Loss', fontsize=20)
plt.xlabel('Epochs')
plt.ylabel('Acc / Loss')
plt.legend(['acc', 'loss'], fontsize=15)
plt.show()