# 유전자 분석 암진단

유방암(TCGA_BRCA)  
대장암(TCGA_COAD)  
폐선암(TCGA_LUAD)  
갑상선암(TCGA_THCA)

In [None]:
from tensorflow import keras
import numpy as np
import random
import os
from matplotlib import pyplot as plt

In [None]:
class DataReader():
  def __init__(self):
    self.label=[]
    self.train_X=[]
    self.train_Y=[]
    self.test_X=[]
    self.test_Y=[]
    self.read_data()

  def read_data(self):
    files=['TCGA_BRCA.csv','TCGA_COAD.csv','TCGA_LUAD.csv','TCGA_THCA.csv']
    data=[]

    for i, file in enumerate(files):
      book=np.loadtxt(file,delimiter=',',dtype=np.float32).transpose()   #행열 전환
      self.label.append(file[:-4])

      for el in book:
        data.append((el/np.max(el),i))

    random.shuffle(data)

    for i, el in enumerate(data):
      if i<0.8*len(data):
        self.train_X.append(el[0])
        self.train_Y.append(el[1])
      else:
        self.test_X.append(el[0])
        self.test_Y.append(el[1])

    self.train_X=np.asarray(self.train_X)
    self.train_Y=np.asarray(self.train_Y)
    self.test_X=np.asarray(self.test_X)
    self.test_Y=np.asarray(self.test_Y)

    print("\n\nData Read Done")
    print("Training X Size: " + str(self.train_X.shape))
    print("Training Y Size: " + str(self.train_Y.shape))
    print("Test X Size: " +str(self.test_X.shape))
    print("Test Y Size: " +str(self.test_Y.shape)+ '\n\n')

In [None]:
dr=DataReader()



Data Read Done
Training X Size: (2324, 20000)
Training Y Size: (2324,)
Test X Size: (581, 20000)
Test Y Size: (581,)




In [None]:
dr.test_X

array([[7.6610915e-04, 3.0860388e-06, 2.9857425e-04, ..., 4.1147182e-06,
        1.9107723e-04, 5.5857300e-04],
       [2.8894199e-02, 3.2665901e-04, 1.2854637e-02, ..., 4.2344687e-05,
        1.9327324e-02, 8.0091944e-03],
       [3.1922951e-03, 3.0436629e-06, 1.2666710e-03, ..., 7.7613404e-05,
        3.8553062e-05, 1.2027541e-03],
       ...,
       [3.3946824e-03, 1.3234629e-06, 1.1236201e-03, ..., 9.3304137e-05,
        4.6387376e-04, 1.3717694e-03],
       [6.8003978e-03, 4.0770200e-04, 3.9125085e-03, ..., 1.1980716e-04,
        1.4484150e-03, 4.2755064e-03],
       [2.5496429e-03, 3.3289501e-07, 7.8197039e-04, ..., 2.5300022e-05,
        2.2969756e-04, 1.0283127e-03]], dtype=float32)

In [None]:
model=keras.Sequential([
    keras.layers.Dense(20000),
    keras.layers.Dense(2048, activation='relu'),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(4, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [7]:
print("\n\n***************TRAINING START****************")
early_stop=keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
history=model.fit(dr.train_X, dr.train_Y, epochs=20, validation_data=(dr.test_X, dr.test_Y), callbacks=[early_stop])



***************TRAINING START****************
Epoch 1/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m652s[0m 8s/step - accuracy: 0.7456 - loss: 1.3310 - val_accuracy: 0.9725 - val_loss: 0.2580
Epoch 2/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m618s[0m 8s/step - accuracy: 0.9864 - loss: 0.0847 - val_accuracy: 0.9587 - val_loss: 0.6926
Epoch 3/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m617s[0m 8s/step - accuracy: 0.9869 - loss: 0.1797 - val_accuracy: 0.9088 - val_loss: 0.7094
Epoch 4/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m623s[0m 8s/step - accuracy: 0.9703 - loss: 0.7519 - val_accuracy: 0.9776 - val_loss: 0.9585
Epoch 5/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m606s[0m 8s/step - accuracy: 0.9849 - loss: 0.4558 - val_accuracy: 0.9931 - val_loss: 0.1731
Epoch 6/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m628s[0m 8s/step - accuracy: 0.9909 - loss: 0.4571 - val_accuracy: 0.9931 - val