In [2]:
import tensorflow as tf
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import BatchNormalization, Conv2D, Activation, Dense, GlobalAveragePooling2D, MaxPooling2D, ZeroPadding2D, Add, Input, Flatten

import time
import os
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib.pyplot as plt
import matplotlib.style as style
import seaborn as sns
import cv2
from utils import *

# Global configuration constants for the input pipeline.
BATCH_SIZE = 256  # Big enough to measure an F1-score
# Adapt preprocessing and prefetching dynamically
AUTOTUNE = tf.data.experimental.AUTOTUNE
# NOTE(review): presumably the buffer size for dataset shuffling; it is not
# used in the cells visible here - confirm against the training pipeline.
SHUFFLE_BUFFER_SIZE = 8192


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Build the `artworks` table (ID, Labels, File) from the image file names.
dir_path = './database_test'

# Sort so the row order (and therefore the seeded train/validation split made
# from this table) does not depend on the OS-specific order of os.listdir().
file_list = sorted(os.listdir(dir_path))
ID = []
Labels = []
Files = []  # names of the files actually parsed; stays aligned with ID/Labels

for file_name in file_list:
    # Names are '+'-separated: "<id>+<label group>+...+<last segment>.jpg".
    parts = file_name.split('+')
    if len(parts) < 2 or parts[0] == '':
        # No '+' separator at all, or nothing before the first '+':
        # there is no usable ID/label information, so skip the file.
        continue

    # Drop the ID (first part) and the final segment, which carries the
    # ".jpg" extension.
    label_groups = parts[1:-1]

    # A group may hold several comma-separated labels; lowercase the group,
    # split it, and discard empty strings.
    labels = [
        label
        for group in label_groups
        for label in group.lower().split(', ')
        if label != ''
    ]

    ID.append(parts[0])
    Labels.append(labels)
    Files.append(file_name)

# Use the filtered `Files` list (not the raw `file_list`) so all three columns
# have the same length even when some files were skipped. Every column is
# populated for each parsed file, so no missing-value cleanup is needed here.
artworks = pd.DataFrame({"ID": ID, "Labels": Labels, "File": Files})

# Get label frequencies in descending order (value_counts sorts descending
# by default).
label_freq = artworks['Labels'].explode().value_counts()


In [None]:
# Labels that occur fewer than this many times are treated as rare and are
# removed from every artwork (an earlier experiment used 50).
MIN_LABEL_COUNT = 25

# Create a list of rare labels
rare = list(label_freq[label_freq < MIN_LABEL_COUNT].index)
label_number = len(label_freq) - len(rare)
print("Number of used labels:", label_number)
print("Number of ignored labels:", len(rare))

# A set gives constant-time membership checks while filtering each row.
rare_set = set(rare)
filtered_labels = artworks['Labels'].apply(
    lambda labels: [label for label in labels if label not in rare_set])

# Replace the label lists and keep only artworks that still have at least one
# label. Rebinding `artworks` (instead of mutating it in place) keeps this
# cell safe to re-run.
artworks = artworks.assign(Labels=filtered_labels)
artworks = artworks[artworks['Labels'].map(len) > 0]
artworks.head(10)


Number of used labels: 42
Number of ignored labels: 36


Unnamed: 0,ID,Labels,File
1,100,"[still life, naïve art (primitivism)]","100+still life+Naïve Art (Primitivism)+oil, co..."
2,1000,"[religious painting, baroque]","1000+religious painting+Baroque, Tenebrism+oil..."
3,10000,"[genre painting, post-impressionism]","10000+genre painting+Post-Impressionism+oil, w..."
4,100000,"[abstract, abstract art]",100000+abstract+Abstract Art+.jpg
5,100001,"[abstract, abstract art]",100001+abstract+Abstract Art+.jpg
6,100002,"[abstract, abstract art]",100002+abstract+Abstract Art+.jpg
7,100003,"[abstract, abstract art]",100003+abstract+Abstract Art+.jpg
8,100004,"[abstract, abstract art]",100004+abstract+Abstract Art+.jpg
9,100005,"[abstract, abstract art]",100005+abstract+Abstract Art+.jpg
10,100006,"[abstract, abstract art]",100006+abstract+Abstract Art+.jpg


In [None]:
from sklearn.model_selection import train_test_split
# Split the file names and their label lists into train and validation sets
# (10% validation, fixed seed).
train_files, val_files, y_train, y_val = train_test_split(
    artworks['File'], artworks['Labels'], test_size=0.1, random_state=23)

# Turn the bare file names into paths under the image directory. Reuse
# `dir_path` so the directory is defined in exactly one place.
X_train = [os.path.join(dir_path, str(f)) for f in train_files]
X_val = [os.path.join(dir_path, str(f)) for f in val_files]
X_train[:3]


['./database_test\\101012+landscape+Nanga (Bunjinga)+.jpg',
 './database_test\\100060+abstract+Abstract Expressionism+.jpg',
 './database_test\\101022+landscape+Nanga (Bunjinga)+.jpg']

In [None]:
# Binarize the multi-label targets with MultiLabelBinarizer.
# The binarizer is fitted once, on the labels of the whole table, so that the
# class -> column mapping is identical for every split.
mlb = MultiLabelBinarizer()
mlb.fit(artworks['Labels'])

# Loop over all labels and show them
N_LABELS = len(mlb.classes_)
for i, label in enumerate(mlb.classes_):
    print("{}. {}".format(i, label))

# Encode both splits with the SAME fitted binarizer. Re-fitting on the
# validation labels would rebuild `mlb.classes_` from the validation set only,
# which can change the number/order of columns in `y_val_bin` and the class
# list seen by later cells.
y_train_bin = mlb.transform(y_train)
y_val_bin = mlb.transform(y_val)

# Both encodings must have one column per known label.
assert y_train_bin.shape[1] == y_val_bin.shape[1] == N_LABELS


0. abstract
1. abstract art
2. abstract expressionism
3. animal painting
4. art informel
5. art nouveau (modern)
6. baroque
7. cityscape
8. color field painting
9. conceptual art
10. concretism
11. cubism
12. expressionism
13. figurative
14. flower painting
15. genre painting
16. hard edge painting
17. illustration
18. impressionism
19. ink and wash painting
20. installation
21. landscape
22. lyrical abstraction
23. minimalism
24. nanga (bunjinga)
25. naïve art (primitivism)
26. neo-impressionism
27. northern renaissance
28. nude painting (nu)
29. op art
30. photo
31. pointillism
32. pop art
33. portrait
34. post-impressionism
35. realism
36. regionalism
37. religious painting
38. romanticism
39. still life
40. surrealism
41. symbolic painting


`for _ in range(n)`의 `_`는 반복 변수의 값을 사용하지 않고 n번 반복만 수행할 때 쓰는 관례적인 변수 이름이다.

In [10]:
# Node table: one row per label class, in the binarizer's class order.
node_df = pd.DataFrame({'class': list(mlb.classes_)})

# Edge table: one row for every ordered pair of classes (self-pairs included),
# with the correlation initialised to 0. The frame is built in a single call
# rather than grown row by row, which is quadratic in the number of rows.
edge_df = pd.DataFrame(
    [[class1, class2, 0] for class1 in mlb.classes_ for class2 in mlb.classes_],
    columns=['class1', 'class2', 'correlation'],
)

# Display the edge table with its row index shown as a regular column.
edge_df.reset_index()


Unnamed: 0,index,class1,class2,correlation
0,0,abstract,abstract,0
1,1,abstract,abstract art,0
2,2,abstract,abstract expressionism,0
3,3,abstract,animal painting,0
4,4,abstract,art informel,0
...,...,...,...,...
1759,1759,symbolic painting,religious painting,0
1760,1760,symbolic painting,romanticism,0
1761,1761,symbolic painting,still life,0
1762,1762,symbolic painting,surrealism,0


In [None]:
# Build the label co-occurrence matrix `adj` (N_LABELS x N_LABELS, as a list
# of lists) from the binarized training labels.
# indicator[s, k] is 1 when training sample s carries label k (entry == 1).
indicator = (np.asarray(y_train_bin) == 1).astype(np.int64)

# (indicator.T @ indicator)[i, j] counts the samples that carry both label i
# and label j - for i != j this equals incrementing adj[i][j] once per sample
# containing the pair.
cooccurrence = indicator.T @ indicator

# NOTE(review): the diagonal is set to the total number of training samples,
# matching the previous behaviour. Without this call the diagonal would hold
# the per-label occurrence counts instead - confirm which one is intended.
np.fill_diagonal(cooccurrence, len(y_train_bin))

adj = cooccurrence.tolist()


print(adj)


In [None]:
import tensorflow_gnn as tfgnn

# Create a GraphTensor via tfgnn.GraphTensor.from_pieces.
# TODO: `node_sets` is still an empty dict and no other pieces are passed, so
# this cell looks unfinished - fill in the graph pieces before relying on
# `graph_tensor`.
graph_tensor = tfgnn.GraphTensor.from_pieces(
    node_sets= {
    
    }
)