In [1]:
#%pip install autotime
#%load_ext autotime

In [2]:
import tensorflow as tf

# Enumerate every physical device TensorFlow can see
devices = tf.config.list_physical_devices()
print("Available devices:", devices)

# Tell the reader whether a GPU was detected (an empty GPU list is falsy)
print(
    "GPU is available and will be used."
    if tf.config.list_physical_devices('GPU')
    else "GPU is not available. The code will run on CPU."
)

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
GPU is not available. The code will run on CPU.


In [3]:
import os,re,sys,pickle
import pandas as pd
import tensorflow.keras.backend as K
from gensim.corpora import Dictionary
from keras.preprocessing.sequence import pad_sequences
from hazm import Normalizer, POSTagger, word_tokenize, Lemmatizer, stopwords_list#from konlpy.tag import Twitter,Okt
from parsivar import FindStems
import numpy as np
from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D,Reshape, Flatten, Dropout, Concatenate
from keras.models import Model
from keras.layers import BatchNormalization #from keras.layers.normalization import BatchNormalization
from keras import regularizers


In [4]:
# Set directories
#
# Paths are assembled component by component with os.path.join so that they
# resolve on every OS; the previous literal "\\" separators only worked on
# Windows. The final "" component keeps the trailing separator, because the
# rest of the notebook appends file names to PATH / SAVED by string
# concatenation.
#
# NOTE: pardir is derived from the current working directory and this cell
# then changes that directory, so re-running the cell without restarting the
# kernel resolves a different pardir.
pardir = os.path.dirname(os.getcwd())
PATH = os.path.join(pardir, "data", "caption_all_fa", "")
os.chdir(PATH)
SAVED = os.path.join(pardir, "data", "saved", "")

In [5]:
# Prepare for data

def load_doc(filename):
    """Return the whole content of a UTF-8 text file as a single string.

    Args:
        filename: Path of the file to read.

    Returns:
        The decoded text of the file.
    """
    # The context manager closes the handle even when read() raises (for
    # example on a decoding error); the manual open()/close() pair did not.
    with open(file=filename, mode='r', encoding="utf-8") as file:
        return file.read()

# Map each requested image id to the list of captions stored for it
def load_clean_descriptions(filename, dataset):
    """Collect the captions of the requested images from a caption file.

    Every non-blank line of `filename` is split on whitespace: the first token
    is the image id and the remaining tokens, re-joined with single spaces,
    form one caption.

    Args:
        filename: Path of the UTF-8 caption file.
        dataset: Iterable of (hashable) image ids to keep.

    Returns:
        dict mapping image id -> list of caption strings, in file order.
    """
    # Set membership is O(1); testing against a list rescanned it on every line.
    wanted = set(dataset)
    descriptions = dict()
    # Iterating the handle streams the file line by line and always closes it.
    with open(file=filename, mode='r', encoding="utf-8") as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                # Blank line (e.g. the one produced by a trailing newline):
                # tokens[0] used to raise IndexError here.
                continue
            image_id, image_desc = tokens[0], tokens[1:]
            if image_id in wanted:
                descriptions.setdefault(image_id, []).append(' '.join(image_desc))
    return descriptions

# The VGG16 network is kept frozen, so the features it produces for our data
# were computed once and saved; this function loads those saved features.
def load_photo_features(filename, dataset):
    """Load the saved image features of the requested image ids.

    Args:
        filename: Path of a pickle file holding a mapping id -> features.
        dataset: Iterable of image ids to select.

    Returns:
        dict with one entry per id of `dataset`, in iteration order.

    Raises:
        KeyError: if an id of `dataset` is missing from the pickled mapping.
    """
    # SECURITY: unpickling can execute arbitrary code; only load trusted files.
    # The `with` block closes the handle, which pickle.load(open(...)) never did.
    with open(filename, 'rb') as handle:
        all_features = pickle.load(handle)
    return {k: all_features[k] for k in dataset}

def load_clean_class(filename, dataset):
    """Map each requested image id to the class label(s) listed for it.

    Every usable line of `filename` holds at least two whitespace-separated
    tokens. The second token is matched against `dataset` and used as the
    dictionary key (the image id); the first token is stored as the value
    (the class label).

    Args:
        filename: Path of the UTF-8 class file.
        dataset: Iterable of (hashable) image ids to keep.

    Returns:
        dict mapping image id -> list of class labels, in file order.
    """
    # Set membership is O(1); testing against a list rescanned it on every line.
    wanted = set(dataset)
    labels_by_image = dict()
    with open(file=filename, mode='r', encoding="utf-8") as file:
        for line in file:
            tokens = line.split()
            if len(tokens) < 2:
                # Blank or incomplete line: indexing tokens[0] / tokens[1]
                # used to raise IndexError here.
                continue
            label, image_id = tokens[0], tokens[1]
            if image_id in wanted:
                labels_by_image.setdefault(image_id, []).append(label)
    return labels_by_image

# One-hot labels
def load_class_dummy(filename, dataset, class_vectors=None):
    """Map each requested image id to the one-hot vector(s) of its class.

    Works like load_clean_class, except that the class label (first token of
    a line) is replaced by its entry in `class_vectors`.

    Args:
        filename: Path of the UTF-8 class file.
        dataset: Iterable of (hashable) image ids to keep.
        class_vectors: Optional mapping class label -> one-hot vector. When
            omitted, the module-level `dummies_dict` is used (looked up at
            call time, as before).

    Returns:
        dict mapping image id -> list of one-hot vectors, in file order.

    Raises:
        KeyError: if the label of a kept line is missing from the mapping.
    """
    if class_vectors is None:
        class_vectors = dummies_dict
    # Set membership is O(1); testing against a list rescanned it on every line.
    wanted = set(dataset)
    vectors_by_image = dict()
    with open(file=filename, mode='r', encoding="utf-8") as file:
        for line in file:
            tokens = line.split()
            if len(tokens) < 2:
                # Blank or incomplete line: indexing tokens[0] / tokens[1]
                # used to raise IndexError here.
                continue
            label, image_id = tokens[0], tokens[1]
            if image_id in wanted:
                vectors_by_image.setdefault(image_id, []).append(class_vectors[label])
    return vectors_by_image

# Build the class-name -> one-hot-vector lookup.
# Every sub-directory of PATH counts as one class; names are kept sorted.
folder_names = sorted(
    entry_name
    for entry_name in os.listdir(PATH)
    if os.path.isdir(os.path.join(PATH, entry_name))
)

# get_dummies yields one row per entry of folder_names; row k is paired with
# the k-th class name.
dummies = pd.get_dummies(folder_names)
dummies_list = dummies.values.tolist()
dummies_dict = {name: row for name, row in zip(folder_names, dummies_list)}

In [6]:
# Image ids, captions, saved image features and labels for the
# train / validation / test splits.

def _read_image_ids(list_path):
    """Return the image ids listed in a split file.

    For every line, the first comma-separated field is taken and its last four
    characters are dropped (presumably the file extension - confirm against
    the listing files).
    """
    with open(file=list_path, encoding='utf-8', mode='r') as f:
        # A comprehension replaces the former `ids = ids + [...]`, which copied
        # the whole list once per line.
        return [line.split(',')[0][:-4] for line in f.read().splitlines()]


def _load_split(split_name, list_file, features_file):
    """Load all inputs of one data split and print their sizes.

    Args:
        split_name: Label used in the printed summary ('train', 'val', 'test').
        list_file: Name (inside SAVED) of the file listing the split's images.
        features_file: Name (inside SAVED) of the pickled image features.

    Returns:
        Tuple (image ids, captions per image, image features,
        class labels per image, one-hot labels per image).
    """
    ids = _read_image_ids(os.path.join(SAVED, list_file))

    descriptions = load_clean_descriptions(os.path.join(SAVED, 'flower_text_tagged_fa.txt'), ids)
    print('Descriptions: %s=%d' % (split_name, len(descriptions)))

    # SECURITY: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join(SAVED, features_file), "rb") as f:
        features = pickle.load(f)
    print('Photos: %s=%d' % (split_name, len(features)))

    classes = load_clean_class(os.path.join(SAVED, 'flower_class.txt'), ids)
    # The "Descriptions" label is kept unchanged for output compatibility even
    # though this line reports the number of images that have a class.
    print('Descriptions: %s=%d' % (split_name, len(classes)))

    one_hot = load_class_dummy(os.path.join(SAVED, 'flower_class.txt'), ids)
    return ids, descriptions, features, classes, one_hot


# Train
train, train_descriptions, train_features, train_class, class_dummy = _load_split(
    'train', 'train_image.txt', 'train_image_features.pkl')

# Validation (its feature file is named "eval_...")
val, val_descriptions, val_features, val_class, val_class_dummy = _load_split(
    'val', 'val_image.txt', 'eval_image_features.pkl')

# Test
test, test_descriptions, test_features, test_class, test_class_dummy = _load_split(
    'test', 'test_image.txt', 'test_image_features.pkl')

Descriptions: train=5246
Photos: train=5246
Descriptions: train=5246
Descriptions: val=1310
Photos: val=1310
Descriptions: val=1310
Descriptions: test=1633
Photos: test=1633
Descriptions: test=1633


In [7]:
# Load data

# Characters stripped from the captions: ASCII punctuation plus Persian/Arabic
# punctuation marks and diacritics.
# The backslash is written as "\\" because "\," is an invalid escape sequence
# that newer Python versions warn about; the resulting string is unchanged.
punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*؟%؛،ًًًََُّ»«_~'''
def punc(myStr):
    """Return `myStr` with every character found in `punctuations` removed.

    Args:
        myStr: Text to clean.

    Returns:
        The text without the listed characters; "" for empty input.
    """
    # str.join assembles the result in one pass; the former `s = s + char`
    # re-allocated the string for every kept character.
    return "".join(char for char in myStr if char not in punctuations)

# ---- Vocabulary ------------------------------------------------------------
# Token dictionary built for the flower captions. Dictionary files named in
# this notebook (presumably interchangeable variants):
#   flower_dictionary2_fa_lemOnlyWithStopWord
#   flower_dictionary2_fa_lemAndStemWithStopWords
#   flower_dictionary2_fa_lemAndStem
dictionary2 = Dictionary.load(f"{SAVED}dictionary/flower_dictionary2_fa_lemAndStem")

# ---- Pre-trained embeddings ------------------------------------------------
# Embedding files named in this notebook:
#   1 skigram1_fa_new_300_lemStem
#   2 skigram1_fa_new_200_lemStem
#   3 skigram1_fa_new_100_lemStem
#   4 skigram1_fa_new_300_lemOnlyWithStopWords
# EMBEDDING_DIM has to equal the vector length stored in the selected file,
# since each vector is copied into a row of that width later on.
embedding_path = f"{SAVED}skipgram/skigram1_fa_new_200_lemStem.txt"
EMBEDDING_DIM = 200


# ---- Sequence settings -----------------------------------------------------
MAX_SEQUENCE_LENGTH = 300
c = ""  # module-level placeholder; create_phrase works on its own local `c`

# ---- Persian text-processing tools (built once, reused for every caption) ---
normalizer = Normalizer(correct_spacing=True)
lemmatizer = Lemmatizer(joined_verb_parts=False)
tagger = POSTagger(model=f"{SAVED}pos_tagger.model")
stop_words = stopwords_list()
my_stemmer = FindStems()

def _caption_tokens(desc):
    """Turn one raw caption into its list of lemma and stem tokens."""
    normalized_text = normalizer.normalize(punc(desc))
    tokens = word_tokenize(normalized_text)

    # Rename the tagger's tag names (PRON->PRO, ADJ->AJ, VERB->V) and keep only
    # the part before the first comma; the result is passed to the lemmatizer.
    pos_tags = [
        pair[1].replace('PRON', 'PRO').replace('ADJ', 'AJ').replace('VERB', 'V').split(',')[0]
        for pair in tagger.tag(tokens)
    ]

    new_tokens = []
    for j, token in enumerate(tokens):
        # Lemmas first ('#'-separated parts), then stems ('&'-separated parts);
        # empty parts are dropped. Stop words are deliberately not filtered.
        lemma_parts = lemmatizer.lemmatize(word=token, pos=pos_tags[j]).split('#')
        new_tokens.extend(part for part in lemma_parts if part != '')
        stem_parts = my_stemmer.convert_to_stem(token).split('&')
        new_tokens.extend(part for part in stem_parts if part != '')

    # NOTE(review): the expanded tokens go through the tagger a second time and
    # only the token half of each pair is kept. Presumably this returns
    # new_tokens unchanged, in which case the call could be dropped - confirm
    # against the hazm tagger before removing it.
    return [pair[0] for pair in tagger.tag(new_tokens)]


def create_phrase(train_features, train_descriptions, class_dummy,MAX_SEQUENCE_LENGTH):
    """Build aligned text / image / label arrays for one data split.

    For every image, all of its captions are cleaned, tokenised, expanded to
    lemmas and stems, concatenated and converted to ids with `dictionary2`.

    Args:
        train_features: Mapping image id -> features; element [0] is used.
        train_descriptions: Mapping image id -> list of caption strings.
        class_dummy: Mapping image id -> list of one-hot vectors; element [0]
            is used.
        MAX_SEQUENCE_LENGTH: Length every id sequence is padded/truncated to.

    Returns:
        Tuple (X_text, X_image, y_class) of numpy arrays whose rows refer to
        the same images, in the iteration order of `train_descriptions`.

    Raises:
        KeyError: if a labelled image id is missing from `train_features`.

    NOTE(review): pad_sequences presumably pads with 0 while id 0 may also be
    a real dictionary token, and doc2idx presumably returns -1 for tokens that
    are not in the dictionary - confirm both are acceptable to the Embedding
    layer that consumes X_text.
    """
    import warnings

    X_image, X_text, y_class = list(), list(), list()
    unlabeled = 0

    for key, desc_list in train_descriptions.items():
        if key not in class_dummy:
            # Previously the text and image were still appended while the
            # label was skipped, leaving X and y misaligned. Drop the whole
            # sample instead so the three arrays stay row-aligned.
            unlabeled += 1
            continue

        doc_tokens = list()
        for desc in desc_list:
            doc_tokens += _caption_tokens(desc)

        X_text.append(np.array(dictionary2.doc2idx(doc_tokens)))
        y_class.append(np.array(class_dummy[key][0]))
        X_image.append(train_features[key][0])

    if unlabeled:
        warnings.warn("create_phrase skipped %d image(s) without a class label" % unlabeled)

    X_text = pad_sequences(X_text, maxlen=MAX_SEQUENCE_LENGTH)
    return np.array(X_text), np.array(X_image), np.array(y_class)

In [8]:
# Run the same preprocessing on each split to obtain its
# (text ids, image features, one-hot labels) arrays.
X_text, X_image, y_class = create_phrase(
    train_features, train_descriptions, class_dummy, MAX_SEQUENCE_LENGTH
)
X_text_val, X_image_val, y_class_val = create_phrase(
    val_features, val_descriptions, val_class_dummy, MAX_SEQUENCE_LENGTH
)
X_text_test, X_image_test, y_class_test = create_phrase(
    test_features, test_descriptions, test_class_dummy, MAX_SEQUENCE_LENGTH
)


In [9]:
# Load pre-trained embedding matrix
word_index = dictionary2.token2id  # token -> integer id mapping of the dictionary

# Parse the text embedding file: one "<word> <v1> <v2> ..." entry per line.
embeddings_index = dict()
# `with` closes the file even if a line fails to parse.
with open(file=embedding_path, encoding="utf-8") as f:
    for line in f:
        values = line.split()
        if len(values) <= 2:
            # Blank line, or a word2vec-style "<vocab_size> <dim>" header.
            # These used to raise IndexError or be stored as a bogus
            # one-element "vector".
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))

# One row per dictionary id plus one extra row. Rows of tokens that have no
# pre-trained vector stay all-zero.
# NOTE(review): vectors are written at row `i` (the raw dictionary id), so the
# extra row presumably stays unused and id 0 shares its row with whatever value
# is used for padding - confirm this is intended.
embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector


Found 4009 word vectors.


In [10]:
# Model definition

def Modified_m_CNN(DROP_OUT,DROP_OUT2,LAMBDA):
	"""Build the (uncompiled) two-input image + text CNN classifier.

	Besides its arguments the function reads the module-level names
	MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, word_index, embedding_matrix,
	num_filters and filter_sizes when it is called.

	Args:
		DROP_OUT: Dropout rate applied to the flattened features right
			before the final Dense layer.
		DROP_OUT2: Dropout rate applied after every convolution block.
		LAMBDA: L2 regularisation factor of the final Dense layer's kernel.

	Returns:
		A Keras Model taking [image features (4096,), token ids
		(MAX_SEQUENCE_LENGTH,)] and producing a 102-way softmax.

	Shape remarks in the comments below assume Keras' default
	channels-last layout - TODO confirm.
	"""
	# ---- Image branch: the 4096-vector is viewed as 16 rows x 1 column x 256 channels
	inputs1 = Input(shape=(4096,))
	x= Reshape((16,1,256))(inputs1)
	x=BatchNormalization()(x)
	# 'valid' convolution with a 14-row kernel over 16 rows leaves 3 rows
	conv_x=Conv2D(256, kernel_size=(14,1), padding='valid', kernel_initializer='he_normal', activation='relu')(x)

	conv_x = Dropout(DROP_OUT2)(conv_x)
	# pooling 3 rows with a 2-row window leaves a single row of 256 channels
	Max_x = MaxPool2D(pool_size=(2,1))(conv_x)
	# ---- Text branch: token ids -> embeddings initialised from embedding_matrix (kept trainable)
	inputs2 = Input(shape=(MAX_SEQUENCE_LENGTH,))
	y = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True) (inputs2)
	# add a trailing channel axis so the embedded sequence can be fed to Conv2D
	reshape = Reshape((MAX_SEQUENCE_LENGTH,EMBEDDING_DIM,1))(y)
	# Three parallel convolutions over the same embedded text; each kernel spans
	# filter_sizes[k] consecutive tokens and the full embedding width.
	conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], EMBEDDING_DIM), padding='valid', kernel_initializer='he_normal', activation='relu')(reshape)
	conv_0=BatchNormalization()(conv_0)
	conv_0 = Dropout(DROP_OUT2)(conv_0)
	conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], EMBEDDING_DIM), padding='valid', kernel_initializer='he_normal', activation='relu')(reshape)
	conv_1 =BatchNormalization()(conv_1 )
	conv_1 = Dropout(DROP_OUT2)(conv_1)
	conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], EMBEDDING_DIM), padding='valid', kernel_initializer='he_normal', activation='relu')(reshape)
	conv_2 =BatchNormalization()(conv_2 )
	conv_2 = Dropout(DROP_OUT2)(conv_2)
	# The pool window equals the number of rows a 'valid' convolution leaves
	# (MAX_SEQUENCE_LENGTH - filter size + 1), i.e. max-over-time pooling to one row.
	maxpool_0 = MaxPool2D(pool_size=(MAX_SEQUENCE_LENGTH - filter_sizes[0] + 1, 1), strides=(1,1), padding='valid')(conv_0)
	maxpool_1 = MaxPool2D(pool_size=(MAX_SEQUENCE_LENGTH - filter_sizes[1] + 1, 1), strides=(1,1), padding='valid')(conv_1)
	maxpool_2 = MaxPool2D(pool_size=(MAX_SEQUENCE_LENGTH - filter_sizes[2] + 1, 1), strides=(1,1), padding='valid')(conv_2)
	# ---- Fusion: each pooled text feature is stacked with the image feature along axis 1.
	# Concatenating on axis 1 needs the remaining axes to match, so num_filters
	# has to equal the 256 filters of the image branch.
	concat1 = Concatenate(axis=1)([maxpool_0,Max_x])
	concat2 = Concatenate(axis=1)([maxpool_1,Max_x])
	concat3 = Concatenate(axis=1)([maxpool_2,Max_x])
	# stack the three text/image pairs (3 pairs x 2 rows)
	concat4 = Concatenate(axis=1)([concat1, concat2 ,concat3 ])
	# joint convolution over the stacked rows, then BN -> dropout -> pooling
	a=Conv2D(512, kernel_size=(5,1), padding='valid', kernel_initializer='he_normal', activation='relu')(concat4)
	a=BatchNormalization()(a)
	a=Dropout(DROP_OUT2)(a)
	a=MaxPool2D(pool_size=(2,1))(a)
	a = Flatten()(a)
	z = Dropout(DROP_OUT)(a)
	# ---- Classifier: 102 output classes, L2-regularised kernel
	output = Dense(units=102, activation='softmax',kernel_regularizer=regularizers.l2(LAMBDA), kernel_initializer='he_normal')(z)
	model = Model(inputs=[inputs1, inputs2], outputs=output)
	#print(model.summary())
	return model

In [11]:
# ---- Training hyper-parameters ----------------------------------------------
BATCH_SIZE = 128
EPOCHS = 40
LAMBDA = 0.05            # L2 factor of the output layer
DROP_OUT = 0.2           # dropout before the output layer
DROP_OUT2 = 0.4          # dropout after each convolution block
filter_sizes = [2, 3, 4]  # read by Modified_m_CNN (text convolution heights)
num_filters = 256         # read by Modified_m_CNN (filters per text convolution)

# ---- Repeated training runs -------------------------------------------------
# The model is trained from scratch repeatNum times; the spread of the test
# accuracies shows how sensitive the result is to random initialisation.
accuracies = []

repeatNum = 10

for i in range(repeatNum):
    print(f"Iteration {i+1}")

    # Release the Keras state left over from the previous run. Without this,
    # every repetition adds another full model to the same global state and
    # memory grows across the iterations.
    K.clear_session()

    # Define and compile the model
    model = Modified_m_CNN(DROP_OUT, DROP_OUT2, LAMBDA)
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

    # Fit the model
    model.fit([X_image, X_text], y_class, epochs=EPOCHS,
              batch_size=BATCH_SIZE,
              validation_data=([X_image_val, X_text_val], y_class_val),
              shuffle=True)

    # Evaluate the model; score is [loss, accuracy]
    score = model.evaluate([X_image_test, X_text_test], y_class_test, verbose=1)
    accuracy = score[1] * 100
    accuracies.append(accuracy)
    # NOTE(review): the label is read from model.metrics_names[1]; confirm it
    # reads "accuracy" on the installed Keras version.
    print("%s: %.2f%%" % (model.metrics_names[1], accuracy))

# Calculate mean and variance of accuracies
mean_accuracy = np.mean(accuracies)
variance_accuracy = np.var(accuracies)

# Print results
formatted_accuracies = [f"{acc:.2f}%" for acc in accuracies]
print(f"Mean Accuracy: {mean_accuracy:.2f}%")
print(f"Variance of Accuracy: {variance_accuracy:.2f}")
print(f"Accuracies: {formatted_accuracies}")

Iteration 1




Epoch 1/40
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m804s[0m 19s/step - accuracy: 0.0693 - loss: 13.9962 - val_accuracy: 0.1206 - val_loss: 9.3242
Epoch 2/40
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m893s[0m 22s/step - accuracy: 0.4155 - loss: 7.0941 - val_accuracy: 0.3359 - val_loss: 6.3626
Epoch 3/40
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m895s[0m 22s/step - accuracy: 0.6307 - loss: 4.1313 - val_accuracy: 0.4489 - val_loss: 5.0641
Epoch 4/40
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m829s[0m 20s/step - accuracy: 0.7645 - loss: 2.7376 - val_accuracy: 0.5137 - val_loss: 4.3970
Epoch 5/40
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m890s[0m 22s/step - accuracy: 0.8406 - loss: 2.0318 - val_accuracy: 0.5092 - val_loss: 4.0370
Epoch 6/40
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m799s[0m 19s/step - accuracy: 0.8750 - loss: 1.6424 - val_accuracy: 0.5679 - val_loss: 3.7775
Epoch 7/40
[1m 4/41[0m [

KeyboardInterrupt: 

In [None]:
#1 skigram1_fa_new_300_lemStem
#[94.18, 94.0, 93.94, 93.88, 93.57, 93.63, 93.57, 93.26, 93.33, 92.96]#Mean Accuracy: 93.63%
#Mean Accuracy: 93.63%
#Variance of Accuracy: 0.13%
#time: 34min 40s (started: 2024-07-18 14:43:38 +00:00)

#2 skigram1_fa_new_200_lemStem
#Mean Accuracy: 92.95%
#Variance of Accuracy: 0.13%
#[92.28, 92.47, 92.84, 92.65, 93.33, 93.2, 93.08, 93.33, 92.96, 93.33]
#time: 26min 12s (started: 2024-07-18 15:30:10 +00:00)


#3

#4 skigram1_fa_new_300_lemOnlyWithStopWords