In [11]:
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io as io
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Conv2D,MaxPooling2D,Flatten
from tensorflow.keras.callbacks import TensorBoard,ModelCheckpoint
from tensorflow.keras.models import load_model
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

In [12]:
#Processing of the label set data into the form used in the model
def _load_eye_color_labels(label_path):
    """Read one tab-separated label file and return its eye_color column as an (n, 1) int array."""
    # The file is read with the default comma separator, so every line lands in
    # a single string column that is then split on tabs into the four fields.
    raw_lines = pd.read_csv(label_path, names=["mixed_information"])
    fields = raw_lines["mixed_information"].str.split("\t", expand=True)
    fields.columns = ['index', 'eye_color', 'face_shape', 'file_name']
    # Row 0 is the first line of the file (presumably the header row), not a sample.
    eye_color = fields['eye_color'].drop(index=0)
    # Convert scalar by scalar: calling int() on a 1-element row array is
    # deprecated since NumPy 1.25.
    return np.array([int(value) for value in eye_color]).reshape(-1, 1)


def labelprocessing(label_trainpath,label_testpath,glass_label_path):
    """Load the label files and return them as integer column vectors.

    Parameters
    ----------
    label_trainpath, label_testpath : str
        Paths to label files whose lines hold four tab-separated fields
        (index, eye_color, face_shape, file_name). The first line of each
        file is discarded. Only eye_color is kept.
    glass_label_path : str
        Path to a headerless file read as a single column named "sunglass".

    Returns
    -------
    tuple of numpy.ndarray
        ``(YTrain, YTest, YTrain_newlabel)``, each of shape ``(n, 1)``:
        training eye_color labels, test eye_color labels and the "sunglass"
        labels.
    """
    YTrain = _load_eye_color_labels(label_trainpath)
    YTest = _load_eye_color_labels(label_testpath)

    glass_labels = pd.read_csv(glass_label_path, names=["sunglass"])
    YTrain_newlabel = np.array([int(value) for value in glass_labels["sunglass"]]).reshape(-1, 1)

    return YTrain,YTest,YTrain_newlabel

In [13]:
#Pre-processing of the input data, splitting the training set into labeled data and data awaiting labeling, in preparation for semi-supervised learning
def datapreprocessing(data_train_path,data_test_path):
    """Load, resize and scale the images, and build the flattened feature sets.

    Every image matched by the two patterns is resized to 120x120 and divided
    by 255.

    Parameters
    ----------
    data_train_path, data_test_path : str
        Load patterns handed to ``skimage.io.ImageCollection``.

    Returns
    -------
    XTrain : numpy.ndarray
        All resized, scaled training images (image-shaped, not flattened).
    XTest : list of numpy.ndarray
        One flattened vector per resized, scaled test image.
    XTrain_newlabel : numpy.ndarray
        The first 500 training images, one flattened row each.
    XTrain_predict : numpy.ndarray
        Every training image, one flattened row each.
    """
    train_images = io.ImageCollection(data_train_path)
    test_images = io.ImageCollection(data_test_path)

    # Resize first, then scale the whole stack in one go.
    XTrain = np.array([cv2.resize(picture, (120,120)) for picture in train_images]) / 255
    test_scaled = np.array([cv2.resize(picture, (120,120)) for picture in test_images]) / 255

    # Only the first 500 training images are paired with the extra label file.
    labelled_subset = XTrain[:500, :, :, :]

    XTrain_newlabel = np.array([sample.flatten() for sample in labelled_subset])
    XTrain_predict = np.array([sample.flatten() for sample in XTrain])
    # The test features are deliberately left as a plain list of 1-D arrays.
    XTest = [sample.flatten() for sample in test_scaled]

    return XTrain,XTest,XTrain_newlabel,XTrain_predict

In [14]:
#Selecting and testing models
def crossvalidation_judgeglass(XTrain_newlabel,YTrain_newlabel):
    """Fit a random forest on the given features and labels and return it.

    Despite its name, this function performs no cross-validation: it trains a
    single ``RandomForestClassifier(max_depth=50, n_estimators=100)``.

    Parameters
    ----------
    XTrain_newlabel : array-like
        Feature matrix, one row per sample.
    YTrain_newlabel : array-like
        Target labels. A single-column 2-D array of shape ``(n, 1)`` is
        flattened to ``(n,)`` before fitting.

    Returns
    -------
    RandomForestClassifier
        The fitted classifier.
    """
    targets = np.asarray(YTrain_newlabel)
    if targets.ndim == 2 and targets.shape[1] == 1:
        # scikit-learn expects a 1-D target for single-output problems and
        # emits a DataConversionWarning when it is handed a column vector.
        targets = targets.ravel()
    rf = RandomForestClassifier(max_depth=50,n_estimators=100)
    rf.fit(XTrain_newlabel, targets)
    return rf

In [15]:
#Label construction and input data reconstruction
def creatlable_judgeglass(XTrain_predict,rf,YTrain):
    """Split samples and their labels by the class that ``rf`` predicts for them.

    Samples predicted as 1 go to the "glass" group, samples predicted as 0 go
    to the "noglass" group, and any other prediction is dropped. Each group of
    flattened samples is reshaped to ``(count, 120, 120, 4)``.

    Parameters
    ----------
    XTrain_predict : sequence of flattened samples
        Passed to ``rf.predict`` and indexed by position.
    rf : object with a ``predict`` method
        Classifier that decides which group a sample belongs to.
    YTrain : sequence
        Labels aligned with ``XTrain_predict`` by position.

    Returns
    -------
    tuple of numpy.ndarray
        ``(XTrain_noglass, XTrain_glass, YTrain_noglass, YTrain_glass)``.
    """
    predicted = np.array(rf.predict(XTrain_predict))

    # Positions of the samples that fall into each group, in original order.
    glass_rows = [row for row, label in enumerate(predicted) if label == 1]
    noglass_rows = [row for row, label in enumerate(predicted) if label == 0]

    def _collect(rows):
        images = np.array([XTrain_predict[row] for row in rows])
        labels = np.array([YTrain[row] for row in rows])
        return images.reshape(len(rows), 120, 120, 4), labels

    XTrain_glass, YTrain_glass = _collect(glass_rows)
    XTrain_noglass, YTrain_noglass = _collect(noglass_rows)
    return XTrain_noglass,XTrain_glass,YTrain_noglass,YTrain_glass

In [16]:
#Building Convolutional Neural Networks
def predict_withnoglass(XTrain_noglass,YTrain_noglass,validation_split=0.0):
    """Build and train the convolutional network and return the fitted model.

    The network is ``conv`` blocks of Conv2D(8, 2x2, relu) + MaxPooling2D(2x2),
    a Flatten layer, ``fully_connected_layer`` Dense(``unit``, relu) layers and
    a 5-way softmax output. It is compiled with Adam and sparse categorical
    cross-entropy and trained for 5 epochs with batch size 32. The
    hyper-parameter lists hold one value each, so exactly one model is trained.
    If they are extended, only the last trained model is returned.

    Parameters
    ----------
    XTrain_noglass : array-like
        Training images passed to ``model.fit``.
    YTrain_noglass : array-like
        Integer class labels passed to ``model.fit``.
    validation_split : float, optional
        Fraction of the training data held out for validation. The default
        ``0.0`` trains on everything, as before. A positive value also enables
        the best-``val_accuracy`` checkpoint, which needs validation data to
        have anything to monitor.

    Returns
    -------
    Sequential
        The trained Keras model.
    """
    conv_num=[3]
    fully_connected_layer_num=[4]
    unit_count=[100]
    for conv in conv_num:
        for fully_connected_layer in fully_connected_layer_num:
            for unit in unit_count:
                logs_file=f'logs/log_{conv}_fully_{fully_connected_layer}_unit_{unit}_conv_{int(time.time())}'
                # The {epoch}/{val_accuracy} placeholders are filled in by Keras when it saves.
                checkpoint_filepath=f'models/model_{conv}_fully_{fully_connected_layer}_unit_{unit}_'+'{epoch:02d}-{val_accuracy:.2f}.hdf5'
                print(logs_file)
                print(checkpoint_filepath)
                callbacks=[TensorBoard(log_dir=logs_file)]
                if validation_split>0:
                    # 'mode' (not 'model') selects whether a higher or lower monitored value is better.
                    callbacks.append(ModelCheckpoint(filepath=checkpoint_filepath,monitor='val_accuracy',mode='max',save_best_only=True,verbose=1))
                model=Sequential()
                # convolution and max-pooling blocks
                for _ in range(conv):
                    model.add(Conv2D(8,(2,2),activation='relu'))
                    model.add(MaxPooling2D(pool_size=(2,2)))
                # flatten layer
                model.add(Flatten())
                # fully connected layers
                for _ in range(fully_connected_layer):
                    model.add(Dense(unit,activation='relu'))
                # output layer
                model.add(Dense(5,activation='softmax'))
                # compile; metrics is given as a list, the form Keras documents
                model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
                # fit
                model.fit(XTrain_noglass,YTrain_noglass,batch_size=32,epochs=5,callbacks=callbacks,validation_split=validation_split)
    return model

In [17]:
#Return Forecast Results
def acc_Test(model,XTest_noglass,YTest_noglass):
    """Return the fraction of samples whose predicted class equals the true label.

    Parameters
    ----------
    model : object with a ``predict`` method
        ``predict`` must return one row of class scores per sample.
    XTest_noglass : array-like
        Inputs handed to ``model.predict``.
    YTest_noglass : numpy.ndarray
        True labels. Flattened before being compared with the predictions.

    Returns
    -------
    float
        Mean of the element-wise "prediction == label" comparison.
    """
    scores = model.predict(XTest_noglass)
    # The predicted class is the position of the highest score in each row.
    predicted_classes = np.array([np.argmax(row) for row in scores])
    return np.mean(predicted_classes == YTest_noglass.flatten())

In [1]:
#Data paths,where the "glass_label_path" file is the label file for the purpose of semi-supervised learning
# All paths are relative to the notebook's working directory. Forward slashes
# are used because they resolve on Windows as well as on Linux and macOS,
# whereas backslash separators only work on Windows.
label_trainpath="./Datasets/cartoon_set/labels.csv"
label_testpath="./Datasets/cartoon_set_test/labels.csv"
data_train_path="./Datasets/cartoon_set/img/*.png"
data_test_path="./Datasets/cartoon_set_test/img/*.png"
glass_label_path="./B2/Semi_supervised_learning_labels.csv"

In [2]:
def main(label_trainpath,label_testpath,glass_label_path,data_train_path,data_test_path):
    """Run the full pipeline and return the accuracy reported by ``acc_Test``.

    Steps, in order:
    1. Load the labels and the images.
    2. Fit the random forest on the ``XTrain_newlabel`` / ``YTrain_newlabel`` pair.
    3. Use that forest to split both the training and the test samples into
       "noglass" and "glass" groups.
    4. Train the CNN on the "noglass" training group.
    5. Score it on the "noglass" test group.

    Parameters
    ----------
    label_trainpath, label_testpath, glass_label_path : str
        Forwarded to ``labelprocessing``.
    data_train_path, data_test_path : str
        Forwarded to ``datapreprocessing``.

    Returns
    -------
    The value returned by ``acc_Test``.
    """
    YTrain, YTest, YTrain_newlabel = labelprocessing(label_trainpath, label_testpath, glass_label_path)
    # The image-shaped training array is not needed here, only the flattened views.
    _, XTest, XTrain_newlabel, XTrain_predict = datapreprocessing(data_train_path, data_test_path)

    glasses_forest = crossvalidation_judgeglass(XTrain_newlabel, YTrain_newlabel)

    # Only the "noglass" halves of each split are used further down.
    XTrain_noglass, _, YTrain_noglass, _ = creatlable_judgeglass(XTrain_predict, glasses_forest, YTrain)
    XTest_noglass, _, YTest_noglass, _ = creatlable_judgeglass(XTest, glasses_forest, YTest)

    cnn = predict_withnoglass(XTrain_noglass, YTrain_noglass)
    return acc_Test(cnn, XTest_noglass, YTest_noglass)

In [20]:
# Run the whole pipeline on the configured paths and report the resulting accuracy.
acc=main(
    label_trainpath=label_trainpath,
    label_testpath=label_testpath,
    glass_label_path=glass_label_path,
    data_train_path=data_train_path,
    data_test_path=data_test_path,
)
print("TaskB2: Accuracy of Convolutional Neural Network is {}".format(acc))

  rf.fit(XTrain_newlabel,YTrain_newlabel)


logs/log_3_fully_4_unit_100_conv_1673522923
models/model_3_fully_4_unit_100_{epoch:02d}-{val_accuracy:.2f}.hdf5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
