In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# and the "Benign" folder from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Extract the dataset archive into ./data.
# -n: never overwrite files that already exist, so re-running the cell is a
#     no-op instead of stopping at unzip's interactive "replace?" prompt.
# -q: quiet mode, so the per-file listing does not flood the cell output.
!unzip -n -q /content/drive/MyDrive/malimg_dataset.zip -d data

In [None]:
# Recursively copy the "Benign" folder from Drive into the extracted image
# directory (presumably so it sits next to the other class folders - confirm
# against the archive layout).
!cp -r "/content/drive/MyDrive/Benign/" "/content/data/malimg_paper_dataset_imgs"

In [None]:
import os
import sys
import os
from math import log
import numpy as np
import scipy as sp
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import dataset
from dataset import load_data,prepare_data
from model import build_model
import seaborn as sns
from sklearn import metrics
from sklearn.metrics import average_precision_score
import tensorflow
from keras.applications import ResNet50
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.models import load_model

In [None]:
# Label name -> integer class id. Ids are assigned by position in the sequence
# below, so the order matters: 'Adialer.C' is 0 and 'Benign' is 25.
class_index = {
    label: position
    for position, label in enumerate((
        'Adialer.C',
        'Agent.FYI',
        'Allaple.A',
        'Allaple.L',
        'Alueron.gen!J',
        'Autorun.K',
        'C2LOP.P',
        'C2LOP.gen!g',
        'Dialplatform.B',
        'Dontovo.A',
        'Fakerean',
        'Instantaccess',
        'Lolyda.AA1',
        'Lolyda.AA2',
        'Lolyda.AA3',
        'Lolyda.AT',
        'Malex.gen!J',
        'Obfuscator.AD',
        'Rbot!gen',
        'Skintrim.N',
        'Swizzor.gen!E',
        'Swizzor.gen!I',
        'VB.AT',
        'Wintrim.BX',
        'Yuner.A',
        'Benign',
    ))
}


In [None]:
def train(train_gen,val_gen,target_size_custom,save_checkpoints_path,batch_size, epochs):
    """Train a ResNet50-based classifier, save it, and plot the learning curves.

    Args:
        train_gen: Training data handed to ``model.fit``. Expected to be a
            generator-style object that yields ready-made batches.
        val_gen: Validation data handed to ``model.fit``.
        target_size_custom: ``(height, width)`` of the input images. Used as
            the spatial input shape of the ResNet50 backbone.
            NOTE(review): assumes the generators yield 3-channel images of
            exactly this size - confirm against ``dataset.load_data``.
        save_checkpoints_path: Directory that receives ``model.h5`` and
            ``history.json``.
        batch_size: Kept for interface compatibility. It is not forwarded to
            ``fit``: generator inputs already produce batches, and Keras
            documents that ``batch_size`` must not be specified for them.
        epochs: Number of epochs to train for.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Build the backbone at the same resolution the data pipeline was
    # configured with, instead of a hard-coded 224x224 that can disagree
    # with the images actually fed to the model.
    resnet = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(*target_size_custom, 3),
    )

    model = build_model(resnet)

    # Multiply the learning rate by 0.2 when the training loss has not
    # improved for 5 epochs, never going below 1e-6.
    rlrp = ReduceLROnPlateau(monitor='loss', factor=0.2, verbose=1, patience=5, min_lr=0.000001)

    history = model.fit(train_gen, validation_data=val_gen, epochs=epochs, callbacks=[rlrp])

    # Persist the trained model and the per-epoch metrics.
    model.save(f'{save_checkpoints_path}/model.h5')

    hist_df = pd.DataFrame(history.history)
    with open(f"{save_checkpoints_path}/history.json", "w") as outfile:
        hist_df.to_json(outfile)

    print("***Ploting***")
    train_acc = history.history['accuracy']
    train_loss = history.history['loss']
    val_acc = history.history['val_accuracy']
    val_loss = history.history['val_loss']

    # Derive the x-axis from what was actually recorded rather than shadowing
    # the ``epochs`` argument; the lengths then always match the curves.
    epoch_axis = list(range(len(train_loss)))

    fig , ax = plt.subplots(1,2)
    fig.set_size_inches(20,8)

    ax[0].plot(epoch_axis , train_loss , label = 'Training Loss')
    ax[0].plot(epoch_axis , val_loss , label = 'Testing Loss')
    ax[0].set_title('Training & Testing Loss')
    ax[0].legend()
    ax[0].set_xlabel("Epochs")

    ax[1].plot(epoch_axis , train_acc , label = 'Training Accuracy')
    ax[1].plot(epoch_axis , val_acc , label = 'Testing Accuracy')
    ax[1].set_title('Training & Testing Accuracy')
    ax[1].legend()
    ax[1].set_xlabel("Epochs")

    # Save through the figure handle so the right figure is written even if
    # another figure has become pyplot's "current" one.
    fig.savefig('train_loss_accuracy.png')

    return history

In [None]:
def test(test_gen,save_checkpoints_path):
    """Load the saved model and predict one class index per test sample.

    Args:
        test_gen: Input handed directly to ``model.predict``.
        save_checkpoints_path: Directory that contains ``model.h5``.

    Returns:
        Array holding, for each prediction row, the index of its largest
        value (argmax along axis 1).
    """
    checkpoint_file = f"{save_checkpoints_path}/model.h5"
    classifier = load_model(checkpoint_file)

    scores = np.array(classifier.predict(test_gen))
    return np.argmax(scores, axis=1)

In [None]:
def confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14):
    """Draw a confusion matrix as an annotated seaborn heatmap.

    Args:
        confusion_matrix: Square matrix of integer counts.
        class_names: Labels used for both the rows and the columns.
        figsize: Size of the new matplotlib figure.
        fontsize: Font size of the tick labels.

    Raises:
        ValueError: If seaborn raises ``ValueError`` while drawing the
            heatmap with integer formatting.
    """
    matrix_frame = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names)

    # New figure; the heatmap below is drawn onto its current axes.
    plt.figure(figsize=figsize)
    try:
        axes = sns.heatmap(matrix_frame, annot=True, fmt="d")
    except ValueError:
        raise ValueError("Confusion matrix values must be integers.")

    # Row labels stay horizontal, column labels are tilted by 45 degrees.
    for axis, angle in ((axes.yaxis, 0), (axes.xaxis, 45)):
        axis.set_ticklabels(axis.get_ticklabels(), rotation=angle, ha='right', fontsize=fontsize)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# List the GPU devices TensorFlow can see; an empty list means none were found.
gpus = tensorflow.config.list_physical_devices('GPU')
print(gpus)

In [None]:
# Paths and hyper-parameters used by this and the following cells.
data_path="data"
img_path="data/malimg_paper_dataset_imgs"
data_csvs="data/csvs"
save_checkpoints_path="data/checkpoint"
batch_size=28
epochs = 20
os.makedirs(save_checkpoints_path,exist_ok=True)
os.makedirs(data_csvs,exist_ok=True)

data_prepare=prepare_data(data_path,img_path,class_index)

# Only build the CSV splits once; later runs reuse the existing files.
if not os.path.exists(f"{data_csvs}/train.csv"):
    data_prepare.create_csv_data()

target_size_custom = (256, 256)

dataloader=load_data(img_path,data_csvs,target_size_custom,batch_size)
train_gen,val_gen=dataloader.train_data()
# Keep the generator's own class mapping for the evaluation cells below.
classes = train_gen.class_indices
with tensorflow.device('GPU'):
  # Do not assign the result back to ``train_gen``: that replaced the
  # training generator with train()'s return value.
  train(train_gen,val_gen,target_size_custom,save_checkpoints_path,batch_size, epochs)





In [None]:
# Build the test-set input and get one predicted class index per test sample
# from the saved model.
test_gen=dataloader.test_data()
y_test_predicted=test(test_gen,save_checkpoints_path)

In [None]:
# Translate predicted class ids back to label names via a reverse lookup.
# setdefault keeps the first label seen for an id, and ids with no label are
# skipped, exactly as the nested search-and-break loop did.
# NOTE(review): this maps through the hand-written class_index, not the
# generator's class_indices - confirm both use the same id order.
index_to_label = {}
for label, idx in class_index.items():
  index_to_label.setdefault(idx, label)

y_pred = [index_to_label[i] for i in y_test_predicted if i in index_to_label]
print(y_pred)

In [None]:
# Pin the label set so the matrix always has one row and column per known
# class. Without it scikit-learn only uses labels that occur in the data, and
# the matrix no longer matches the class-name axis when a class is absent
# from both the test labels and the predictions.
# NOTE(review): assumes class ids are 0..len(classes)-1 in the same order as
# `classes` iterates - confirm against the data loader.
class_ids = list(range(len(classes)))
c_matrix = metrics.confusion_matrix(test_gen.classes, y_test_predicted, labels=class_ids)

# Also store the raw true-vs-predicted cross-tabulation as CSV.
df_confusion = pd.crosstab(test_gen.classes, y_test_predicted)
df_confusion.to_csv(os.path.join(data_path,"confusion_matrix.csv"))

# Pass the class names as an explicit list for the heatmap axes.
confusion_matrix(c_matrix, list(classes), figsize = (20,7), fontsize=14)

In [None]:
# Overall accuracy, then Jaccard (IoU) and F1 under micro and macro averaging.
true_labels = test_gen.classes

accuracy = metrics.accuracy_score(true_labels, y_test_predicted)
print("accuracy",accuracy)

# Micro averaging.
IoU = metrics.jaccard_score(true_labels, y_test_predicted, average="micro")
f1 = metrics.f1_score(true_labels, y_test_predicted, average="micro")
print("micro IoU",IoU)
print("micro f1",f1)

# Macro averaging.
IoU = metrics.jaccard_score(true_labels, y_test_predicted, average="macro")
f1 = metrics.f1_score(true_labels, y_test_predicted, average="macro")
print("macro IoU",IoU)
print("macro f1",f1)

In [None]:
# Per-class precision / recall / F1 as a DataFrame.
# `labels` is pinned to every known class id so the report still builds when
# a class is missing from both the test labels and the predictions; otherwise
# scikit-learn raises because the label count and `target_names` disagree.
# NOTE(review): assumes class ids are 0..len(classes)-1 in the same order as
# `classes` iterates - confirm against the data loader.
class_ids = list(range(len(classes)))
report = metrics.classification_report(
    test_gen.classes,
    y_test_predicted,
    labels=class_ids,
    target_names=list(classes),
    output_dict=True,
)
df_report = pd.DataFrame(report).transpose()
print(df_report)