In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import cv2
import random
from random import randint
import time

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(folder, fname))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

#Code adapted from Kamal Das: https://www.kaggle.com/kmldas/beginner-s-guide-image-augmentation-transforms

In [None]:
import torch
from torch.utils.data import Dataset, random_split, DataLoader
import torch.nn.functional as F
import torch.nn as nn

from PIL import Image
from scipy import ndimage

import torchvision
import torchvision.models as models
import torchvision.transforms as T
from torchvision.utils import make_grid
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder

from tqdm.notebook import tqdm

from sklearn.metrics import f1_score

#There is no train/test split or submission file in this dataset.

In [None]:
# Root folder of the "goblin-portraits" dataset, relative to the notebook.
DATA_DIR = '../input/goblin-portraits'

# Template paths for a train/test/submission layout, kept commented out
# for reference only.
#TRAIN_DIR = DATA_DIR + '/train'
#TEST_DIR = DATA_DIR + '/test'
#TRAIN_CSV = DATA_DIR + '/labels.csv'
#TEST_CSV = DATA_DIR + '/submission.csv'

# Folder used as the image root by the rest of the notebook.
TRAIN_DIR = '/'.join((DATA_DIR, 'images'))

In [None]:
# Load the metadata table. The CSV sits directly under DATA_DIR, so reuse that
# constant instead of repeating the dataset path a second time.
data_df = pd.read_csv(DATA_DIR + '/artbreederGoblin.csv', encoding='utf8')
data_df.head()

In [None]:
# Distinct ArtBreeder ids in ascending order. ndarray.sort() sorts in place and
# returns None, so np.sort() is used to get the sorted array back as a value.
labels_names = np.sort(data_df["idArtBreeder"].unique())
# Name kept for backward compatibility; it used to be bound to None.
labels_sorted = labels_names

# Integer class index -> the id it stands for.
labels = dict(enumerate(labels_names))
labels

#To use numbers instead of names as labels, let's add the numeric labels to the dataframe.

In [None]:
# Reverse lookup (id -> integer label), built once so that each row is encoded
# with a dict access instead of a linear list.index() scan over all labels.
label_index = {name: idx for idx, name in labels.items()}
lbl = [label_index[name] for name in data_df["idArtBreeder"]]

data_df['lbl'] = lbl
#data_df['lbl'] = data_df['lbl'].astype(str)
data_df.head()

#Add the path of each image to the dataframe.

In [None]:
# Expected JPEG location of every row: <TRAIN_DIR>/<idArtBreeder>.jpg
path_img = [
    TRAIN_DIR + "/" + str(art_id) + ".jpg"
    for art_id in data_df["idArtBreeder"]
]

data_df['path_img'] = path_img
data_df.head()

#EDA

In [None]:
# Headline figures: number of rows in the table and number of distinct ids.
num_images = data_df["idAlt"].shape[0]
no_labels = labels_names.shape[0]
print('Number of images in Training file:', num_images)
print('Number of Goblin breeds in Training file:', no_labels)

#There is too much data to plot, so I commented out the bar chart below and kept it for another time.

In [None]:
#bar = data_df["idArtBreeder"].value_counts(ascending=True).plot.barh(figsize = (30,120))
#plt.title("Distribution of the Goblin Breeds", fontsize = 20)
#bar.tick_params(labelsize=16)
#plt.show()

In [None]:
#data_df["idArtBreeder"].value_counts(ascending=False)

#Image Analysis

In [None]:
# Preview grid: the first 20 rows of data_df on a 4 x 5 layout, axis ticks hidden.
fig, axes = plt.subplots(
    nrows=4,
    ncols=5,
    figsize=(15, 15),
    subplot_kw=dict(xticks=[], yticks=[]),
)

for i, ax in enumerate(axes.ravel()):
    # Each panel shows the image stored at row i, titled with its ArtBreeder id.
    ax.imshow(plt.imread(data_df["path_img"][i]))
    ax.set_title(data_df["idArtBreeder"][i])
plt.tight_layout()
plt.show()

#Image Transforms using Python 

In [None]:
#Resizing images

# random.randint includes BOTH endpoints, so the upper bound must be len - 1;
# passing len itself could select an index one past the last row (KeyError).
random_img=randint(0,len(data_df.path_img)-1)
img_path=data_df.path_img[random_img]
img= plt.imread(img_path)

plt.imshow(img)
plt.title("Original image")
plt.show()

# Show the same picture scaled to 150 x 150 pixels.
plt.imshow(cv2.resize(img, (150,150)))
plt.title("After resizing")
plt.show()

In [None]:
#Rotate image

# random.randint includes BOTH endpoints, so the upper bound must be len - 1;
# passing len itself could select an index one past the last row (KeyError).
random_img=randint(0,len(data_df.path_img)-1)
img_path=data_df.path_img[random_img]
img= plt.imread(img_path)

plt.imshow(img)
plt.title("Original image")
plt.show()


#rotation angle in degrees

rotated1 = ndimage.rotate(img, 90)
plt.imshow(rotated1)
plt.title("Image rotated 90 degrees")
plt.show()

In [None]:
#Resize and rotation

# random.randint includes BOTH endpoints, so the upper bound must be len - 1;
# passing len itself could select an index one past the last row (KeyError).
random_img=randint(0,len(data_df.path_img)-1)
img_path=data_df.path_img[random_img]
img= plt.imread(img_path)

plt.imshow(img)
plt.title("Original image")
plt.show()


img=cv2.resize(img, (150,150))
# Rotation step in degrees between consecutive panels (previously assigned but unused).
turn =90

fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(16, 4),
                        subplot_kw={'xticks': [], 'yticks': []})

# Panel i shows the resized image rotated by i * turn degrees.
for i, ax in enumerate(axes.flat):
    ax.imshow(ndimage.rotate(img, i*turn))
    ax.set_title("After resizing rotated "+ str(i*turn) +" degrees")
plt.tight_layout()
plt.show()

##Image transforms using PyTorch

In [None]:
#Image transforms using PyTorch

# (per-channel mean, per-channel std) handed to T.Normalize below.
imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

train_tfms = T.Compose([
    # Resize so the shorter side becomes 256 px.
    T.Resize(256),

    # Disabled: random brightness / contrast / saturation / hue jitter.
    # T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),

    # Disabled: random 32 px crop after 4 px of reflect padding.
    # T.RandomCrop(32, padding=4, padding_mode='reflect'),

    # Randomly mirror the image left-to-right (default probability).
    T.RandomHorizontalFlip(),

    # Rotate by a random angle within +/- 10 degrees.
    T.RandomRotation(10),

    # Convert the PIL image to a tensor.
    T.ToTensor(),

    # Standardise each channel with the ImageNet mean/std, in place.
    T.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1], inplace=True),

    # Randomly erase a rectangular region of the tensor, in place.
    T.RandomErasing(inplace=True),
])

In [None]:
class GoblinDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Args:
        df: table with an ``idArtBreeder`` column (used as the file stem) and
            an ``lbl`` column (returned as the label).
        root_dir: directory that holds the ``<idArtBreeder>.jpg`` files.
        transform: optional callable applied to the opened PIL image.
    """

    def __init__(self, df, root_dir, transform=None):
        self.df = df
        self.transform = transform
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at *position* ``idx``.

        The image is the result of ``Image.open`` on
        ``<root_dir>/<idArtBreeder>.jpg``, passed through ``transform`` when
        one was supplied; the label is the row's ``lbl`` value.
        """
        # Positional lookup: indices handed to a dataset run over range(len(self)),
        # which only coincides with the index *labels* for a default RangeIndex.
        # .loc would raise (or pick the wrong row) on a filtered/shuffled frame.
        row = self.df.iloc[idx]
        img_idArtBreeder, img_label = row['idArtBreeder'], row['lbl']
        img_fname = self.root_dir + "/" + str(img_idArtBreeder) + ".jpg"
        # NOTE(review): the image mode is left as stored on disk; if any file is
        # not RGB, a 3-channel Normalize would fail — consider .convert('RGB').
        img = Image.open(img_fname)
        if self.transform:
            img = self.transform(img)
        return img, img_label

In [None]:
# Dataset over the full metadata table, reading images from TRAIN_DIR and
# applying the training transform pipeline to each one.
data_ds = GoblinDataset(
    df=data_df,
    root_dir=TRAIN_DIR,
    transform=train_tfms,
)

In [None]:
def show_sample(img, target, invert=True):
    """Display one dataset sample and print its label name.

    Args:
        img: 3-D tensor; its first axis is moved last before plotting
            (presumably channels-first -> channels-last).
        target: key into the module-level ``labels`` dict.
        invert: when truthy (the default) the plotted values are ``1 - img``.
    """
    channels_last = img.permute(1, 2, 0)
    plt.imshow(1 - channels_last if invert else channels_last)
    print('Labels:', labels[target])

#Sample Images after Transform

In [None]:
# Display the transformed sample at dataset index 241 (inverted, the show_sample default)
# and print its label. The pipeline contains random transforms, so the picture varies per run.
show_sample(*data_ds[241])

In [None]:
# Same preview for dataset index 149.
show_sample(*data_ds[149])

In [None]:
# Same preview for dataset index 244.
show_sample(*data_ds[244])

In [None]:
# Same preview for dataset index 4.
show_sample(*data_ds[4])

In [None]:
# Same preview for dataset index 1.
show_sample(*data_ds[1])

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/sequential-data/comments
from IPython.display import display,HTML
# Presentation constants: two font colours, two font family names (loaded from
# Google Fonts by dhtml) and two font sizes in px.
c1, c2 = '#eb3434', '#eb3446'
f1, f2 = 'Akronim', 'Smokum'
fs1, fs2 = 30, 15
def dhtml(string,fontcolor=c1,font=f1,fontsize=fs1):
    """Render ``string`` as a styled ``<h1>`` heading in the notebook output.

    Args:
        string: text placed inside the heading (inserted verbatim, not escaped).
        fontcolor: CSS colour of the heading.
        font: font family name, also used in the Google Fonts import URL.
        fontsize: font size in px.
    """
    # <style> element that imports the font together with the "3d-float" effect.
    font_css = (
        "<style>\n"
        "    @import 'https://fonts.googleapis.com/css?family="
        + font
        + "&effect=3d-float';</style>\n"
    )
    # Opening <h1> tag carrying the inline font family, colour and size.
    heading_open = (
        "    <h1 class='font-effect-3d-float' style='font-family:"
        + font
        + "; color:" + fontcolor
        + "; font-size:" + str(fontsize) + "px;'>"
    )
    heading = heading_open + "%s</h1>" % string
    display(HTML(font_css + heading))
    
    
# Show the author's sign-off banner using dhtml's default colour, font and size.
dhtml('Marília Prata, @mpwolke Was here again' )