In [22]:
import numpy as np 
import shutil # pour les dossiers
import pandas as pd
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import time
import os
import copy

In [23]:
def metrique(path):
    files = os.listdir(path);
    contenu = [];
    for file in files:
        if file.endswith('.json'):
            openFile = open(path+file, "r");
            contenu.append(json.loads(openFile.read())["Image"]);
            openFile.close();
    return contenu;

In [24]:
chemin = "./train/";
contenu = [];
if not os.path.isdir(chemin):
    print('Rajouter le dossier train dans le dossier courant :) ! ');
else :
    contenu = metrique(chemin);
df = pd.DataFrame(contenu)
# on laisse Species car c'est égale au nombre de classe
df.drop(["Genus","ClassId","Family","Vote","Location","Latitude","Longitude","Date","Author","Content","MediaId", "LearnTag", "ImageId2014","ObservationId2014","YearInCLEF","ObservationId"], axis='columns', inplace=True)
df = df.drop_duplicates()
classe  = df.to_numpy();
print(len(classe))


50


In [25]:
X = [];
Y = [];
files = os.listdir("./train")
for file in files:
    if file.endswith('.jpg'):
        X.append(file);
        nameFile = file.split('.')[0];
        for file_class in files:
            nameFileAutre = file_class.split('.')[0];
            if nameFile==nameFileAutre and file_class.endswith('.json'):
                fichierSrc = open("./train/"+file_class, "r");
                contenu = fichierSrc.read();
                monJson = json.loads(contenu)
                Y.append(monJson["Image"]["Species"])
                break;


In [26]:
# on a bien une classe par image !
if(len(X) == len(Y)):
    print("OK")
else:
    print("KO")

OK


In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42, stratify=Y)
y_train[0]

'Salix caprea L.'

In [28]:
# Création des répertoires ! 
directory = "MLP"
for classe in y_train : 
    if(not os.path.isdir('./MLP/train/'+classe)):
        os.mkdir('./MLP/train/'+classe)
        
for classe in y_test : 
    if(not os.path.isdir('./MLP/val/'+classe)):
        os.mkdir('./MLP/val/'+classe)
        
# on met les images dans les répertoires precédents

# on supprime le contenu

directorys=os.listdir('./MLP/train/')
for i in range(0,len(directory)):
    files=os.listdir('./MLP/train/'+directorys[i])
    for j in range(0,len(files)):
        os.remove('./MLP/train/'+directorys[i]+'/'+files[j])

directorys=os.listdir('./MLP/val/')
for i in range(0,len(directory)):
    files=os.listdir('./MLP/val/'+directorys[i])
    for j in range(0,len(files)):
        os.remove('./MLP/val/'+directorys[i]+'/'+files[j])

# on met le contenu
shutil.copyfile('./train/'+X_train[0],'./MLP/train/'+y_train[0]+'/'+X_train[0])

print('./MLP/train/'+y_train[0]+X_train[0])
print(X_train[0])
for i in range(len(X_train)):
    shutil.copyfile('./train/'+X_train[i],'./MLP/train/'+y_train[i]+'/'+X_train[i])
    
for i in range(len(X_test)):
    shutil.copyfile('./train/'+X_test[i],'./MLP/val/'+y_test[i]+'/'+X_test[i])
    

./MLP/train/Salix caprea L.30108.jpg
30108.jpg


In [29]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
data_dir = 'MLP'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

class_names = image_datasets['train'].classes
#print(pd.DataFrame(class_names))



In [35]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP,self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(4,3*256),
            nn.ReLU(),
            nn.Linear(3*256,10)
        )
        
    def forward(self,x):
        #convert tensor (128,1,28,28) --> (128,1*28*28)
        x=x.view(x.size(0), -1)
        x=self.layers(x)
        return x

In [38]:
model = MLP()
print(model)
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
loss_fn = nn.CrossEntropyLoss()

mean_train_losses = []
mean_valid_losses = []
epochs = 15

for epoch in range(epochs):
    
    train_losses=[]
    valid_losses=[]
    
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()  # Set model to training mode
            for images,labels in dataloaders[phase]:
            
                optimizer.zero_grad()
                print(images)
                outputs = model(images)
                loss = loss_fn(outputs,labels)
                loss.backward()
                optimizer.step()
            
                train_losses.append(loss.item())
            
                if(i*128) % (128*100) == 0:
                    print('{i * 128} / 50000')
                
            model.eval()
            correct = 0
            total = 0
        else:
            with torch.no_grad():   # Set model to evaluate mode
                for i, (images,labels) in dataloaders[phase]:
                    outputs = model(images)
                    loss = loss_fn(outputs,labels)
                    
                    valid_losses.append(loss.item())
                    
                    _, predicted = torch.max(outputs.data, 1)
                    correct += (predicted == labels).sum().item()
                    total += labels.size(0)
            mean_train_losses.append(np.mean(train_losses))
            mean_valid_losses.append(np.mean(valid_losses))
            
            accuracy = 100*correct/total
            valid_acc_list.append(accuracy)
            print('epoch : {}, train loss : {:.4f}, valid loss : {:.4f}, valid acc : {:.2f}%'\
         .format(epoch+1, np.mean(train_losses), np.mean(valid_losses), accuracy))

MLP(
  (layers): Sequential(
    (0): Linear(in_features=4, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)
tensor([[[[-1.7240, -1.7412, -1.7412,  ..., -1.4158, -1.4329, -1.4500],
          [-1.6727, -1.6898, -1.6898,  ..., -1.3644, -1.3987, -1.4329],
          [-1.6213, -1.6213, -1.6384,  ..., -1.3302, -1.3644, -1.3987],
          ...,
          [ 0.9303,  1.0331,  1.0844,  ..., -1.7240, -1.7240, -1.7069],
          [ 0.9303,  1.0331,  1.1015,  ..., -1.7069, -1.6898, -1.6898],
          [ 0.9132,  0.9988,  1.0673,  ..., -1.7069, -1.6898, -1.6898]],

         [[-1.4405, -1.4580, -1.4580,  ..., -1.1954, -1.2129, -1.2304],
          [-1.4055, -1.4230, -1.4230,  ..., -1.1604, -1.1779, -1.2129],
          [-1.3704, -1.3529, -1.3880,  ..., -1.1253, -1.1253, -1.1604],
          ...,
          [-0.2850, -0.0399,  0.1352,  ..., -1.6506, -1.6681, -1.6856],
          [-0.3200, -0.0749,  0.1001,  ..., -1.6331, -1.6331, -1.6681],
    

RuntimeError: size mismatch, m1: [4 x 150528], m2: [4 x 100] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:197

In [21]:
print(dataset_sizes)

{'val': 1147, 'train': 2327}
