In [None]:
import numpy as np
import pandas as pd
import os
import random
from PIL import Image, ImageFilter, ImageOps    

In [None]:
# base_dir = "../input/food-101"
# food_dir = os.path.join(base_dir, "food-101.zip")

# with zipfile.ZipFile(food_dir,"r") as z:
#     z.extractall()

In [None]:
# Root directory of the FooDD dataset; later cells read the images from its
# `cropped` sub-folder.
img_dir = "../input/data-foodd/FooDD"

In [None]:
# Every sub-directory of `cropped` is treated as one food category.
# os.walk yields directory entries in arbitrary, filesystem-dependent order,
# so sort them: the category order drives the order of `filenames`, and with
# it the outcome of the seeded train/test split.
categories = sorted(next(os.walk(fr'{img_dir}/cropped'))[1])

In [None]:
# Collect the path of every image together with its category label.
# `filenames[i]` and `y_full[i]` always describe the same image.
filenames = []
y_full = []

for category in categories:
    category_dir = fr'{img_dir}/cropped/{category}'
    # Sorted because os.walk lists files in arbitrary order; a stable order
    # keeps the seeded train/test split reproducible across machines.
    image_names = sorted(next(os.walk(category_dir))[2])

    filenames += [fr'{category_dir}/{image_name}' for image_name in image_names]
    y_full += [category] * len(image_names)

Преобразование всех изображений в массив

In [None]:
# Load every image and flatten its pixel array into a single feature row.
# NOTE(review): a 2-D matrix only results if all images share the same size
# and mode - the later reshape to 3 x 100 x 100 relies on that; confirm.
X = np.array([np.array(Image.open(path)).flatten() for path in filenames])

Кодирование категорий в числа

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Learn the mapping from category name to integer code.
# `fit` returns the encoder itself, so it can be chained onto the constructor.
le = LabelEncoder().fit(y_full)
le

In [None]:
# Show the distinct category labels the encoder has learned.
le.classes_

In [None]:
# Spot-check the integer code of a single label.
# NOTE(review): assumes the dataset contains an "Apple" category; an unseen
# label would make `transform` fail - confirm against `le.classes_`.
le.transform(["Apple"])

In [None]:
# Integer-encode the category label of every image (aligned with `filenames`).
y = le.transform(y_full)

Разбиение исходных данных

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 33 % of the images for testing; the fixed seed keeps the split
# repeatable for a given ordering of the input rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# (number of training images, number of flattened pixel values per image)
X_train.shape

# Логистическая регрессия

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Baseline: logistic regression fitted directly on the flattened raw pixels.
clf = LogisticRegression().fit(X_train, y_train)
clf

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Share of held-out images the pixel-based baseline classifies correctly.
pixel_accuracy = accuracy_score(y_test, clf.predict(X_test))
print(fr'Accuracy: {100 * pixel_accuracy:.3f} %')

# Гистограммный подход

In [None]:
def data(filenames, classNumbers):
    """Build one row of per-channel colour statistics for every image.

    For each of the first three channels (R, G, B) of an image the row holds:
    mean, standard deviation, 25 % quantile, median, 75 % quantile, and the
    difference between the mean of the top half and the mean of the bottom
    half of the image. The class number of the image is appended last.

    Args:
        filenames: sequence of image paths readable by ``PIL.Image.open``.
        classNumbers: sequence of class labels, one per entry of ``filenames``.

    Returns:
        A list with one 19-element list per image, ordered
        ``[rMean, gMean, bMean, rStd, ..., bDifference, target]``.

    Raises:
        ValueError: if ``filenames`` and ``classNumbers`` differ in length.
    """
    if len(filenames) != len(classNumbers):
        raise ValueError(
            'filenames and classNumbers must have the same length, got '
            f'{len(filenames)} and {len(classNumbers)}'
        )

    # Statistics are emitted stat-major: all three channels for the first
    # statistic, then all three channels for the next one, and so on.
    statistics = (
        np.mean,
        np.std,
        lambda channel: np.quantile(channel, 0.25),
        np.median,
        lambda channel: np.quantile(channel, 0.75),
    )

    lst = []

    for i, file in enumerate(filenames):
        # Convert the image to an array once instead of once per statistic.
        with Image.open(file) as img:
            pixels = np.asarray(img)

        channels = [pixels[:, :, index] for index in range(3)]
        half = pixels.shape[0] // 2  # row that separates top from bottom half

        row = []
        for statistic in statistics:
            row.extend(statistic(channel) for channel in channels)
        row.extend(
            channel[:half].mean() - channel[half:].mean() for channel in channels
        )
        # Use the label that was passed in; the previous version ignored the
        # `classNumbers` argument and read the global `y` instead.
        row.append(classNumbers[i])

        lst.append(row)

    return lst

In [None]:
from os import listdir

In [None]:
# Column names in the order produced by `data`: for each statistic the three
# colour channels (r, g, b), followed by the class label.
columns = [
    f'{channel}{statistic}'
    for statistic in ('Mean', 'Std', 'Quantile25', 'Median', 'Quantile75', 'Difference')
    for channel in 'rgb'
] + ['target']

# Computing the statistics is slow, so they are cached as a CSV next to the
# dataset and only recomputed when that file is missing.
cache_is_missing = 'data_gist.csv' not in listdir(img_dir)
if cache_is_missing:
    data_gist_save = data(filenames, y)
    cache_frame = pd.DataFrame(data_gist_save, columns=columns)
    cache_frame.to_csv(fr'{img_dir}/data_gist.csv')

In [None]:
# Load the cached colour statistics; the first CSV column is the row index
# that `to_csv` wrote out, so it is restored as the index here.
data_gist = pd.read_csv(fr'{img_dir}/data_gist.csv', index_col = 0)
data_gist

In [None]:
# Same 67/33 split and seed as for the raw pixels, applied to the colour
# statistics table.
gist_features = data_gist.drop(columns=['target'])
gist_target = data_gist['target']

X_train_gist, X_test_gist, y_train_gist, y_test_gist = train_test_split(
    gist_features, gist_target, test_size=0.33, random_state=42
)

In [None]:
# Logistic regression on the colour statistics.
# Note: this rebinds `clf`, replacing the pixel-based model fitted earlier.
clf = LogisticRegression(solver='liblinear').fit(X_train_gist, y_train_gist)
clf

In [None]:
# Share of held-out images the statistics-based model classifies correctly.
gist_accuracy = accuracy_score(y_test_gist, clf.predict(X_test_gist))
print(fr'Accuracy: {100 * gist_accuracy:.3f} %')

# NeuralNet

In [None]:
import torch
import torch.nn as nn

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

In [None]:
# Show which device was selected.
device

In [None]:
from torch.utils.data import Dataset, DataLoader

class FoodDataset(Dataset):
    """Pairs an indexable collection of images with their labels.

    Item ``idx`` is the tuple ``(images[idx], labels[idx])``; the dataset
    length is taken from ``labels``.
    """

    def __init__(self, images, labels):
        """Store the two parallel collections without copying them."""
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of labels."""
        n_items = len(self.labels)
        return n_items

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

In [None]:
batch_size = 50


def _to_chw_tensor(flat_pixels):
    """Turn rows of flattened 100x100 RGB images into an (N, 3, 100, 100) tensor.

    The rows were produced by flattening (height, width, channel) arrays, so
    they must first be reshaped back to that layout and only then permuted to
    the channels-first layout that convolutional layers expect. Reshaping
    straight to (N, 3, 100, 100) would mix pixels of different channels and
    positions.
    """
    images = torch.FloatTensor(flat_pixels.reshape(-1, 100, 100, 3))
    return images.permute(0, 3, 1, 2).contiguous()


train_dataset = FoodDataset(_to_chw_tensor(X_train), torch.LongTensor(y_train))
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)

test_dataset = FoodDataset(_to_chw_tensor(X_test), torch.LongTensor(y_test))
# Evaluation does not depend on batch order, so the test set is not shuffled.
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

In [None]:
# Pull the first training batch to inspect the tensor shapes.
examples = enumerate(train_loader)
batch_idx, first_batch = next(examples)
example_data, example_targets = first_batch

In [None]:
# Shape of one batch of images.
example_data.shape

In [None]:
# Shape of the matching batch of labels.
example_targets.shape

In [None]:
class NeuralNet(nn.Module):
    """Fully connected classifier with a single ReLU hidden layer.

    Args:
        input_size: number of features in each (flattened) input sample.
        hidden_size: width of the hidden layer.
        num_classes: number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.layer_1 = nn.Linear(input_size, hidden_size)
        self.layer_2 = nn.Linear(hidden_size, num_classes)

        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Return the raw class logits for a batch of flattened inputs."""
        hidden = self.layer_1(inputs)
        hidden = self.relu(hidden)
        return self.layer_2(hidden)

In [None]:
# Dense network over flattened 3 x 100 x 100 images, with 500 hidden units
# and one output logit per category.
flat_image_size = 3 * 100 * 100
model = NeuralNet(flat_image_size, 500, len(categories))
model = model.to(device)
print(model)

In [None]:
# Cross-entropy over the class logits, minimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Seed the RNGs so that batch shuffling is repeatable for this training run.
np.random.seed(0)
torch.manual_seed(0)

# Single source of truth for the epoch count: the progress message used to
# print a hard-coded total of 5 while the loop actually ran 10 epochs.
num_epochs = 10
total_step = len(train_loader)

model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The dense network expects one flat feature vector per image.
        images = images.reshape(-1, 100 * 100 * 3).to(device)

        outputs = model(images)
        loss = criterion(outputs, labels.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of every 10th batch.
        if (i+1) % 10 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

In [None]:
# На этапе предсказания не требуется вычислять градиенты:
# Gradients are not needed at prediction time.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten each image, as during training of the dense network.
        images = images.reshape(-1, 100 * 100 * 3).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest logit is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(fr'Accuracy: {100 * correct / total:.3f} %')

# Сверточная нейронная сеть

In [None]:
class LeNet5(nn.Module):
    """LeNet-style CNN: three 5x5 convolutions with two 2x2 average poolings,
    followed by two fully connected layers.

    Args:
        num_classes: number of output logits.
        input_size: side length of the square RGB input images. Defaults to
            100, which yields the 38880 flattened features that were
            previously hard-coded.

    Raises:
        ValueError: if ``input_size`` is too small to leave at least one
            pixel after the convolution/pooling stack.
    """

    def __init__(self, num_classes, input_size=100):
        super(LeNet5, self).__init__()
        # Spatial size after conv1 -> pool1 -> conv2 -> pool2 -> conv3:
        # every unpadded 5x5 convolution removes 4 pixels, every pooling halves.
        feature_map_size = ((input_size - 4) // 2 - 4) // 2 - 4
        if feature_map_size < 1:
            raise ValueError(
                f'input_size={input_size} is too small for this architecture'
            )

        self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1)
        self.conv_layer2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1)
        self.conv_layer3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1)
        # NOTE(review): conv_layer4 is never called in forward(); it is kept
        # only so the module's parameters and attribute names stay unchanged.
        self.conv_layer4 = nn.Conv2d(in_channels=120, out_channels=256, kernel_size=5, stride=1)
        self.pooling_layer1 = nn.AvgPool2d(kernel_size=2)
        self.pooling_layer2 = nn.AvgPool2d(kernel_size=2)

        # 120 channels from conv_layer3, each feature_map_size x feature_map_size.
        self.linear_layer1 = nn.Linear(
            in_features=120 * feature_map_size * feature_map_size,
            out_features=84,
        )
        self.linear_layer2 = nn.Linear(in_features=84, out_features=num_classes)

        # Despite the attribute name this activation is a ReLU; the name is
        # kept so existing references to `tanh` keep working.
        self.tanh = nn.ReLU()

    def forward(self, inputs):
        """Return class logits for a batch shaped (N, 3, input_size, input_size)."""
        output = self.tanh(self.conv_layer1(inputs))
        output = self.pooling_layer1(output)
        output = self.tanh(self.conv_layer2(output))
        output = self.pooling_layer2(output)
        output = self.tanh(self.conv_layer3(output))
        # Keep the batch dimension and flatten everything else.
        output = torch.flatten(output, 1)

        output = self.tanh(self.linear_layer1(output))
        output = self.linear_layer2(output)

        return output

In [None]:
# Convolutional model with one output logit per category.
# Note: this rebinds `model`, replacing the dense network trained earlier.
n_categories = len(categories)
model = LeNet5(n_categories)
model = model.to(device)
print(model)

In [None]:
# Fresh loss and optimiser bound to the parameters of the current `model`.
criterion = nn.CrossEntropyLoss()
trainable_parameters = model.parameters()
optimizer = torch.optim.Adam(trainable_parameters, lr=1e-3)

In [None]:
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
# Torch RNG
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Python RNG
np.random.seed(seed)
random.seed(seed)

# Single source of truth for the epoch count: the progress message used to
# print a hard-coded total of 5 while the loop actually ran 20 epochs.
num_epochs = 20
total_step = len(train_loader)

model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The convolutional model consumes the image batch as-is.
        images = images.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of every 10th batch.
        if (i+1) % 10 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

In [None]:
# Switch to inference mode and measure accuracy on the held-out batches.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        labels = labels.to(device)
        outputs = model(images.to(device))
        # Index of the largest logit is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(fr'Accuracy: {100 * correct / total:.3f}%')

# Resnet

In [None]:
import torchvision.models as models

In [None]:
class PretrainedCNN(nn.Module):
    """ResNet-18 backbone with a replaced head and an extra classifier layer.

    The backbone's final ``fc`` layer is swapped for a linear layer that
    outputs ``hidden_size`` features; these pass through ReLU and dropout
    (p=0.2) before a second linear layer produces ``num_classes`` logits.

    Args:
        hidden_size: number of features produced by the replaced backbone head.
        num_classes: number of output logits.
    """

    def __init__(self, hidden_size, num_classes):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Replace the original classification head with a feature projection.
        backbone.fc = nn.Linear(backbone.fc.in_features, hidden_size)
        self.resnet = backbone
        self.fc = nn.Linear(hidden_size, num_classes)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, images):
        """Return class logits for a batch of images."""
        # All backbone parameters stay trainable; an earlier draft toggled
        # `requires_grad` on the non-fc parameters here, but that is disabled.
        hidden = self.resnet(images)
        hidden = self.relu(hidden)
        hidden = self.dropout(hidden)

        return self.fc(hidden)

In [None]:
# Pretrained backbone projected to 20000 hidden features, then to one logit
# per category.
resnet_hidden_size = 20000
modelResnet = PretrainedCNN(resnet_hidden_size, len(categories))
modelResnet = modelResnet.to(device)
print(modelResnet)

In [None]:
# Loss and optimiser for the ResNet-based model (all of its parameters).
criterion = nn.CrossEntropyLoss()
resnet_parameters = modelResnet.parameters()
optimizer = torch.optim.Adam(resnet_parameters, lr=1e-3)

In [None]:
# Seed the RNGs so that batch shuffling and dropout are repeatable.
np.random.seed(0)
torch.manual_seed(0)
# Put the model that is actually trained here into training mode. The
# previous `model.train()` call switched the LeNet model instead, so
# re-running this cell after evaluation trained with dropout disabled.
modelResnet.train()

# Single source of truth for the epoch count: the progress message used to
# print a hard-coded total of 5 while the loop actually ran 30 epochs.
num_epochs = 30
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        outputs = modelResnet(images)
        loss = criterion(outputs, labels.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of every 10th batch.
        if (i+1) % 10 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

In [None]:
# Inference mode disables dropout; then measure accuracy on the held-out set.
modelResnet.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        labels = labels.to(device)
        outputs = modelResnet(images.to(device))
        # Index of the largest logit is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(fr'Accuracy: {100 * correct / total:.3f}%')

In [None]:
# Persist the trained ResNet-based model to `model.pt`.
# NOTE(review): this saves the whole module object rather than its
# state_dict, so loading it presumably needs the PretrainedCNN class
# definition to be available - confirm this is what consumers expect.
torch.save(modelResnet, 'model.pt')