In [1]:
from pathlib import Path
import random

import joblib
import matplotlib.pyplot as plt
import numpy as np
import PIL
import PIL.Image  # load the submodule explicitly; a bare `import PIL` does not guarantee PIL.Image exists
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

In [None]:
# Root folder of the rice image dataset; every variety has its own sub-folder.
dataset = Path('../Rice_Image_Dataset')

# Fixed seed: a kernel restart must reproduce exactly the same train/test split.
# (The previous random.randint(...) seed produced a different split on every run.)
SPLIT_SEED = 42

# glob() yields files in a filesystem-dependent order, so sort the listings to
# make the split independent of the machine the notebook runs on.
arborio = sorted(dataset.glob('Arborio/*'))
basmati = sorted(dataset.glob('Basmati/*'))
ipsala = sorted(dataset.glob('Ipsala/*'))
jasmine = sorted(dataset.glob('Jasmine/*'))
karacadag = sorted(dataset.glob('Karacadag/*'))

# Fail early with a readable message when a class folder is missing or empty,
# instead of an opaque error from train_test_split.
for variety_name, variety_paths in (
    ('Arborio', arborio),
    ('Basmati', basmati),
    ('Ipsala', ipsala),
    ('Jasmine', jasmine),
    ('Karacadag', karacadag),
):
    if not variety_paths:
        raise FileNotFoundError(f'no images found in {dataset / variety_name}')

# 80 % / 20 % split, done per variety so every class keeps the same proportion.
arborio_train, arborio_test = train_test_split(arborio, test_size=0.2, random_state=SPLIT_SEED)
basmati_train, basmati_test = train_test_split(basmati, test_size=0.2, random_state=SPLIT_SEED)
ipsala_train, ipsala_test = train_test_split(ipsala, test_size=0.2, random_state=SPLIT_SEED)
jasmine_train, jasmine_test = train_test_split(jasmine, test_size=0.2, random_state=SPLIT_SEED)
karacadag_train, karacadag_test = train_test_split(karacadag, test_size=0.2, random_state=SPLIT_SEED)

In [None]:
# Linear classifier trained out-of-core: the images do not have to fit in memory
# at once because partial_fit() is fed one batch at a time.
# random_state pins the estimator's internal shuffling so training is repeatable.
clf = SGDClassifier(random_state=42)

# Every label partial_fit() may encounter; it has to be supplied up front
# because a single batch is not guaranteed to contain all classes.
CLASSES = ['Arborio', 'Basmati', 'Ipsala', 'Jasmine', 'Karacadag']

train_sets = [arborio_train, basmati_train, ipsala_train, jasmine_train, karacadag_train]

STEP = 1000  # images taken from each variety per batch
# Derived from the data instead of hard-coding 12000 (15000 * 0.8), so a
# dataset of another size is consumed completely, including a short last batch.
COUNT = max(len(paths) for paths in train_sets)

# NOTE(review): pixels are fed unscaled (0-255). scikit-learn recommends scaling
# features for SGD; doing so here would require the same scaling at prediction time.
for bottom in range(0, COUNT, STEP):
    top = bottom + STEP
    # Same ordering as before: the slice of each variety, one variety after another.
    batch = [path for paths in train_sets for path in paths[bottom:top]]

    raw_images = []
    labels = []

    for path in batch:
        # Keep the *opened* image in the context manager so its file handle is
        # closed; convert() returns a new in-memory image.
        with PIL.Image.open(path) as img:
            # Grayscale pixels, flattened row by row into one feature vector.
            raw_images.append(np.asarray(img.convert('L')).ravel())
        # The class is the name of the folder holding the image; unlike
        # splitting the string on '/', this does not depend on the path depth
        # or on the OS path separator.
        labels.append(path.parent.name)

    X = np.stack(raw_images)
    y = np.array(labels)

    clf.partial_fit(X, y, classes=CLASSES)

In [6]:
# Score the fitted classifier `clf` on the held-out images, batch by batch so
# that the whole test set never has to be in memory at once.
test_sets = [arborio_test, basmati_test, ipsala_test, jasmine_test, karacadag_test]

STEP = 1000  # images taken from each variety per batch
# Derived from the data instead of hard-coding 3000, so no test image is
# silently skipped when the split sizes change.
COUNT = max(len(paths) for paths in test_sets)

acc_list = []     # accuracy of each batch
batch_sizes = []  # number of images in each batch, used to weight the mean

for bottom in range(0, COUNT, STEP):
    top = bottom + STEP
    batch = [path for paths in test_sets for path in paths[bottom:top]]

    raw_images = []
    labels = []

    for path in batch:
        # Keep the *opened* image in the context manager so its file handle is
        # closed; convert() returns a new in-memory image.
        with PIL.Image.open(path) as img:
            # Grayscale pixels, flattened row by row into one feature vector.
            raw_images.append(np.asarray(img.convert('L')).ravel())
        # The class is the name of the folder holding the image; this does not
        # depend on the path depth or on the OS path separator.
        labels.append(path.parent.name)

    X = np.stack(raw_images)
    y = np.array(labels)

    acc = clf.score(X, y)
    print(acc)
    acc_list.append(acc)
    batch_sizes.append(len(batch))

if not acc_list:
    raise ValueError('test split is empty; nothing to evaluate')

# Overall accuracy: weight each batch by its size so that a shorter final batch
# does not distort the result (identical to the plain mean for equal batches).
print(np.average(acc_list, weights=batch_sizes))

0.9362
0.9364
0.9364
0.9363333333333334


In [None]:
# Persist the fitted classifier. joblib.dump() does not create missing
# directories, so make sure the target folder exists first.
Path('./objects').mkdir(parents=True, exist_ok=True)
joblib.dump(clf, './objects/svm.pkl')

['./objects/log_loss.pkl']