# Import bibliotek

In [34]:
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd
import re
from scipy.misc import imread
from sklearn import svm, mixture
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

# Wczytanie danych

In [2]:
def is_valid_image_name(image_name):
    return re.match(r'^[iopz]_\d+\.jpg$', image_name)

In [3]:
data_path = "Dane"
wrong_filenames_path = "wrong_filenames.txt"
wrong_sizes_path = "wrong_sizes.txt"
with open(wrong_filenames_path, "w") as wrong_filenames:
    with open(wrong_sizes_path, "w") as wrong_sizes:
        languages = [f for f in listdir(data_path)]
        data = []
        for language in languages:
            language_path = join(data_path, language)
            image_names = [f for f in listdir(language_path) if isfile(join(language_path, f))]
            for image_name in image_names:
                image_path = join(language_path, image_name)
                if is_valid_image_name(image_name):
                    image = imread(image_path)
                    image_shape = image.shape[:2]
                    if image_shape != (15, 15):
                        wrong_sizes.write(image_path + "\n")
                        continue
                    letter, index = image_name.split("_")
                    index = index.split(".")[0]
                    data.append([language, letter, index, image])
                else:
                    wrong_filenames.write(image_path + "\n")

In [4]:
rows_cnt = len(data)
print("Data size: {}".format(rows_cnt))
data = np.array(data, dtype = "object")
# data = np.array([np.array(row, dtype = "object") for row in data])
data = pd.DataFrame(data=data, columns = ["Language", "Letter", "Index", "Image"])

Data size: 856


In [5]:
print(data)

      Language Letter Index                                              Image
0    ormiański      o     4  [[253, 254, 252, 254, 254, 254, 254, 255, 255,...
1    ormiański      p    22  [[255, 253, 252, 255, 254, 242, 227, 252, 253,...
2    ormiański      z    25  [[254, 253, 253, 254, 255, 249, 255, 249, 255,...
3    ormiański      z     8  [[245, 246, 250, 146, 36, 38, 104, 224, 225, 1...
4    ormiański      o    13  [[255, 254, 248, 255, 252, 249, 253, 254, 252,...
5    ormiański      p    26  [[255, 246, 255, 251, 255, 251, 253, 250, 255,...
6    ormiański      z    20  [[253, 255, 252, 249, 255, 255, 249, 255, 255,...
7    ormiański      p    20  [[252, 255, 251, 255, 252, 251, 255, 254, 253,...
8    ormiański      z    15  [[253, 255, 250, 249, 70, 69, 144, 152, 149, 2...
9    ormiański      z     1  [[244, 248, 250, 104, 36, 45, 38, 204, 61, 54,...
10   ormiański      i     4  [[255, 255, 254, 251, 126, 139, 248, 255, 255,...
11   ormiański      i     1  [[249, 248, 250, 245, 2

In [48]:
def classify(C, gamma, X, y):
    rows_cnt = len(y)
    clf = svm.SVC(C = 0.5, gamma = 0.2)
    train = np.random.uniform(0, 1, rows_cnt)
    indices = np.arange(0, rows_cnt)[train <= 0.8]
    X_train = X[indices]
    y_train = y[indices]
    indices = np.arange(0, rows_cnt)[train > 0.8]
    X_test = X[indices]
    y_test = y[indices]
    clf = svm.SVC(C = C, gamma = gamma)
    clf.fit(X_train, y_train)
    print(clf.score(X_train, y_train))
    print(confusion_matrix(y_train, clf.predict(X_train)))
    print(clf.score(X_test, y_test))
    print(confusion_matrix(y_test, clf.predict(X_test)))

In [60]:
c_text = widgets.FloatText(
    min=0.000001,
    value=2.5,
    description='C:',
    disabled=False,
    step = 0.001,
    max=10.0
)
c_slider = widgets.FloatSlider(
    min=0.000001,
    description='C:',
    step = 0.001,
    max=10.0
)
gamma_text = widgets.FloatText(
    min=0.000001,
    value=2.5,
    description='gamma:',
    step = 0.001,
    disabled=False,
    max=10.0
)
gamma_slider = widgets.FloatSlider(
    min=0.000001,
    description='gamma:',
    step = 0.001,
    max=10.0
)
clink = widgets.jslink((c_text, 'value'), (c_slider, 'value'))
gammalink = widgets.jslink((gamma_text, 'value'), (gamma_slider, 'value'))
ui = widgets.HBox([widgets.VBox([c_text, c_slider]), widgets.VBox([gamma_text, gamma_slider])])
X = data["Image"].values
X = np.concatenate(X).reshape(X.shape[0], 225)
y = data["Letter"].values
cl = lambda C, gamma: classify(C, gamma, X, y)
out = widgets.interactive_output(cl, {'C': c_text, 'gamma': gamma_text})
display(ui, out)

A Jupyter Widget

A Jupyter Widget

In [56]:
def classify_with_pca(C, gamma, X, y, n):
    pca = PCA(n_components=n)
    pca.fit(X)
    X_pca = pca.transform(X)
    classify(C, gamma, X_pca, y)

In [62]:
c_text = widgets.FloatText(
    min=0.000001,
    value=0.6,
    description='C:',
    disabled=False,
    step = 0.001,
    max=10.0
)
c_slider = widgets.FloatSlider(
    min=0.000001,
    description='C:',
    step = 0.001,
    max=10.0
)
gamma_text = widgets.FloatText(
    min=0.000001,
    value=0.3,
    description='gamma:',
    disabled=False,
    step = 0.001,
    max=10.0
)
gamma_slider = widgets.FloatSlider(
    min=0.000001,
    description='gamma:',
    step = 0.001,
    max=10.0
)
n = widgets.IntText(
    value=5,
    description='n:',
    disabled=False,
)
clink = widgets.jslink((c_text, 'value'), (c_slider, 'value'))
gammalink = widgets.jslink((gamma_text, 'value'), (gamma_slider, 'value'))
ui = widgets.HBox([widgets.VBox([c_text, c_slider]), widgets.VBox([gamma_text, gamma_slider])])
cl_pca = lambda C, gamma, n: classify_with_pca(C, gamma, X, y, n)
out = widgets.interactive_output(cl_pca, {'C': c_text, 'gamma': gamma_text, 'n' : n})
display(ui, n, out)

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget