In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import utils
import numpy as np

##### Объединяем созданные csv-файлы в один

In [2]:
# Per-person CSV exports to combine; the file names match the class names used
# in the label mapping further down.
files = ['Dan_Radcliffe.csv', 'Emma_Watson.csv', 'Rupert_Grint.csv', 'Harrison_Ford.csv', 'Will_Smith.csv', 'Halle_Berry.csv']
# Load each file into its own DataFrame, taking the first CSV column as the index.
# A comprehension keeps the per-file frames out of the notebook namespace, so the
# name `df` is only ever bound to the combined frame.
dfs = [pd.read_csv(file, index_col=0) for file in files]

In [3]:
# Stack the per-file frames into a single DataFrame. ignore_index is left at its
# default, so every row keeps the index label it had in its own file.
df = pd.concat(dfs)
# Display the combined frame.
df

Unnamed: 0,img_path,class,descriptor
0,Dan_Radcliffe\10.PNG,Dan_Radcliffe,-0.01745985820889473 0.10399903357028961 0.059...
1,Dan_Radcliffe\100.PNG,Dan_Radcliffe,-0.10777680575847626 0.07684307545423508 0.040...
2,Dan_Radcliffe\101.PNG,Dan_Radcliffe,-0.10304909199476242 0.08664718270301819 0.038...
3,Dan_Radcliffe\102.PNG,Dan_Radcliffe,-0.06392291933298111 0.09306670725345612 0.059...
4,Dan_Radcliffe\104.PNG,Dan_Radcliffe,-0.10605598986148834 0.07336850464344025 -0.03...
...,...,...,...
585,Halle_Berry\995.jpg,Halle_Berry,-0.1652878373861313 0.13762687146663666 0.0526...
586,Halle_Berry\996.jpg,Halle_Berry,-0.12461502104997635 0.10574141144752502 0.033...
587,Halle_Berry\997.jpg,Halle_Berry,-0.12324288487434387 0.11619621515274048 0.040...
588,Halle_Berry\998.jpg,Halle_Berry,-0.1603611260652542 0.08367139101028442 0.0606...


In [4]:
# Integer class label -> person name. This dict is the single source of truth
# for the label encoding.
label_to_names = {
    0: 'Harrison_Ford',
    1: 'Dan_Radcliffe',
    2: 'Emma_Watson',
    3: 'Rupert_Grint',
    4: 'Will_Smith',
    5: 'Halle_Berry',
}
# Person name -> integer class label, derived by inverting label_to_names so the
# two mappings cannot drift apart when a class is added or renumbered.
names_to_label = {name: label for label, name in label_to_names.items()}

In [5]:
# Encode each row's class name as its integer label; a class name missing from
# names_to_label would become NaN here.
df['labels'] = df['class'].map(names_to_label)
# Display the frame to check the new column.
df

Unnamed: 0,img_path,class,descriptor,labels
0,Dan_Radcliffe\10.PNG,Dan_Radcliffe,-0.01745985820889473 0.10399903357028961 0.059...,1
1,Dan_Radcliffe\100.PNG,Dan_Radcliffe,-0.10777680575847626 0.07684307545423508 0.040...,1
2,Dan_Radcliffe\101.PNG,Dan_Radcliffe,-0.10304909199476242 0.08664718270301819 0.038...,1
3,Dan_Radcliffe\102.PNG,Dan_Radcliffe,-0.06392291933298111 0.09306670725345612 0.059...,1
4,Dan_Radcliffe\104.PNG,Dan_Radcliffe,-0.10605598986148834 0.07336850464344025 -0.03...,1
...,...,...,...,...
585,Halle_Berry\995.jpg,Halle_Berry,-0.1652878373861313 0.13762687146663666 0.0526...,5
586,Halle_Berry\996.jpg,Halle_Berry,-0.12461502104997635 0.10574141144752502 0.033...,5
587,Halle_Berry\997.jpg,Halle_Berry,-0.12324288487434387 0.11619621515274048 0.040...,5
588,Halle_Berry\998.jpg,Halle_Berry,-0.1603611260652542 0.08367139101028442 0.0606...,5


In [6]:
# Placeholder for the feature matrix; starts out as an empty Python list.
X =[]

##### Записываем дескрипторы в список X

In [7]:
# Parse every space-separated descriptor string into a float vector and stack
# the vectors into one array with one row per DataFrame row (2-D when all
# descriptors have the same length).
# X is rebuilt from df on every run, so the cell is safe to re-execute; appending
# to a previously created X would fail once X has become an ndarray.
X = np.array([
    np.array([float(value) for value in descriptor.split()])
    for descriptor in df['descriptor']
])

##### Записываем классы в список Y (целевая переменная)

In [8]:
# Target vector: the integer class labels as a NumPy array. One-hot encoding for
# the neural network is applied later, after the train/test split.
Y = df['labels'].to_numpy()

In [9]:
# Sanity check: show the Python types of the feature matrix and the target vector.
type(X),type(Y)

(numpy.ndarray, numpy.ndarray)

In [10]:
# Spot-check the label stored at position 300.
Y[300]

1

##### Делим выборку на train, test

In [11]:
# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=73,
)

##### Применяем логистическую регрессию для классификации

In [12]:
# Logistic-regression classifier with default hyperparameters and a fixed seed.
model = LogisticRegression(random_state=73)

In [13]:
# Fit the classifier on the training split.
model.fit(x_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=73, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [14]:
# Mean accuracy of the fitted classifier on the held-out test split.
model.score(x_test, y_test)

0.9924471299093656

In [2]:
# 5-fold stratified cross-validation of logistic regression on the full data set.
# shuffle=True is required for random_state to have any effect; recent
# scikit-learn releases raise a ValueError when random_state is set while
# shuffle is left at its default of False.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=73)
# NOTE: this rebinds `model` to a fresh, unfitted estimator.
model = LogisticRegression(random_state=73)
result = cross_val_score(model, X, Y, cv=kfold)
# Mean score across the folds, expressed as a percentage.
result.mean()*100.0

In [16]:
# One-hot encode the integer labels for the network's softmax output.
# The class count comes from the label mapping instead of a hard-coded literal.
num_classes = len(label_to_names)
# Only encode label vectors that are still 1-D: re-running this cell would
# otherwise one-hot encode the already encoded arrays a second time.
if y_test.ndim == 1:
    y_test = utils.to_categorical(y_test, num_classes)
if y_train.ndim == 1:
    y_train = utils.to_categorical(y_train, num_classes)

##### Создаем полносвязную сеть для классификации

In [3]:
# Fully connected classifier built as a sequential model:
#   - hidden dense layer: 64 units, ReLU, fed by 128 input values;
#   - output dense layer: 6 units with softmax, one per class label.
nn = Sequential([
    Dense(64, input_dim=128, activation="relu"),
    Dense(6, activation="softmax"),
])

In [18]:
# Configure training: categorical cross-entropy loss (targets are one-hot),
# the SGD optimizer, and accuracy as the reported metric.
nn.compile(loss="categorical_crossentropy", optimizer="SGD", metrics=["accuracy"])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                8256      
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 390       
Total params: 8,646
Trainable params: 8,646
Non-trainable params: 0
_________________________________________________________________
None


In [4]:
# Train for 100 epochs with mini-batches of 32 samples. The held-out test split
# is passed as validation data, so it is evaluated alongside training.
history = nn.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=100,
    validation_data=(x_test, y_test),
    verbose=1,
)

In [23]:
# Persist the trained network to the file 'my_nnet.h5' (relative path, so it
# lands in the current working directory).
nn.save('my_nnet.h5')