In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid') # Plot style
plt.rcParams['figure.figsize'] = (12.0, 8.0)

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from tqdm import tqdm 

In [11]:
# Создаем список DataFrame
root_dir = 'Mashtots/Train/Train'

dfs = []

# Прогресс-бар
for folder, _, files in os.walk(root_dir):
    if not files:
        continue

    label = int(os.path.basename(folder))

    for image in tqdm(files, desc=f"Processing {folder}"):
        image_path = os.path.join(folder, image)

        if not os.path.isfile(image_path):
            continue

        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            feature_vector = img.flatten()

            # Создаем временный DataFrame
            new_df = pd.DataFrame([[*feature_vector, label]], columns=columns)
            dfs.append(new_df)

# Конкатенация всех DataFrame в один
train_df = pd.concat(dfs, ignore_index=True)

Processing Mashtots/Train/Train\0: 100%|█████████████████████████████████████████████| 854/854 [01:59<00:00,  7.14it/s]
Processing Mashtots/Train/Train\1: 100%|█████████████████████████████████████████████| 870/870 [02:03<00:00,  7.05it/s]
Processing Mashtots/Train/Train\10: 100%|████████████████████████████████████████████| 886/886 [02:04<00:00,  7.14it/s]
Processing Mashtots/Train/Train\11: 100%|████████████████████████████████████████████| 904/904 [02:06<00:00,  7.13it/s]
Processing Mashtots/Train/Train\12: 100%|████████████████████████████████████████████| 900/900 [02:08<00:00,  7.03it/s]
Processing Mashtots/Train/Train\13: 100%|████████████████████████████████████████████| 927/927 [02:11<00:00,  7.06it/s]
Processing Mashtots/Train/Train\14: 100%|████████████████████████████████████████████| 785/785 [01:49<00:00,  7.16it/s]
Processing Mashtots/Train/Train\15: 100%|████████████████████████████████████████████| 931/931 [02:10<00:00,  7.14it/s]
Processing Mashtots/Train/Train\16: 100%

In [12]:
train_df

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_4087,pixel_4088,pixel_4089,pixel_4090,pixel_4091,pixel_4092,pixel_4093,pixel_4094,pixel_4095,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70055,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
70056,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
70057,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
70058,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9


In [18]:
current_columns = train_df.columns

# Генерируем новые названия столбцов с цифрами вместо "pixel_"
new_columns = [str(i) for i in range(64*64)] + ["label"]

columns_mapping = dict(zip(current_columns, new_columns))

train_df = train_df.rename(columns=columns_mapping)

In [19]:
train_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4087,4088,4089,4090,4091,4092,4093,4094,4095,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70055,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
70056,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
70057,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
70058,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9


In [20]:
train_df.to_csv('train_df.csv', index=False)

In [None]:
data = pd.read_csv('Mashtots/train_df.csv')

# Разделение данных на признаки и метки
X = data.drop('label', axis=1)
y = data['label']


In [None]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# DecisionTreeClassifier

In [None]:
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_val)


In [None]:
# Оценка точности
accuracy = accuracy_score(y_val, y_pred)
print(f'Accuracy: {accuracy}')

In [None]:
test_data = pd.read_csv('Mashtots/new_test.csv')

In [None]:
test_data.head()

In [None]:
X_test = test_data
# Прогнозирование на тестовом наборе
y_pred_test = model.predict(X_test)

In [None]:
y_pred_test

In [None]:
y_pred_test = model.predict(X_test)
submission_df = pd.DataFrame({'Id': np.arange(1, X_test.shape[0]+1),
                              'Category': y_pred_test})
submission_df.to_csv('my_submission.csv', index=False)

In [None]:
submission_df

# RandomForestClassifier

In [None]:
# Создание и обучение RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [None]:
y_pred_val = model.predict(X_val)

accuracy_val = accuracy_score(y_val, y_pred_val)
print(f'Accuracy on validation set: {accuracy_val}')

In [None]:
y_pred_test2 = model.predict(X_test)
y_pred_test2

In [None]:
y_pred_test2 = model.predict(X_test)
submission_df = pd.DataFrame({'Id': np.arange(1, X_test.shape[0]+1),
                              'Category': y_pred_test2})
submission_df.to_csv('my_submission1.csv', index=False)

In [None]:
submission_df

# GaussianNB

In [None]:
model = GaussianNB()
model.fit(X_train, y_train)

In [None]:
y_pred_val = model.predict(X_val)

accuracy_val = accuracy_score(y_val, y_pred_val)
print(f'Accuracy on validation set: {accuracy_val}')

In [None]:
y_pred_test3 = model.predict(X_test)
y_pred_test3

In [None]:
y_pred_test3 = model.predict(X_test)
submission_df = pd.DataFrame({'Id': np.arange(1, X_test.shape[0]+1),
                              'Category': y_pred_test3})
submission_df.to_csv('my_submission1.csv', index=False)

In [None]:
submission_df

# CNN


In [30]:
pip install tensorflow==2.6.0

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement tensorflow==2.6.0 (from versions: 2.12.0rc0, 2.12.0rc1, 2.12.0, 2.12.1, 2.13.0rc0, 2.13.0rc1, 2.13.0rc2, 2.13.0, 2.13.1, 2.14.0rc0, 2.14.0rc1, 2.14.0, 2.14.1, 2.15.0rc0, 2.15.0rc1, 2.15.0)
ERROR: No matching distribution found for tensorflow==2.6.0


Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/93/21/9b035a4f823d6aee2917c75415be9a95861ff3d73a0a65e48edbf210cec1/tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata (3.6 kB)
Collecting tensorflow-intel==2.15.0 (from tensorflow)
  Obtaining dependency information for tensorflow-intel==2.15.0 from https://files.pythonhosted.org/packages/4c/48/1a5a15517f18eaa4ff8d598b1c000300b20c1bb0e624539d702117a0c369/tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata (5.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.15.0->tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/01/e4/dc0a1dcc4e74e08d7abedab278c795eef54a224363bb18f5692f416d834f/absl_py-2.0.0-py3-none-any.whl.metadata
  Using cached absl_py-2.0.0-py3-none-

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models, applications, callbacks

In [3]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)


NameError: name 'LabelEncoder' is not defined

In [None]:
# Разделение данных на тренировочный и валидационный наборы
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Преобразование данных в формат, подходящий для сверточной нейронной сети
X_train = X_train.values.reshape(-1, 64, 64, 1)
X_val = X_val.values.reshape(-1, 64, 64, 1)

In [None]:
# Создание модели ResNet
base_model = applications.ResNet50(weights='imagenet', include_top=False, input_shape=(64, 64, 3))
model = models.Sequential()
model.add(layers.UpSampling2D((3, 3)))
model.add(layers.UpSampling2D((3, 3)))
model.add(layers.UpSampling2D((3, 3)))
model.add(base_model)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

# Компиляция модели
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Обратные вызовы для сохранения модели и остановки обучения при необходимости
callbacks_list = [
    callbacks.ModelCheckpoint('model.h5', save_best_only=True, monitor='val_loss', mode='min'),
    callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='min')
]


In [None]:
# Обучение модели
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val), callbacks=callbacks_list)

In [None]:
# Визуализация процесса обучения
import matplotlib.pyplot as plt

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()