In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from catboost import CatBoostClassifier

In [3]:
# Load the combined dataset.
# Replace the path below with the actual location of your combined CSV file.
df = pd.read_csv(filepath_or_buffer='combined_data_all.csv')

In [4]:
# Display the loaded frame; the rendered output is a truncated preview
# (first and last rows only).
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,206,439,275,860,2306,2934,2619,3185,1551,762,0
1,222,457,343,860,2306,2934,2834,3185,1551,762,0
2,287,582,348,990,2232,2651,3707,3260,1423,691,0
3,306,576,356,990,2232,2651,3528,3260,1423,691,0
4,179,346,288,581,1978,2199,1867,2026,889,487,0
...,...,...,...,...,...,...,...,...,...,...,...
269041407,614,855,885,792,306,301,249,155,46,51,1
269041408,583,817,852,767,330,321,264,194,63,51,1
269041409,570,800,840,767,330,321,277,194,63,51,1
269041410,592,830,877,780,310,305,262,169,49,57,1


In [5]:
# Compute additional features: spectral indices derived from the band columns.
def _append_spectral_indices(frame):
    """Add NDWI, NDMI, MNDWI, WRI, NDVI and AWEI columns to ``frame`` in place.

    Bands are selected by column position, using the labels this notebook
    gives them: 1 = green, 2 = red, 6 = nir, 8 = mir, 9 = swir.
    The new columns are appended after the existing ones, so the positions
    of the band columns are not affected.
    """
    green = frame.iloc[:, 1]
    red = frame.iloc[:, 2]
    nir = frame.iloc[:, 6]
    mir = frame.iloc[:, 8]
    swir = frame.iloc[:, 9]

    # Normalized-difference style ratios.
    frame['NDWI'] = (green - nir) / (green + nir)
    frame['NDMI'] = (nir - mir) / (nir + mir)
    frame['MNDWI'] = (green - mir) / (green + mir)
    # (green + red) / (nir + mir)
    frame['WRI'] = (green + red) / (nir + mir)
    frame['NDVI'] = (nir - red) / (nir + red)
    # 4 * (green - mir) - (0.25 * nir + 2.75 * swir)
    frame['AWEI'] = 4 * (green - mir) - (0.25 * nir + 2.75 * swir)


_append_spectral_indices(df)

In [6]:
# Build the feature matrix and the target vector.
# Features: the first ten columns (by position) plus the derived index columns.
derived_columns = ['NDWI', 'NDMI', 'MNDWI', 'WRI', 'NDVI', 'AWEI']
X = pd.concat([df.iloc[:, :10], df[derived_columns]], axis=1)
# Target: the column at position 10 (double-check this position for your data).
y = df.iloc[:, 10]

In [9]:
# Display the feature matrix; the rendered output is a truncated preview.
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,NDWI,NDMI,MNDWI,WRI,NDVI,AWEI
0,206,439,275,860,2306,2934,2619,3185,1551,762,-0.712884,0.256115,-0.558794,0.171223,0.809952,-7198.25
1,222,457,343,860,2306,2934,2834,3185,1551,762,-0.722273,0.292588,-0.544821,0.182440,0.784073,-7180.00
2,287,582,348,990,2232,2651,3707,3260,1423,691,-0.728608,0.445224,-0.419451,0.181287,0.828360,-6191.00
3,306,576,356,990,2232,2651,3528,3260,1423,691,-0.719298,0.425167,-0.423712,0.188245,0.816684,-6170.25
4,179,346,288,581,1978,2199,1867,2026,889,487,-0.687302,0.354862,-0.439676,0.230044,0.732715,-3978.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269041407,614,855,885,792,306,301,249,155,46,51,0.548913,0.688136,0.897891,5.898305,-0.560847,3033.50
269041408,583,817,852,767,330,321,264,194,63,51,0.511563,0.614679,0.856818,5.103976,-0.526882,2809.75
269041409,570,800,840,767,330,321,277,194,63,51,0.485608,0.629412,0.853998,4.823529,-0.504029,2738.50
269041410,592,830,877,780,310,305,262,169,49,57,0.520147,0.684887,0.888510,5.488746,-0.539947,2901.75


In [11]:
# Rebind df to None so this name no longer references the full frame;
# X and y have already been extracted from it.
# NOTE(review): presumably done to let memory be reclaimed - confirm.
df = None

In [12]:
# Split the data into training and test sets (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

IOStream.flush timed out


In [13]:
# Display the training features; the rendered output is a truncated preview.
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,NDWI,NDMI,MNDWI,WRI,NDVI,AWEI
223544635,532,645,865,990,1119,1215,1406,1355,2398,2094,-0.371039,-0.260778,-0.576076,0.396951,0.238221,-13122.00
217507415,651,696,822,1057,1082,1136,1164,1437,2126,1783,-0.251613,-0.292401,-0.506733,0.461398,0.172205,-10914.25
173381895,384,542,444,559,389,374,337,331,305,240,0.233220,0.049844,0.279811,1.535826,-0.137004,203.75
214669573,628,786,783,1187,1935,2146,2210,2305,2278,1645,-0.475300,-0.015152,-0.486945,0.349599,0.476779,-11044.25
249846069,736,863,1040,1199,1327,1431,1578,1560,2646,1936,-0.292913,-0.252841,-0.508122,0.450521,0.205500,-12850.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133120466,289,501,258,710,1947,2150,2602,2428,1136,530,-0.677087,0.392188,-0.387905,0.203050,0.819580,-4648.00
133224038,214,492,201,729,2351,2704,2868,2799,1181,531,-0.707143,0.416646,-0.411835,0.171153,0.869013,-4933.25
127521863,174,336,164,541,1534,1797,1882,1933,919,438,-0.697024,0.343806,-0.464542,0.178508,0.839687,-4007.00
250975502,676,786,962,1151,1274,1368,1426,1483,2673,2062,-0.289331,-0.304221,-0.545533,0.426445,0.194305,-13575.00


In [14]:
# Train the CatBoost model.
model = CatBoostClassifier(
    iterations=200,
    learning_rate=0.1,
    depth=10,
    verbose=0,  # verbosity level 0, as in the original call
)
# Kept as the last expression so the cell still shows the fitted model's repr.
model.fit(X_train, y_train)

<catboost.core.CatBoostClassifier at 0x7f4f00c65780>

In [15]:
# Predict on the test set.
y_pred = model.predict(X_test)

In [16]:
# Evaluate model performance: confusion matrix followed by the per-class report.
# A single print with a newline separator emits the same text as four
# consecutive print calls.
print(
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred),
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

Confusion Matrix:
[[51869369   114055]
 [  238155  1586704]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00  51983424
           1       0.93      0.87      0.90   1824859

    accuracy                           0.99  53808283
   macro avg       0.96      0.93      0.95  53808283
weighted avg       0.99      0.99      0.99  53808283



In [17]:
# Save the trained model to a file.
model.save_model("catboost_model.cbm")