In [2]:
import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split


In [3]:
# Load the combined dataset.
# Replace the file name with the actual path to your combined CSV file.
df = pd.read_csv(filepath_or_buffer='combined_data_all.csv')


In [4]:
# Derive additional features (spectral indices) from the raw band columns.
# Bands are addressed by column position; the band names below follow the
# labels used by the original author — TODO confirm the position-to-band
# mapping against the data source.
green = df.iloc[:, 1]
red = df.iloc[:, 2]
nir = df.iloc[:, 6]
mir = df.iloc[:, 8]
swir = df.iloc[:, 9]

# Normalised-difference style indices: (a - b) / (a + b).
df['NDWI'] = (green - nir) / (green + nir)
df['NDMI'] = (nir - mir) / (nir + mir)
df['MNDWI'] = (green - mir) / (green + mir)
# Ratio of (green + red) to (nir + mir).
df['WRI'] = (green + red) / (nir + mir)
df['NDVI'] = (nir - red) / (nir + red)
# Weighted linear combination: 4*(green - mir) - (0.25*nir + 2.75*swir).
df['AWEI'] = 4 * (green - mir) - (0.25 * nir + 2.75 * swir)


In [5]:
# Select the model inputs (the derived indices) and the target column.
feature_columns = ['NDWI', 'NDMI', 'MNDWI', 'WRI', 'NDVI', 'AWEI']
X = df[feature_columns]
# The target is taken by position (column 10) — verify this index matches your data.
y = df.iloc[:, 10]


In [6]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [7]:
# Destination file for the thinned-out copy of the dataset.
output_path = "filtered_dataset.csv"

# Keep only every 500th row (positions 0, 500, 1000, ...) and write the
# result to the new file without the index column.
df_filtered = df.iloc[::500]
df_filtered.to_csv(path_or_buf=output_path, index=False)

In [8]:
# Show the subsampled frame as this cell's output.
df_filtered

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,NDWI,NDMI,MNDWI,WRI,NDVI,AWEI
0,206,439,275,860,2306,2934,2619,3185,1551,762,0,-0.712884,0.256115,-0.558794,0.171223,0.809952,-7198.25
500,216,390,326,672,1526,1774,2144,2075,1218,576,0,-0.692186,0.275431,-0.514925,0.212968,0.736032,-5432.00
1000,170,377,266,732,1807,2122,2146,2391,1287,642,0,-0.701149,0.250218,-0.546875,0.187300,0.779436,-5942.00
1500,247,558,414,1082,2273,2564,2601,2925,2124,1084,0,-0.646724,0.100952,-0.583893,0.205714,0.725373,-9895.25
2000,194,496,378,905,1896,2204,2602,2571,1739,828,0,-0.679793,0.198802,-0.556152,0.201336,0.746309,-7899.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269039000,632,835,1114,1219,1015,1007,1256,1062,729,505,1,-0.201339,0.265491,0.067775,0.981864,0.059916,-1278.75
269039500,626,944,1078,992,427,375,323,214,33,33,1,0.490134,0.814607,0.932446,5.679775,-0.538901,3472.50
269040000,646,968,1106,1001,437,408,344,203,49,35,1,0.475610,0.750636,0.903638,5.277354,-0.525517,3493.75
269040500,605,904,1036,922,394,353,287,192,33,29,1,0.518052,0.793750,0.929562,6.062500,-0.566138,3332.50


In [None]:
# Train a logistic regression classifier on the training split,
# allowing the solver up to 1000 iterations.
logistic_model = LogisticRegression(max_iter=1000)
logistic_model.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the held-out test rows.
y_pred = logistic_model.predict(X=X_test)

In [None]:
# Evaluate the model on the test split: confusion matrix first,
# then the per-class precision / recall / F1 report.
print("Confusion Matrix:")
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

print("\nClassification Report:")
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

Confusion Matrix:
[[51882976   100448]
 [  425270  1399589]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99  51983424
           1       0.93      0.77      0.84   1824859

    accuracy                           0.99  53808283
   macro avg       0.96      0.88      0.92  53808283
weighted avg       0.99      0.99      0.99  53808283



In [11]:
# Persist the fitted model to a file (optional step).
# joblib is imported with the other dependencies at the top of the notebook,
# so a missing package is reported before the expensive training cells run
# rather than after them.
joblib.dump(logistic_model, "logistic_regression_model.joblib")

['logistic_regression_model.joblib']