In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler

In [8]:
# Load the frog-call MFCC dataset from the project-relative data folder.
data = pd.read_csv(filepath_or_buffer="data/Frogs_MFCCs.csv")

# Preview the first five rows to check the column layout
# (MFCC feature columns followed by Family / Genus / Species / RecordID).
data.head(n=5)

Unnamed: 0,MFCCs_ 1,MFCCs_ 2,MFCCs_ 3,MFCCs_ 4,MFCCs_ 5,MFCCs_ 6,MFCCs_ 7,MFCCs_ 8,MFCCs_ 9,MFCCs_10,...,MFCCs_17,MFCCs_18,MFCCs_19,MFCCs_20,MFCCs_21,MFCCs_22,Family,Genus,Species,RecordID
0,1.0,0.152936,-0.105586,0.200722,0.317201,0.260764,0.100945,-0.150063,-0.171128,0.124676,...,-0.108351,-0.077623,-0.009568,0.057684,0.11868,0.014038,Leptodactylidae,Adenomera,AdenomeraAndre,1
1,1.0,0.171534,-0.098975,0.268425,0.338672,0.268353,0.060835,-0.222475,-0.207693,0.170883,...,-0.090974,-0.05651,-0.035303,0.02014,0.082263,0.029056,Leptodactylidae,Adenomera,AdenomeraAndre,1
2,1.0,0.152317,-0.082973,0.287128,0.276014,0.189867,0.008714,-0.242234,-0.219153,0.232538,...,-0.050691,-0.02359,-0.066722,-0.025083,0.099108,0.077162,Leptodactylidae,Adenomera,AdenomeraAndre,1
3,1.0,0.224392,0.118985,0.329432,0.372088,0.361005,0.015501,-0.194347,-0.098181,0.270375,...,-0.136009,-0.177037,-0.130498,-0.054766,-0.018691,0.023954,Leptodactylidae,Adenomera,AdenomeraAndre,1
4,1.0,0.087817,-0.068345,0.306967,0.330923,0.249144,0.006884,-0.265423,-0.1727,0.266434,...,-0.048885,-0.053074,-0.08855,-0.031346,0.10861,0.079244,Leptodactylidae,Adenomera,AdenomeraAndre,1


In [9]:
# Select the features and the target variable.
# The MFCC columns are picked by name rather than by position, so that a
# stray leading column (e.g. a saved index) can never slip into the feature
# matrix or push the last MFCC column out of it.
X = data.filter(like="MFCCs_")
assert X.shape[1] == 22, f"expected 22 MFCC feature columns, got {X.shape[1]}"
y = data['Species']     # or 'Family', 'Genus', 'RecordID' — TODO: confirm the intended target

# Train/test split: 30% held out, stratified on the target so class
# proportions are kept in both parts; fixed seed for reproducibility.
# NOTE(review): the split ignores RecordID, so rows with the same RecordID
# can land in both train and test — confirm whether a grouped split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Scaling for KNN and logistic regression.
# The scaler is fitted on the training part only and then applied to the
# test part, so no test-set statistics are used during fitting.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 1. Random forest baseline, fitted on the unscaled features.
rf = RandomForestClassifier(n_estimators=10, random_state=42)
rf.fit(X_train, y_train)
pred_rf = rf.predict(X_test)
# Weighted F1: per-class F1 averaged with class-support weights (multiclass target).
f1_rf = f1_score(y_test, pred_rf, average='weighted')
print(f"F1 RandomForest: {f1_rf:.2f}")

F1 RandomForest: 0.97


In [12]:
# 2. Stacking ensemble
# Base learners whose predictions are combined by the final estimator.
# The order of this list is kept as-is because it defines the order of the
# inputs handed to the final estimator.
estimators = [
    ("rf", RandomForestClassifier(random_state=31, n_estimators=10)),
    ("knn", KNeighborsClassifier(n_neighbors=11)),
    ("nb", GaussianNB()),
]

# Logistic regression acts as the final (meta) estimator; the whole stack is
# fitted on the standardized training features.
stacking = StackingClassifier(
    final_estimator=LogisticRegression(random_state=42),
    estimators=estimators,
).fit(X_train_scaled, y_train)

# Evaluate on the standardized hold-out set with the weighted F1 score.
pred_stack = stacking.predict(X_test_scaled)
f1_stack = f1_score(y_true=y_test, y_pred=pred_stack, average="weighted")
print(f"F1 Stacking: {f1_stack:.2f}")

F1 Stacking: 0.98
