In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

data = pd.read_csv('../voice.csv')

# Split features and target by column name, so the target can never end up
# among the features regardless of where 'label' sits in the file.
X = data.drop(columns='label')
y = data['label']

# Encode the target as integers: male -> 0, female -> 1.
y = y.map({'male': 0, 'female': 1})
# Series.map yields NaN for any value missing from the mapping; stop here with
# a clear message instead of failing later inside the classifier.
if y.isna().any():
    raise ValueError("column 'label' contains values other than 'male'/'female'")

# Experiment grid: entry i of each list configures experiment i.
num_experiments = 5
n_e = [16, 17, 18, 19, 20]
l_r = [0.1, 0.1, 0.1, 0.1, 0.1]
m_d = [3, 3, 3, 4, 5]

# Per-experiment metric values, stored so the averages can be computed afterwards.
accuracy_results = []
precision_results = []
recall_results = []
f1_results = []

def evaluate(X, y, n_e=20, l_r=0.1, m_d=5, random_state=42):
    """Train a random forest on an 80/20 split and score it on the held-out part.

    Parameters
    ----------
    X : feature matrix passed to ``train_test_split``.
    y : target values passed to ``train_test_split``. Precision, recall and F1
        are computed with scikit-learn's default settings.
    n_e : number of trees (``n_estimators``).
    l_r : ignored. Kept only so existing calls that pass it keep working;
        a random forest has no learning rate.
    m_d : maximum tree depth (``max_depth``).
    random_state : seed used for both the split and the forest, so repeated
        calls with the same arguments return the same metrics.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` measured on the test part.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Seed the forest as well: without it the bootstrap samples and feature
    # subsets differ on every run and the reported metrics are not reproducible.
    clf = RandomForestClassifier(
        n_estimators=n_e, max_depth=m_d, random_state=random_state
    )
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    # Score the predictions against the held-out labels.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    return accuracy, precision, recall, f1

# Run each configured experiment once; idx is the 1-based number shown in the log.
for idx, i in enumerate(range(num_experiments), start=1):
    print("experiment #", idx)
    print("n_e", n_e[i])
    print("l_r", l_r[i])
    print("m_d", m_d[i])
    accuracy, precision, recall, f1 = evaluate(
        X=X, y=y, n_e=n_e[i], l_r=l_r[i], m_d=m_d[i]
    )

    # Store every score so the averages can be computed after the loop.
    for bucket, score in (
        (accuracy_results, accuracy),
        (precision_results, precision),
        (recall_results, recall),
        (f1_results, f1),
    ):
        bucket.append(score)
    print("###############################\n")

# Average each metric over the configured number of experiments.
average_accuracy, average_precision, average_recall, average_f1 = (
    sum(scores) / num_experiments
    for scores in (accuracy_results, precision_results, recall_results, f1_results)
)

# Report the averages.
print(f'Средняя точность (Accuracy) по {num_experiments} экспериментам: {average_accuracy:.2f}')
print(f'Средняя точность (Precision) по {num_experiments} экспериментам: {average_precision:.2f}')
print(f'Средняя полнота (Recall) по {num_experiments} экспериментам: {average_recall:.2f}')
print(f'Средняя F1-мера по {num_experiments} экспериментам: {average_f1:.2f}')

experiment # 1
n_e 16
l_r 0.1
m_d 3
###############################

experiment # 2
n_e 17
l_r 0.1
m_d 3
###############################

experiment # 3
n_e 18
l_r 0.1
m_d 3
###############################

experiment # 4
n_e 19
l_r 0.1
m_d 4
###############################

experiment # 5
n_e 20
l_r 0.1
m_d 5
###############################

Средняя точность (Accuracy) по 5 экспериментам: 0.98
Средняя точность (Precision) по 5 экспериментам: 0.98
Средняя полнота (Recall) по 5 экспериментам: 0.98
Средняя F1-мера по 5 экспериментам: 0.98


In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.datasets import make_classification  # Импортируем make_classification

# Generate a synthetic classification dataset (3000 samples, 20 features, fixed seed).
X, y = make_classification(n_samples=3000, n_features=20, random_state=42)

# Experiment grid: entry i of each list configures experiment i.
num_experiments = 5
n_e = list(range(16, 21))
l_r = [0.1] * 5
m_d = [3, 3, 3, 4, 5]

# Per-experiment metric values, stored so the averages can be computed afterwards.
accuracy_results, precision_results, recall_results, f1_results = [], [], [], []

def evaluate(X, y, n_e=20, l_r=0.1, m_d=5, random_state=42):
    """Train a random forest on an 80/20 split and score it on the held-out part.

    Parameters
    ----------
    X : feature matrix passed to ``train_test_split``.
    y : target values passed to ``train_test_split``. Precision, recall and F1
        are computed with scikit-learn's default settings.
    n_e : number of trees (``n_estimators``).
    l_r : ignored. Kept only so existing calls that pass it keep working;
        a random forest has no learning rate.
    m_d : maximum tree depth (``max_depth``).
    random_state : seed used for both the split and the forest, so repeated
        calls with the same arguments return the same metrics.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` measured on the test part.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Seed the forest as well: without it the bootstrap samples and feature
    # subsets differ on every run and the reported metrics are not reproducible.
    clf = RandomForestClassifier(
        n_estimators=n_e, max_depth=m_d, random_state=random_state
    )
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    # Score the predictions against the held-out labels.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    return accuracy, precision, recall, f1

# Run each configured experiment once; idx is the 1-based number shown in the log.
for idx, i in enumerate(range(num_experiments), start=1):
    print("experiment #", idx)
    print("n_e", n_e[i])
    print("l_r", l_r[i])
    print("m_d", m_d[i])
    accuracy, precision, recall, f1 = evaluate(
        X=X, y=y, n_e=n_e[i], l_r=l_r[i], m_d=m_d[i]
    )

    # Store every score so the averages can be computed after the loop.
    for bucket, score in (
        (accuracy_results, accuracy),
        (precision_results, precision),
        (recall_results, recall),
        (f1_results, f1),
    ):
        bucket.append(score)
    print("###############################\n")

# Average each metric over the configured number of experiments.
average_accuracy, average_precision, average_recall, average_f1 = (
    sum(scores) / num_experiments
    for scores in (accuracy_results, precision_results, recall_results, f1_results)
)

# Report the averages.
print(f'Средняя точность (Accuracy) по {num_experiments} экспериментам: {average_accuracy:.2f}')
print(f'Средняя точность (Precision) по {num_experiments} экспериментам: {average_precision:.2f}')
print(f'Средняя полнота (Recall) по {num_experiments} экспериментам: {average_recall:.2f}')
print(f'Средняя F1-мера по {num_experiments} экспериментам: {average_f1:.2f}')


experiment # 1
n_e 16
l_r 0.1
m_d 3
###############################

experiment # 2
n_e 17
l_r 0.1
m_d 3
###############################

experiment # 3
n_e 18
l_r 0.1
m_d 3
###############################

experiment # 4
n_e 19
l_r 0.1
m_d 4
###############################

experiment # 5
n_e 20
l_r 0.1
m_d 5
###############################

Средняя точность (Accuracy) по 5 экспериментам: 0.97
Средняя точность (Precision) по 5 экспериментам: 0.98
Средняя полнота (Recall) по 5 экспериментам: 0.97
Средняя F1-мера по 5 экспериментам: 0.97
