# Импорт библиотек

In [18]:
# Импорт библиотек и метрик

import pandas as pd
from sklearn.model_selection import train_test_split #  функция разбиения на тренировочную и тестовую выборку
from sklearn.preprocessing import MinMaxScaler # Объект Нормализации от Scikit-learn
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import ShuffleSplit # при кросс-валидации случайно перемешиваем данные

# Метрики
from sklearn.metrics import accuracy_score  as acc
from sklearn.metrics import f1_score as f1
from sklearn.metrics import precision_score as pre
from sklearn.metrics import recall_score as re
import warnings
warnings.filterwarnings("ignore")

# Загрузка данных

In [6]:
train = pd.read_csv('/content/df_train.csv', delimiter = ',')
test = pd.read_csv('/content/df_test.csv', delimiter = ',')

# Делим на тренировочную и тестовую

In [9]:
# не забываем удалить целевую переменную из признаков
X_train,y_train = train.drop(columns = ['quality']).values,train['quality'].values
X_test,y_test = test.drop(columns = ['quality']).values,test['quality'].values

# нормализуем
scaler  = MinMaxScaler()
scaler.fit(X_train) # для тренировочных сначала "обучаем"
X_train = scaler.transform(X_train) # потом преобразуем  transform
X_test  = scaler.transform(X_test)  # для тестовых - просто transform

# Обучаем модель

In [20]:
# создаем объект класса с указанием гиперпараметров
model = RandomForestClassifier()

# обучаем на тренировочных данных
model.fit(X_train, y_train)

# предсказываем на тестовых данных
y_predict=model.predict(X_test)

# выбираем метрики которые будем оценивать
# в виде словаря
scoring = {'acc': 'accuracy',
           'f1': 'f1_macro',
           'pre': 'precision_macro',
           're': 'recall_macro'}

# используем функцию кросс-валидации

scores = cross_validate(model, # какую модель тестируем
                        X_train, y_train, # на каких данных
                      scoring=scoring,  #на каких метриках
                      # как разбиваем данные при кросс-валидации
                      cv=ShuffleSplit(n_splits=5, random_state=42))


print('Результаты Кросс-валидации')
DF_cv_RFC = pd.DataFrame(scores)
display(DF_cv_RFC)
print('\n')
print(DF_cv_RFC.mean()[2:])

print('Accuracy: %.1f' % acc(y_test ,y_predict))
print('F1-score: %.1f' % f1(y_test, y_predict, average='macro'))
print('Precision : %.4f' %  pre(y_test, y_predict, average='macro'))

Результаты Кросс-валидации


Unnamed: 0,fit_time,score_time,test_acc,test_f1,test_pre,test_re
0,0.507066,0.012238,0.709091,0.417182,0.414286,0.43357
1,0.319886,0.011973,0.663636,0.528227,0.601047,0.495173
2,0.325124,0.012356,0.736364,0.358956,0.350929,0.368265
3,0.321005,0.011983,0.754545,0.561933,0.554735,0.581667
4,0.333187,0.018275,0.7,0.451768,0.508333,0.426379




test_acc    0.712727
test_f1     0.463613
test_pre    0.485866
test_re     0.461011
dtype: float64
Accuracy: 0.6
F1-score: 0.3
Precision : 0.2592


In [29]:
import datetime
now = datetime.datetime.now()
print(now)

2023-11-02 12:26:53.696405


In [32]:
df_scores = pd.DataFrame([[now, acc(y_test,y_predict), f1(y_test,y_predict, average='macro'), pre(y_test,y_predict, average='macro')]])

In [33]:
df_scores.to_csv('scores.csv', index=False)

In [35]:
df_scores.columns = ['time', 'Accuracy', 'F1', 'Precision']
df_scores

Unnamed: 0,time,Accuracy,F1,Precision
0,2023-11-02 12:26:53.696405,0.583166,0.266598,0.259225


In [36]:
from sklearn.linear_model import LogisticRegression
# создаем объект класса с указанием гиперпараметров
model = LogisticRegression()

# обучаем на тренировочных данных
model.fit(X_train, y_train)

# предсказываем на тестовых данных
y_predict=model.predict(X_test)

# выбираем метрики которые будем оценивать
# в виде словаря
scoring = {'acc': 'accuracy',
           'f1': 'f1_macro',
           'pre': 'precision_macro',
           're': 'recall_macro'}

# используем функцию кросс-валидации

scores = cross_validate(model, # какую модель тестируем
                        X_train, y_train, # на каких данных
                      scoring=scoring,  #на каких метриках
                      # как разбиваем данные при кросс-валидации
                      cv=ShuffleSplit(n_splits=5, random_state=42))


print('Результаты Кросс-валидации')
DF_cv_RFC = pd.DataFrame(scores)
display(DF_cv_RFC)
print('\n')
print(DF_cv_RFC.mean()[2:])

print('Accuracy: %.1f' % acc(y_test ,y_predict))
print('F1-score: %.1f' % f1(y_test, y_predict, average='macro'))
print('Precision : %.4f' %  pre(y_test, y_predict, average='macro'))

Результаты Кросс-валидации


Unnamed: 0,fit_time,score_time,test_acc,test_f1,test_pre,test_re
0,0.044254,0.004242,0.545455,0.281943,0.294275,0.290719
1,0.044345,0.004101,0.554545,0.291438,0.303022,0.295324
2,0.039161,0.004167,0.572727,0.248007,0.251882,0.249764
3,0.039657,0.004096,0.581818,0.416408,0.431818,0.433333
4,0.043508,0.004639,0.563636,0.274668,0.283088,0.275544




test_acc    0.563636
test_f1     0.302493
test_pre    0.312817
test_re     0.308937
dtype: float64
Accuracy: 0.6
F1-score: 0.3
Precision : 0.2780


In [37]:
df_new_scores = pd.DataFrame([[now, acc(y_test,y_predict), f1(y_test,y_predict, average='macro'), pre(y_test,y_predict, average='macro')]])

In [39]:
df_new_scores.columns = ['time', 'Accuracy', 'F1', 'Precision']
df_new_scores

Unnamed: 0,time,Accuracy,F1,Precision
0,2023-11-02 12:26:53.696405,0.621242,0.279093,0.277989


In [40]:
df_scores = df_scores.append(df_new_scores, ignore_index=True)

In [41]:
df_scores

Unnamed: 0,time,Accuracy,F1,Precision
0,2023-11-02 12:26:53.696405,0.583166,0.266598,0.259225
1,2023-11-02 12:26:53.696405,0.621242,0.279093,0.277989


In [42]:
df_scores.to_csv('scores.csv', index=False)