# In [None]:
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import make_column_transformer
from sklearn.metrics import r2_score, mean_squared_error

import pandas as pd 
import numpy as np 
import joblib


# Load the data set the model is fitted on.
red_wine_df = pd.read_csv('data/Red.csv')

# Feature columns and regression target.
X = red_wine_df[['Country', 'Region', 'Price']]
y = red_wine_df['Rating']

# Per-column preprocessing:
#   * Region  -> integer codes; categories not seen during fit are mapped
#                to -1 instead of raising at transform time.
#   * Price   -> standardised.
#   * Country -> one-hot; categories not seen during fit become an
#                all-zero row instead of raising at transform time.
ct = make_column_transformer(
    (
        OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
        ['Region'],
    ),
    (StandardScaler(), ['Price']),
    (OneHotEncoder(handle_unknown='ignore'), ['Country']),
)

print(ct)

# Preprocessing followed by a random forest; the fixed seed keeps runs
# reproducible.
pipeline_wine = Pipeline(
    [('ct', ct), ('rfr', RandomForestRegressor(random_state=42))]
)

# The pipeline has to be fitted before predict() can be called on it;
# otherwise scikit-learn raises NotFittedError.
pipeline_wine.fit(X, y)

def rmse(y_hat, y):
    """Return the root mean squared error between predictions and targets.

    Computed directly with NumPy so the result does not depend on the
    ``squared=False`` keyword of ``sklearn.metrics.mean_squared_error``,
    which newer scikit-learn releases no longer accept.

    Args:
        y_hat: Predicted values (array-like, 1-D or 2-D).
        y: Ground-truth values (array-like, same number of samples).

    Returns:
        The RMSE as a float. For 2-D input the RMSE is taken per column
        and the column values are averaged uniformly.

    Raises:
        ValueError: If the inputs are empty or their shapes do not match.
    """
    predicted = np.asarray(y_hat, dtype=float)
    actual = np.asarray(y, dtype=float)

    # Treat a 1-D vector and an (n, 1) column as the same thing, so that
    # mixing the two cannot silently broadcast to an (n, n) matrix.
    if predicted.ndim == 1:
        predicted = predicted.reshape(-1, 1)
    if actual.ndim == 1:
        actual = actual.reshape(-1, 1)

    if predicted.shape != actual.shape:
        raise ValueError(
            'y_hat and y must have the same shape, got {} and {}'.format(
                predicted.shape, actual.shape
            )
        )
    if predicted.size == 0:
        raise ValueError('rmse() requires at least one sample')

    per_column = np.sqrt(np.mean((actual - predicted) ** 2, axis=0))
    return float(np.mean(per_column))

# Held-out data used to score the model.
wine_test = pd.read_csv('data/Red_test.csv')

x_test = wine_test[['Country', 'Region', 'Price']]
y_test = wine_test['Rating']

# NOTE(review): predict() requires pipeline_wine to have been fitted on
# (X, y) before this point -- confirm that a fit call precedes this section.
y_pred = pipeline_wine.predict(x_test)

print('Метрика RMSE для тестового набора данных составляет: {}'.format(
    round(rmse(y_pred, y_test), 4))
)

# Changing n_estimators through set_params() only updates the
# hyper-parameter; trees that were already built are left untouched.
# The pipeline therefore has to be refitted, otherwise the prediction below
# would come from the previous forest and the metric would not change.
pipeline_wine.set_params(rfr__n_estimators=200)
pipeline_wine.fit(X, y)

y_pred_n_estim_200 = pipeline_wine.predict(x_test)

print('Метрика RMSE для тестового набора данных составляет: {}'.format(
    round(rmse(y_pred_n_estim_200, y_test), 4))
)
