In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline

# 1. Load the data
data = pd.read_csv('../data/Clean_White_data.csv')

# 2. Preprocessing
# Keep only the six model inputs and pull out the regression target.
features = data.loc[:, ['body', 'texture', 'sweetness', 'flavor1', 'flavor2', 'flavor3']]
target = data.loc[:, 'wine_rating']

# Categorical preprocessing and preprocessing-pipeline construction.
# The three flavor columns are one-hot encoded; every other column is passed
# through untouched (presumably numeric, since it is fed to StandardScaler next).
#
# handle_unknown='ignore' keeps transform() from raising ValueError when a
# flavor label was not present in the training split (possible in the held-out
# split or in ad-hoc predictions). Such a label is encoded as all zeros, i.e.
# the same encoding as the dropped first category, and scikit-learn emits a
# warning when that happens. Known labels are encoded exactly as before.
preprocessor = ColumnTransformer(
    transformers=[
        (
            'cat',
            OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'),
            ['flavor1', 'flavor2', 'flavor3'],
        )
    ],
    remainder='passthrough'
)

# Pipeline definition: encode -> scale -> gradient-boosted regressor.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    # Scaling is applied to every column produced by the preprocessor,
    # including the one-hot indicator columns.
    ('scaler', StandardScaler()),
    # random_state is fixed so repeated runs build the same ensemble.
    ('model', GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42))
])

# 3. Split the data: 20% of the rows are held out, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# 4. Train the model (fits the encoder, the scaler and the regressor together)
pipeline.fit(X_train, y_train)

# 5. Evaluate on the held-out rows
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# 6. Prediction helper
def predict_wine_score(body, texture, sweetness, flavor1, flavor2, flavor3, model=None):
    """Predict the rating for a single wine described by six feature values.

    Parameters
    ----------
    body, texture, sweetness
        Values for the three columns that are not one-hot encoded
        (plain numbers in the example call).
    flavor1, flavor2, flavor3
        Labels for the three flavor columns.
    model : optional
        Any fitted object exposing ``predict(DataFrame)``. When omitted, the
        module-level ``pipeline`` is used, which matches the original behavior.

    Returns
    -------
    The first (and only) element of the array returned by ``predict``.
    """
    # Resolve the estimator lazily so the global is only touched when needed.
    estimator = pipeline if model is None else model

    # Build a one-row frame whose column names and order match the training features.
    input_data = pd.DataFrame({
        'body': [body],
        'texture': [texture],
        'sweetness': [sweetness],
        'flavor1': [flavor1],
        'flavor2': [flavor2],
        'flavor3': [flavor3],
    })

    # Predict and unwrap the single result.
    return estimator.predict(input_data)[0]

# Predict the score for an arbitrary example profile
example_score = predict_wine_score(
    body=85,
    texture=40,
    sweetness=55,
    flavor1='oaky',
    flavor2='tree fruit',
    flavor3='citrus',
)
print(f'Predicted Wine Score: {example_score}')


Mean Squared Error: 0.040233572108812954
R^2 Score: 0.407309520322619
Predicted Wine Score: 3.855766531105602
