In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# 1. 데이터 로드
data = pd.read_csv('../data/Clean_White_data.csv)

# 2. 데이터 전처리
# 필요한 컬럼 선택
features = data[['body', 'texture', 'sweetness', 'flavor1', 'flavor2', 'flavor3']]
target = data['wine_rating']

# 범주형 데이터 전처리
label_encoder = LabelEncoder()
for col in ['flavor1', 'flavor2', 'flavor3']:
    features[col] = label_encoder.fit_transform(features[col])

# 데이터 정규화
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# 3. 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

# 4. 모델 정의 및 학습
model = SVR(kernel='rbf', C=100, epsilon=0.1)
model.fit(X_train, y_train)

# 5. 평가
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# 6. 예측 함수
def predict_wine_score(body, texture, sweetness, flavor1, flavor2, flavor3):
    # 입력 데이터 전처리
    input_data = pd.DataFrame([[body, texture, sweetness, flavor1, flavor2, flavor3]],
                              columns=['body', 'texture', 'sweetness', 'flavor1', 'flavor2', 'flavor3'])
    input_data_encoded = input_data.copy()
    for col in ['flavor1', 'flavor2', 'flavor3']:
        input_data_encoded[col] = label_encoder.transform(input_data_encoded[col])
    input_data_scaled = scaler.transform(input_data_encoded)
    
    # 예측
    score = model.predict(input_data_scaled)
    return score[0]

# 임의의 데이터로 예측
example_score = predict_wine_score(85, 40, 55, 'oaky', 'tree fruit', 'citrus')
print(f'Predicted Wine Score: {example_score}')


SyntaxError: unterminated string literal (detected at line 8) (681697167.py, line 8)