In [124]:
#데이터 로드
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

file_path = "./wine.csv"
df = pd.read_csv(file_path)

In [126]:
#결측치 확인
missing_values = df.isnull().sum()
print(missing_values)

Wine                    0
Alcohol                 0
Malic.acid              0
Ash                     0
Acl                     0
Mg                      0
Phenols                 0
Flavanoids              0
Nonflavanoid.phenols    0
Proanth                 0
Color.int               0
Hue                     0
OD                      0
Proline                 0
dtype: int64


In [128]:
import numpy as np
from sklearn.model_selection import train_test_split

import seaborn as sns

In [130]:
# 특성(X)과 타겟(y) 데이터 정의
X = df.drop(columns=['Wine'], axis = 1) 
y = df['Wine']  

In [132]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [134]:
import seaborn as sns
import matplotlib.pyplot as plt

In [136]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# 데이터 스케일링
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

# 모델 학습
model = RandomForestClassifier(max_depth=5, random_state=42)
model.fit(X_train_scaled, y_train)

# 예측
y_pred = model.predict(X_test_scaled)

# 정확도 평가
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

#하이퍼파라미터 튜닝
from sklearn.model_selection import GridSearchCV
param_grid = {'n_estimators': [100, 200, 300, 400, 500], 'max_depth': [5, 10, 15, 20],'min_samples_split': [2, 5, 10]}
grid_search = GridSearchCV(model, param_grid, cv=5, scoring="accuracy")
grid_search.fit(X_train_scaled, y_train)

print("최적 하이퍼파라미터:", grid_search.best_params_)
print("최고 정확도:", grid_search.best_score_)

# 혼동 행렬 출력
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 1.0000
최적 하이퍼파라미터: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 100}
최고 정확도: 0.9785714285714286
Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]


In [137]:
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

model = DecisionTreeClassifier(max_depth = 3, random_state=42)

# X_train을 DataFrame으로 유지
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

# 모델 학습
model = DecisionTreeClassifier(max_depth=5, random_state=42)
model.fit(X_train_scaled, y_train)

# 예측
y_pred = model.predict(X_test_scaled)

# 정확도 평가
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

#하이퍼파라미터 튜닝
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

param_dist = {'max_depth': randint(3, 10), 'min_samples_split': randint(2, 10)}

random_search = RandomizedSearchCV(model, param_dist, n_iter=10, cv=5, scoring="accuracy")
random_search.fit(X_train_scaled, y_train)

print("최적 하이퍼파라미터:", random_search.best_params_)
print("최고 정확도:", random_search.best_score_)

# 혼동 행렬 출력
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.9444
최적 하이퍼파라미터: {'max_depth': 7, 'min_samples_split': 7}
최고 정확도: 0.9224137931034484
Confusion Matrix:
[[13  1  0]
 [ 0 14  0]
 [ 1  0  7]]


In [138]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd

# StandardScaler 객체 정의
scaler = StandardScaler()

# X_train을 DataFrame으로 유지
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

# SVC 모델 학습
model = SVC(random_state=42)
model.fit(X_train_scaled, y_train)

# 예측
y_pred = model.predict(X_test_scaled)

# 정확도 평가
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

param_grid = {'C': [0.001, 0.01, 0.1, 1, 10], 'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['scale', 'auto']}
grid_search = GridSearchCV(model, param_grid, cv=5, scoring="accuracy")
grid_search.fit(X_train_scaled, y_train)

print("최적 하이퍼파라미터:", grid_search.best_params_)
print("최고 정확도:", grid_search.best_score_)


# 혼동 행렬 출력
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 1.0000
최적 하이퍼파라미터: {'C': 0.01, 'gamma': 'scale', 'kernel': 'linear'}
최고 정확도: 0.9857142857142858
Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]


In [146]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# 데이터 스케일링
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 로지스틱 회귀 모델 학습 (max_iter 늘리고, solver 변경)
model = LogisticRegression(solver='liblinear', max_iter=5000, random_state=42)
model.fit(X_train_scaled, y_train)

# 예측
y_pred = model.predict(X_test_scaled)

# 정확도 평가
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# 하이퍼파라미터 튜닝: 'solver'와 'penalty' 조건을 맞추기 위해 필터링
from sklearn.model_selection import GridSearchCV
param_grid = [{'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100], 'penalty': ['l1'], 'solver': ['liblinear']}, {'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100], 'penalty': ['l2'], 'solver': ['lbfgs']}]

# GridSearchCV로 최적화
grid_search = GridSearchCV(model, param_grid, cv=5, scoring="accuracy")
grid_search.fit(X_train_scaled, y_train)

print("최적 하이퍼파라미터:", grid_search.best_params_)
print("최고 정확도:", grid_search.best_score_)

# 혼동 행렬 출력
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 1.0000
최적 하이퍼파라미터: {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}
최고 정확도: 0.9928571428571429
Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
