In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# 1. 데이터 불러오기 (세미콜론 구분자)
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/winequality-red.csv", sep=";")

# 2. 입력(X)과 타겟(y) 분리
X = df.drop(columns=["quality"])
y = df["quality"]

# 3. 선형회귀 모델 학습
model = LinearRegression()
model.fit(X, y)

# 4. 기울기(가중치)와 절편 출력
print("절편 (intercept):", model.intercept_)
print("기울기 (coefficients):")
for feature, coef in zip(X.columns, model.coef_):
    print(f"  {feature}: {coef}")


절편 (intercept): 21.96520844944863
기울기 (coefficients):
  fixed acidity: 0.024990552671674163
  volatile acidity: -1.0835902586934347
  citric acid: -0.1825639484107161
  residual sugar: 0.01633126976547504
  chlorides: -1.8742251580992022
  free sulfur dioxide: 0.0043613333090966044
  total sulfur dioxide: -0.00326457970306826
  density: -17.88116383249607
  pH: -0.4136531438217573
  sulphates: 0.9163344127211345
  alcohol: 0.2761976992268876


In [3]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# 1. 데이터 불러오기 (세미콜론 구분자)
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/winequality-white.csv", sep=";")

# 2. 입력(X)과 타겟(y) 분리
X = df.drop(columns=["quality"])
y = df["quality"]

# 3. 선형회귀 모델 학습
model = LinearRegression()
model.fit(X, y)

# 4. 기울기(가중치)와 절편 출력
print("절편 (intercept):", model.intercept_)
print("기울기 (coefficients):")
for feature, coef in zip(X.columns, model.coef_):
    print(f"  {feature}: {coef}")

절편 (intercept): 150.19284248121528
기울기 (coefficients):
  fixed acidity: 0.06551996135475305
  volatile acidity: -1.8631770921609137
  citric acid: 0.022090200679816308
  residual sugar: 0.08148280263769919
  chlorides: -0.24727653669074925
  free sulfur dioxide: 0.00373276519233684
  total sulfur dioxide: -0.00028574741871545406
  density: -150.28418060049725
  pH: 0.6863437418226751
  sulphates: 0.6314764727092783
  alcohol: 0.1934756972048691


In [8]:
import pandas as pd

df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/winequality-white.csv", sep=";")
print(df.columns)


Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score

# -----------------------------
# 1) 데이터 준비
# -----------------------------
cols = ['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol',
 'quality']



df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/winequality-white.csv", sep=";").dropna()

X = df.drop(columns=["quality"])
y = df["quality"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# -----------------------------
# 2) 모델 구성
# -----------------------------
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=200, random_state=42)
lr_clf = LogisticRegression(max_iter=500)

# 선형회귀 모델 추가
lr_reg = LinearRegression()

# -----------------------------
# 3) 모델 학습
# -----------------------------
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
lr_clf.fit(X_train, y_train)

# 선형회귀 모델 학습
lr_reg.fit(X_train, y_train)

# -----------------------------
# 4) 모델 평가
# -----------------------------
dt_acc = accuracy_score(y_test, dt.predict(X_test))
rf_acc = accuracy_score(y_test, rf.predict(X_test))
lr_acc = accuracy_score(y_test, lr_clf.predict(X_test))

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")

# -----------------------------
# 5) 선형회귀 결과 (기울기와 절편)
# -----------------------------
print("\n=== Linear Regression Coefficients ===")
print("절편 (intercept):", lr_reg.intercept_)
print("기울기 (coefficients):")
for feature, coef in zip(X.columns, lr_reg.coef_):
    print(f"  {feature}: {coef}")


=== Test Accuracy ===
Decision Tree : 0.5908
Random Forest : 0.6776
Logistic Reg. : 0.5173

=== Linear Regression Coefficients ===
절편 (intercept): 142.58862718711305
기울기 (coefficients):
  fixed acidity: 0.056318456899041504
  volatile acidity: -1.9131972360427856
  citric acid: 0.11761102560503686
  residual sugar: 0.07904465338475164
  chlorides: -0.5979029845788384
  free sulfur dioxide: 0.0033563138099406697
  total sulfur dioxide: -3.599701143186529e-05
  density: -142.67857070588474
  pH: 0.6849509639905399
  sulphates: 0.6019287009639587
  alcohol: 0.20480309660133825


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
