# 인공지능개론 1주차 과제

붓꽃(Iris) 데이터를 입력받아 DT(의사결정나무), LR(로지스틱 회귀), SVM(서포트 벡터 머신), RP(무작위 투사) 모델을 학습하고 각 모델의 정확도를 구하는 프로그램 작성

In [198]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load a CSV dataset and split it into features and target
def load_data(file_path):
    """Read the CSV at *file_path* and split it into features and target.

    The last column is treated as the target; every other column is a
    feature. Columns that pandas auto-named ``Unnamed: N`` (a row index that
    was written into the file) are dropped first, so the row number is not
    used as a feature.

    Args:
        file_path: Path to a CSV file with a header row.

    Returns:
        A ``(x, y)`` tuple: ``x`` is a DataFrame of feature columns and ``y``
        is a Series holding the last column.
    """
    # Read from the path we were given instead of relying on a global
    # DataFrame that may not exist (or may hold different data).
    data = pd.read_csv(file_path)

    # A saved row index shows up as "Unnamed: 0"; keeping it as a feature
    # leaks the row order into the model.
    is_index_artifact = data.columns.astype(str).str.match(r'Unnamed: \d+$')
    data = data.loc[:, ~is_index_artifact]

    x = data.iloc[:, :-1]  # every column except the last: features
    y = data.iloc[:, -1]   # last column: target
    return x, y

file_path = 'iris.csv'  # path to the dataset file
df = pd.read_csv(file_path)  # read the CSV file into a DataFrame
df.head()  # preview the first rows

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [200]:
x, y = load_data(file_path)  # feature columns (x) and target column (y)

In [202]:
# Split the data into training and test sets (80% train, 20% test).
# random_state=42 fixes the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [204]:
# Standardize the features (zero mean, unit standard deviation).
# The scaler is fitted on the training set only; the test set is transformed
# with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [224]:
# Decision Tree classifier
from sklearn.tree import DecisionTreeClassifier

# Fix the seed so repeated runs build the same tree; 42 matches the
# random_state already used for the train/test split.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_scaled, y_train)  # train on the standardized training set

dt_pred = dt_model.predict(X_test_scaled)  # predict labels for the test set
dt_acc = accuracy_score(y_test, dt_pred)  # fraction of correct predictions

# Report accuracy, per-class metrics, and the confusion matrix
print(f'Decision Tree Accuracy: {dt_acc:.4f}\n')
print(classification_report(y_test, dt_pred), '\n')
print(confusion_matrix(y_test, dt_pred))

Decision Tree Accuracy: 1.0000

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30
 

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [226]:
# Logistic Regression classifier
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training are chained
lr_model = LogisticRegression(max_iter=200).fit(X_train_scaled, y_train)

# Predict the test set and score the predictions against the true labels
lr_pred = lr_model.predict(X_test_scaled)
lr_acc = accuracy_score(y_true=y_test, y_pred=lr_pred)

# Report accuracy, per-class metrics, and the confusion matrix
print(f'Logistic Regression Accuracy: {lr_acc:.4f}\n')
print(classification_report(y_true=y_test, y_pred=lr_pred), '\n')
print(confusion_matrix(y_true=y_test, y_pred=lr_pred))

Logistic Regression Accuracy: 1.0000

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30
 

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [228]:
# Support Vector Machine classifier
from sklearn.svm import SVC

# fit() returns the estimator itself, so construction and training are chained
svm_model = SVC().fit(X_train_scaled, y_train)

# Predict the test set and score the predictions against the true labels
svm_pred = svm_model.predict(X_test_scaled)
svm_acc = accuracy_score(y_true=y_test, y_pred=svm_pred)

# Report accuracy, per-class metrics, and the confusion matrix
print(f'SVM Accuracy: {svm_acc:.4f}\n')
print(classification_report(y_true=y_test, y_pred=svm_pred), '\n')
print(confusion_matrix(y_true=y_test, y_pred=svm_pred))

SVM Accuracy: 1.0000

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30
 

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [230]:
# Random Projection: project the features with a Gaussian random matrix,
# then classify the projected data
from sklearn.random_projection import GaussianRandomProjection

rp_model = GaussianRandomProjection(n_components=4, random_state=42)
# Fit the projection on the training set; reuse the same matrix for the test set
X_train_proj = rp_model.fit_transform(X_train_scaled)
X_test_proj = rp_model.transform(X_test_scaled)

# Train a logistic-regression classifier on the projected features
# (fit() returns the estimator itself, so construction and training are chained)
rp_classifier = LogisticRegression(max_iter=200).fit(X_train_proj, y_train)
rp_pred = rp_classifier.predict(X_test_proj)
rp_acc = accuracy_score(y_true=y_test, y_pred=rp_pred)

# Report accuracy, per-class metrics, and the confusion matrix
print(f'Random Projection Accuracy: {rp_acc:.4f}\n')
print(classification_report(y_true=y_test, y_pred=rp_pred),'\n')
print(confusion_matrix(y_true=y_test, y_pred=rp_pred))

Random Projection Accuracy: 1.0000

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30
 

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
