# diabetes_regression (DT, RF, SVR, Linear)

## Import Libraries

In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

## Data Preparation & Scaling

In [41]:
# Load the diabetes dataset from the course repository and preview the first rows.
df = pd.read_csv("https://github.com/MyungKyuYi/AI-class/raw/main/diabetes.csv")
print(df.head())
print("\n=====================================================\n")

# The 'Outcome' column is not used in this regression task, so remove it.
df = df.drop('Outcome', axis=1)

# Count NaN entries per column.
# NOTE(review): this only detects NaN. The preview above shows 0 values in
# SkinThickness and Insulin, which may be placeholders for missing
# measurements -- confirm before trusting the "no missing values" result.
print(df.isna().sum())
print("\n=====================================================\n")

# BMI is the regression target; every remaining column is a feature.
Y = df['BMI'].to_numpy()
X = df.drop(columns='BMI').to_numpy()

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=15
)

# Print the shape of each split, one per line.
print(*(part.shape for part in (X_train, Y_train, X_test, Y_test)), sep="\n")
print("\n=====================================================\n")

# Standardize the features. The scaler learns its statistics from the training
# split only, and the same transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
dtype: int64


(537, 7)
(537,)
(231, 7)
(231,)




## Train & Test & Report

In [44]:
def fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and score it on a held-out split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_fit, y_fit : features and targets used for fitting.
    X_eval, y_eval : features and targets used for evaluation.

    Returns
    -------
    tuple
        ``(predictions, mse)`` where ``predictions`` is ``model.predict(X_eval)``
        and ``mse`` is the mean squared error between ``y_eval`` and those
        predictions.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    # scikit-learn's documented argument order is (y_true, y_pred).
    return predictions, mean_squared_error(y_eval, predictions)


# Seed for the estimators that use randomness, so a fresh "Run All" reproduces
# the reported numbers. Same value the train/test split uses.
MODEL_SEED = 15

# NOTE: the reported metric is the mean squared error (평균제곱오차), not its
# square root, so the labels below say MSE rather than RMSE (평균제곱근오차).

# Linear Regression
LinearModel = LinearRegression()
LinearPreds, LinearMSE = fit_and_score(LinearModel, X_train, Y_train, X_test, Y_test)
print('LinearModel 평균제곱오차', LinearMSE)
print("\n=====================================================\n")

# Decision Tree (DT)
DTModel = DecisionTreeRegressor(random_state=MODEL_SEED)
DTPreds, DTMSE = fit_and_score(DTModel, X_train, Y_train, X_test, Y_test)
print('DecisionTreeModel 평균제곱오차', DTMSE)
print("\n=====================================================\n")

# Random Forest (RF)
RFModel = RandomForestRegressor(random_state=MODEL_SEED)
RFPreds, RFMSE = fit_and_score(RFModel, X_train, Y_train, X_test, Y_test)
print('RandomForestModel 평균제곱오차', RFMSE)
print("\n=====================================================\n")

# Support Vector Regression (SVR)
SVRModel = SVR()
SVRPreds, SVRMSE = fit_and_score(SVRModel, X_train, Y_train, X_test, Y_test)
print('SVRModel 평균제곱오차', SVRMSE)
print("\n=====================================================\n")

LinearModel 평균제곱근오차 55.75902522987775


DecisionTreeModel 평균제곱근오차 96.89865800865799


RandomForestModel 평균제곱근오차 56.37892907792207


SVRModel 평균제곱근오차 53.98275588968201


