<a href="https://colab.research.google.com/github/KIM14957/STE2023/blob/main/ste_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

# 합성데이터 로딩


In [None]:
# Read the STE CSV file from Google Drive into a DataFrame.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount
# call is visible in this notebook; confirm before Restart & Run All.
df = pd.read_csv(
    '/content/drive/MyDrive/STE2.csv',
    sep=",",
)

# 데이터 전처리

In [None]:
# Show the (rows, columns) dimensions of the loaded DataFrame.
df.shape


In [None]:
# Print a concise summary of the DataFrame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

In [None]:
# Preview the first rows of the DataFrame.
df.head()

# 상관관계 분석 

In [None]:
# Pairwise correlation between the numeric columns.
# numeric_only=True is passed explicitly because pandas >= 2.0 raises on
# non-numeric columns instead of silently dropping them.
# NOTE(review): 'dn' looks categorical (it is one-hot encoded later) — confirm dtype.
df.corr(numeric_only=True)

In [None]:
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Annotated heatmap of the pairwise correlations between numeric columns.
# numeric_only=True keeps this working on pandas >= 2.0, where corr() raises
# on non-numeric columns rather than dropping them.
sns.heatmap(df.corr(numeric_only=True), annot=True)
# Title typo fixed: "CORRATION" -> "CORRELATION".
plt.title("STE FEATURE CORRELATION MAP")

In [None]:
# Columns used for the pair plot and the correlation-matrix heatmap.
cols = [
    'fat',
    'armr',
    'ct',
    'conj',
    'lossr',
]

# 산점도 행렬 시각화 


In [None]:
# Scatter-plot matrix of the selected columns; each facet is 2.5 inches tall.
sns.pairplot(
    data=df[cols],
    height=2.5,
)

# 상관관계 행렬을 이용한 분석 


In [None]:
# Correlation-coefficient matrix of the selected columns: with rowvar=False
# every column of the array is treated as one variable.
cm = np.corrcoef(df[cols].to_numpy(), rowvar=False)

# Note: this changes seaborn's global font scale, so later plots are affected too.
sns.set(font_scale=1.5)

# Square, annotated heatmap with the column names on both axes.
hm = sns.heatmap(
    cm,
    cbar=True,
    annot=True,
    square=True,
    fmt='.2f',
    annot_kws={'size': 15},
    xticklabels=cols,
    yticklabels=cols,
)
plt.tight_layout()
plt.show()

# boxplot 시각화 


In [None]:
# Distribution of 'lossr' for each value of 'dn'.
# x / y are passed by keyword: seaborn deprecated positional data arguments in
# 0.11 and rejects them from 0.12 on, so the positional form fails there.
sns.boxplot(x='dn', y='lossr', data=df)

# One Hot Encoding

In [None]:
from sklearn.preprocessing import OneHotEncoder

In [None]:
# One-hot encode the 'dn' column; generated column names get the 'dh' prefix.
y = pd.get_dummies(data=df['dn'], prefix='dh')
y.head()

In [None]:
# Append the dummy columns to the frame and remove the original 'dn' column.
# Note: this rebinds `df`, so the cell is not safe to run twice in a row.
df = pd.concat([df, y], axis='columns').drop(columns='dn')
df.head()

# 회귀 트리 


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Regression target: the 'lossr' column.
y_target = df.loc[:, 'lossr']
# Feature matrix: every remaining column (df itself is left untouched).
x_data = df.drop(columns='lossr')

In [None]:
# 5-fold cross-validation of a seeded 1000-tree random forest.
rf = RandomForestRegressor(n_estimators=1000, random_state=0)
neg_mse_scores = cross_val_score(
    rf,
    x_data,
    y_target,
    cv=5,
    scoring="neg_mean_squared_error",
)
# The scorer returns negated MSE, so flip the sign before taking the root.
rmse_scores = np.sqrt(-neg_mse_scores)
avg_rmse = rmse_scores.mean()

In [None]:
# Report the mean cross-validated RMSE, rounded to three decimals.
# Typo in the printed label fixed: '교차검즘' -> '교차검증' (cross-validation).
print('교차검증의 평균 RMSE: {0:.3f}'.format(avg_rmse))

In [None]:
from sklearn.metrics import r2_score

In [None]:
model=RandomForestRegressor().fit(x_data,y_target)

In [None]:
y_pred=model.predict(x_data)

In [None]:
r_sq=r2_score(y_target,y_pred)

In [None]:
print("R2 value:",r_sq)

# 학습결과를 트리로 보기 


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import export_graphviz
import graphviz

# Fit a 100-tree forest on the full feature matrix.
# The seed is fixed so that the fitted forest — and everything derived from
# `rf` afterwards (the exported tree, the feature importances) — is identical
# on every Restart & Run All.
rf = RandomForestRegressor(n_estimators=100, random_state=0)
rf.fit(x_data, y_target)

In [None]:
# Render the first tree of the fitted forest as a Graphviz diagram.
first_tree = rf.estimators_[0]
dot_data = export_graphviz(
    first_tree,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=x_data.columns,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
graph

# 피처별 중요도 시각화 


In [None]:
# Feature importances of the fitted forest, labelled by column name and
# ordered from most to least important.
feature_series = (
    pd.Series(rf.feature_importances_, index=x_data.columns)
    .sort_values(ascending=False)
)
# Horizontal bars: importance on the x-axis, feature name on the y-axis.
sns.barplot(x=feature_series, y=feature_series.index)

# 회귀트리 Regressor가 어떻게 예측값을 판단하는지 시각화하기 

In [None]:
# Work on a reproducible random subset of the ('armr', 'lossr') pairs.
df_sample = df[['armr', 'lossr']]
# Cap the sample size at the number of available rows: DataFrame.sample raises
# ValueError when n exceeds the row count (sampling is without replacement).
n_sample = min(500, len(df_sample))
df_sample = df_sample.sample(n=n_sample, random_state=0)
print(df_sample.shape)

# Scatter of the sampled points, with axis labels so the figure stands alone.
plt.figure()
plt.scatter(df_sample.armr, df_sample.lossr, c='darkorange')
plt.xlabel('armr')
plt.ylabel('lossr')

# 선형회귀와 랜덤포레스트 회귀선 비교 

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [None]:
# Three regressors to compare on the single 'armr' feature:
# an ordinary linear regression and two random forests of different depth.
# NOTE: lr_reg2 / lr_reg3 are random forests despite the "lr_" prefix; the
# names are kept because later cells refer to them.
lr_reg1=LinearRegression()
# The forests are seeded so the fitted curves are the same on every run.
lr_reg2=RandomForestRegressor(max_depth=2, random_state=0)
lr_reg3=RandomForestRegressor(max_depth=7, random_state=0)

In [None]:
# Evenly spaced query points from 0.1 up to (but excluding) 0.9 in steps of
# 0.01, shaped as a single-column 2-D array for use as estimator input.
x_test = np.arange(0.1, 0.9, 0.01)[:, np.newaxis]

In [None]:
x_feature=df_sample['armr'].values.reshape(-1,1)
y_target=df_sample['lossr'].values.reshape(-1,1)

In [None]:
lr_reg1.fit(x_feature,y_target)
lr_reg2.fit(x_feature,y_target)
lr_reg3.fit(x_feature,y_target)

In [None]:
# Evaluate each fitted model on the same grid of query points.
pred_lr1, pred_lr2, pred_lr3 = (
    fitted.predict(x_test) for fitted in (lr_reg1, lr_reg2, lr_reg3)
)


# 모델별 회귀선 시각화 


In [None]:
# Side-by-side comparison of the three fitted curves over the raw data.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

# One entry per panel: (title, predictions, legend label, line colour).
# The original labels were swapped between the two forest panels
# (max_depth=2 was labelled "max_depth:7" and vice versa) and never shown,
# because no legend was drawn.
panels = [
    ('LR', pred_lr1, 'linear regression', 'red'),
    ('RandomForest(max_depth=2)', pred_lr2, 'max_depth:2', 'orange'),
    ('RandomForest(max_depth=7)', pred_lr3, 'max_depth:7', 'green'),
]

for ax, (title, pred, label, color) in zip(axs, panels):
    ax.set_title(title)
    ax.scatter(df.armr, df.lossr, s=5)
    ax.plot(x_test, pred, label=label, color=color)
    ax.legend()

plt.show()

