In [92]:
!pip install pymysql
!sudo apt-get install -y fonts-nanum # 나눔 폰트 설치 (설치 후 런타임 재시작하고 코드 실행)
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Reading package lists... Done
Building dependency tree       
Reading state information... Done
fonts-nanum is already the newest version (20170925-1).
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'sudo apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 20 not upgraded.
/usr/share/fonts: caching, new cache contents: 0 fonts, 1 dirs
/usr/share/fonts/truetype: caching, new cache contents: 0 fonts, 3 dirs
/usr/share/fonts/truetype/humor-sans: caching, new cache contents: 1 fonts, 0 dirs
/usr/share/fonts/truetype/liberation: caching, new cache contents: 16 fonts, 0 dirs
/usr/share/fonts/truetype/nanum: caching, new cache contents: 10 fonts, 0 dirs
/usr/local/share/fonts: caching, new cache contents: 0 fonts, 0 dirs
/root/.local/share/fonts: skipping, no such directory
/root/.fonts: skipping, no such directory

In [93]:
import itertools
import pandas as pd
from sqlalchemy import create_engine
import pymysql
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import matplotlib.font_manager as fm
from matplotlib import rc
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import time
import statsmodels.api as sm
from glob import glob

warnings.filterwarnings(action='ignore')
mpl.rcParams['axes.unicode_minus'] = False
rc('font', family='NanumBarunGothic')

endpoint = 'dao.c51deksujiip.ap-northeast-2.rds.amazonaws.com'
schema = 'dao'
db_connection_str = 'mysql+pymysql://admin:ekfkawnl@{}/{}'.format(endpoint, schema)
try :
    db_connection = create_engine(db_connection_str)
    conn = db_connection.connect()
except :
    print('fail to connect db')

def load_data_from_rds(tabel_name):
  sql = "SELECT * FROM {}".format(tabel_name)
  df = pd.read_sql(sql, db_connection)
  return df

In [94]:
def preprocess_df(df, onehot_col=None, scaling_col=None, drop_col = None, labeling_col = None, scaling_func=MinMaxScaler) :
    """
    :param df: 사용할 데이터프레임
    :param onehot_col: default = None, 원핫인코딩할 컬럼 리스트
    :param scaling_col: default = None, 스케일링할 컬럼 리스트
    :param drop_col: default = None, 사용하지 않을 컬럼 리스트
    :param labeling_col: default = None, 라벨링이 필요한 컬럼 리스트
    :param scaling_func: 스케일링시 사용할 컬럼, MinMaxScaler or StandardScaler
    :return: 전처리 된 데이터 프레임
    """
    if drop_col is not None :
        df = df.drop(drop_col, axis=1)
    if onehot_col is not None:
        df = pd.get_dummies(df, colums=onehot_col)
    if scaling_col is not None :
        for c in scaling_col :
            scaler = scaling_func()
            scaler.fit(df[[c]])
            df[c] = scaler.transform(df[[c]])
    if labeling_col is not None :
        for c in labeling_col :
            encoder = LabelEncoder()
            encoder.fit(df[c])
            df[c] = encoder.transform(df[c])
    return df

In [95]:
# GridSearchCV의 최적 하이퍼파라미터로 학습된 Estimator 반환하는 함수
def optimizing_lr(clf, train_x, train_y):
  parameters = {'penalty': ['l1','l2','elasticnet','none'],
                'C': np.arange(0,1,0.1),
                'random_state': range(3,7,2)}
  start = time.time()
  grid_cv = GridSearchCV(clf, param_grid=parameters, n_jobs=-1, scoring='accuracy', cv=5)
  grid_cv.fit(train_x, train_y)

  #최적의 하이퍼 파라미터 출력
  print('GridSearch 수행시간 : ', time.time()-start)
  print('최적 하이퍼 파라미터 :',grid_cv.best_params_)

  return grid_cv.best_estimator_

In [96]:
def logistic_regression(train_x, train_y, feature_importance = False, optimizing = False):

  # logistic regression
  lr_clf = LogisticRegression()

  # optimizing parameters
  if optimizing :
    lr_clf = optimizing_lr(lr_clf, train_x, train_y)
  else:
    lr_clf.fit(train_x, train_y)

  return lr_clf

In [97]:
# RFECV로 선택된 변수를 반환하는 함수
def rfecv_lr(clf, train_x, train_y):
  min_features_to_select=1
  rfe_cv = RFECV(clf, step=1, cv=5, min_features_to_select=min_features_to_select)
  rfe_cv.fit(train_x, train_y)

  # 최적의 변수 개수 출력
  print("Optimal number of features : %d" % rfe_cv.n_features_)

  # plt.figure()
  # plt.xlabel("Number of features selected")
  # plt.ylabel("Cross validation score (accuracy)")
  # plt.plot(
  #   range(min_features_to_select, len(rfe_cv.grid_scores_) + min_features_to_select),
  #   rfe_cv.grid_scores_,)
  # plt.show()

  return train_x.columns[rfe_cv.support_]

In [98]:
def evalutation_lf(test_y, pred_y):
  # 평가
  acc = accuracy_score(test_y, pred_y)
  f1 = f1_score(test_y, pred_y, average='macro')

  result = pd.DataFrame([['Logistic Regression RFECV', acc, f1]],
                         columns = ['Model', 'Accuracy', 'F1Score'])
  display(result)

In [99]:
def summary_lr(x, y):
  con_x = sm.add_constant(x)
  model = sm.OLS(y, con_x)
  result = model.fit()
  print(result.summary())

In [100]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [101]:
overpass_list = glob('/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/*overpass*.csv')
overpass_list = [p.replace('\\', '/') for p in overpass_list]
print(overpass_list)
tunnel_list = glob('/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/*tunnel*.csv')
tunnel_list = [p.replace('\\', '/') for p in tunnel_list]
print(tunnel_list)

['/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn2_median.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn5_drop.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn3_median.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn5_median.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_mean_drop.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_mean_median.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn2_drop.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn1_drop.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn3_drop.csv', '/content/drive/MyDrive/Colab Notebooks/데청캠_2팀/MainData/prepro/overpass_knn4_median.csv', '/content/drive/MyDrive/Colab Notebooks

In [102]:
onehot_col = ['지형기호(2.3km)', '최우점식물군락']
scaling_col = ['폭(m)', '연장(m)', '주변 로드킬 빈도', '산책로까지의 최단 거리(km)', '농가까지의 거리(km)', '도로 최고제한속도(km/h)',
               '하천거리(km)', '주변 동물종 개수', '등산로까지 최단거리(km)', '유도울타리_연장_m', '유도울타리_높이_m', '교통량', '건물까지거리(km)']
drop_col = ['번호', '위도', '경도', '생태통로_유형', '이용확인종_수', '유도울타리_위도_2.3km', '유도울타리_경도_2.3km', '식물군락명']
scaling_col_tunnel = scaling_col + ['높이(m)', '개방도']

# 육교형

In [103]:
for case in overpass_list :
  df = pd.read_csv(case, encoding='cp949')
  name = case.split('/')[-1].split('.')[0]
  df = preprocess_df(df, drop_col=drop_col, labeling_col=onehot_col, scaling_col=scaling_col, scaling_func=StandardScaler)
 
  # x, y
  x=df.drop('생태통로_효율성', axis=1)
  y=df['생태통로_효율성']

  # train & test set
  train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.2, stratify=df['생태통로_효율성'])

  # logistic regression
  lr_clf =logistic_regression(train_x, train_y, optimizing=True)
  pred_y = lr_clf.predict(test_x)

  # evaluation
  evalutation_lf(test_y, pred_y)

  # summary
  summary_lr(train_x, train_y)

  # rfecv
  selected_x = rfecv_lr(lr_clf, train_x, train_y)

  # RFECV로 선택된 변수로 logistic regression
  lr_clf = logistic_regression(train_x[selected_x], train_y, optimizing=True)
  pred_y = lr_clf.predict(test_x[selected_x])

  # evaluation
  evalutation_lf(test_y, pred_y)

  # summary
  summary_lr(train_x[selected_x], train_y)


GridSearch 수행시간 :  13.121912240982056
최적 하이퍼 파라미터 : {'C': 0.5, 'penalty': 'l2', 'random_state': 3}


Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.533333,0.414656


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.162
Model:                            OLS   Adj. R-squared:                  0.086
Method:                 Least Squares   F-statistic:                     2.119
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00457
Time:                        09:04:54   Log-Likelihood:                -220.32
No. Observations:                 240   AIC:                             482.6
Df Residuals:                     219   BIC:                             555.7
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.0622      0.252  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.666667,0.487356


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.154
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.284
Time:                        09:05:04   Log-Likelihood:                -240.97
No. Observations:                 240   AIC:                             485.9
Df Residuals:                     238   BIC:                             492.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9423      0.043     22.010      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.155
Model:                            OLS   Adj. R-squared:                  0.027
Method:                 Least Squares   F-statistic:                     1.214
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.257
Time:                        09:05:17   Log-Likelihood:                -170.59
No. Observations:                 146   AIC:                             381.2
Df Residuals:                     126   BIC:                             440.9
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.6209      0.402  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.007
Method:                 Least Squares   F-statistic:                 1.565e-07
Date:                Fri, 19 Aug 2022   Prob (F-statistic):               1.00
Time:                        09:05:24   Log-Likelihood:                -182.86
No. Observations:                 146   AIC:                             369.7
Df Residuals:                     144   BIC:                             375.7
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9042      0.170      5.326      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.566667,0.499125


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.120
Model:                            OLS   Adj. R-squared:                  0.040
Method:                 Least Squares   F-statistic:                     1.495
Date:                Fri, 19 Aug 2022   Prob (F-statistic):             0.0847
Time:                        09:05:31   Log-Likelihood:                -226.20
No. Observations:                 240   AIC:                             494.4
Df Residuals:                     219   BIC:                             567.5
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.2099      0.254  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.583333,0.534181


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.069
Model:                            OLS   Adj. R-squared:                  0.053
Method:                 Least Squares   F-statistic:                     4.345
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00208
Time:                        09:05:39   Log-Likelihood:                -232.99
No. Observations:                 240   AIC:                             476.0
Df Residuals:                     235   BIC:                             493.4
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               0.9363      0.042     

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.566667,0.498105


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.156
Model:                            OLS   Adj. R-squared:                  0.079
Method:                 Least Squares   F-statistic:                     2.027
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00728
Time:                        09:05:46   Log-Likelihood:                -221.17
No. Observations:                 240   AIC:                             484.3
Df Residuals:                     219   BIC:                             557.4
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.5589      0.245  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.65,0.612195


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                  0.045
Method:                 Least Squares   F-statistic:                     4.719
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00322
Time:                        09:05:53   Log-Likelihood:                -234.56
No. Observations:                 240   AIC:                             477.1
Df Residuals:                     236   BIC:                             491.0
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.9506      0.042     22.664   

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.145
Model:                            OLS   Adj. R-squared:                  0.016
Method:                 Least Squares   F-statistic:                     1.120
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.339
Time:                        09:06:00   Log-Likelihood:                -171.47
No. Observations:                 146   AIC:                             382.9
Df Residuals:                     126   BIC:                             442.6
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.6816      0.408  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.004
Method:                 Least Squares   F-statistic:                    0.4551
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.501
Time:                        09:06:04   Log-Likelihood:                -182.63
No. Observations:                 146   AIC:                             369.3
Df Residuals:                     144   BIC:                             375.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0007      0.160      6.272      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.633333,0.550792


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.151
Model:                            OLS   Adj. R-squared:                  0.073
Method:                 Least Squares   F-statistic:                     1.947
Date:                Fri, 19 Aug 2022   Prob (F-statistic):             0.0108
Time:                        09:06:11   Log-Likelihood:                -221.92
No. Observations:                 240   AIC:                             485.8
Df Residuals:                     219   BIC:                             558.9
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.4383      0.240  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.65,0.597599


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.075
Model:                            OLS   Adj. R-squared:                  0.059
Method:                 Least Squares   F-statistic:                     4.740
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00107
Time:                        09:06:17   Log-Likelihood:                -232.24
No. Observations:                 240   AIC:                             474.5
Df Residuals:                     235   BIC:                             491.9
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               0.9388      0.042     

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.324324,0.308972


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.126
Model:                            OLS   Adj. R-squared:                 -0.006
Method:                 Least Squares   F-statistic:                    0.9563
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.516
Time:                        09:06:23   Log-Likelihood:                -173.03
No. Observations:                 146   AIC:                             386.1
Df Residuals:                     126   BIC:                             445.7
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.3782      0.443  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.237797


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.053
Model:                            OLS   Adj. R-squared:                  0.040
Method:                 Least Squares   F-statistic:                     3.990
Date:                Fri, 19 Aug 2022   Prob (F-statistic):             0.0206
Time:                        09:06:27   Log-Likelihood:                -178.90
No. Observations:                 146   AIC:                             363.8
Df Residuals:                     143   BIC:                             372.7
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.8948      0.069     12.969   

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.160
Model:                            OLS   Adj. R-squared:                  0.034
Method:                 Least Squares   F-statistic:                     1.267
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.217
Time:                        09:06:34   Log-Likelihood:                -170.10
No. Observations:                 146   AIC:                             380.2
Df Residuals:                     126   BIC:                             439.9
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.7080      0.464  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.007
Method:                 Least Squares   F-statistic:                   0.02848
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.866
Time:                        09:06:38   Log-Likelihood:                -182.85
No. Observations:                 146   AIC:                             369.7
Df Residuals:                     144   BIC:                             375.7
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9302      0.170      5.466      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.122
Model:                            OLS   Adj. R-squared:                 -0.010
Method:                 Least Squares   F-statistic:                    0.9228
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.556
Time:                        09:06:45   Log-Likelihood:                -173.35
No. Observations:                 146   AIC:                             386.7
Df Residuals:                     126   BIC:                             446.4
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.7699      0.443  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.006
Method:                 Least Squares   F-statistic:                   0.08249
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.774
Time:                        09:06:49   Log-Likelihood:                -182.82
No. Observations:                 146   AIC:                             369.6
Df Residuals:                     144   BIC:                             375.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9491      0.172      5.528      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.45,0.368976


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.187
Model:                            OLS   Adj. R-squared:                  0.112
Method:                 Least Squares   F-statistic:                     2.512
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000580
Time:                        09:06:57   Log-Likelihood:                -216.77
No. Observations:                 240   AIC:                             475.5
Df Residuals:                     219   BIC:                             548.6
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.5959      0.236  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.483333,0.381961


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.067
Model:                            OLS   Adj. R-squared:                  0.059
Method:                 Least Squares   F-statistic:                     8.444
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000287
Time:                        09:07:03   Log-Likelihood:                -233.29
No. Observations:                 240   AIC:                             472.6
Df Residuals:                     237   BIC:                             483.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.9362      0.042     22.506   

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.082
Model:                            OLS   Adj. R-squared:                 -0.057
Method:                 Least Squares   F-statistic:                    0.5889
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.909
Time:                        09:07:10   Log-Likelihood:                -176.65
No. Observations:                 146   AIC:                             393.3
Df Residuals:                     126   BIC:                             453.0
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.5026      0.418  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.405405,0.192308


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.007
Method:                 Least Squares   F-statistic:                 0.0003641
Date:                Fri, 19 Aug 2022   Prob (F-statistic):              0.985
Time:                        09:07:14   Log-Likelihood:                -182.86
No. Observations:                 146   AIC:                             369.7
Df Residuals:                     144   BIC:                             375.7
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9071      0.171      5.300      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.65,0.579249


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.163
Model:                            OLS   Adj. R-squared:                  0.087
Method:                 Least Squares   F-statistic:                     2.133
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00426
Time:                        09:07:21   Log-Likelihood:                -220.19
No. Observations:                 240   AIC:                             482.4
Df Residuals:                     219   BIC:                             555.5
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.2849      0.241  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.6,0.548084


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.077
Model:                            OLS   Adj. R-squared:                  0.065
Method:                 Least Squares   F-statistic:                     6.565
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000279
Time:                        09:07:28   Log-Likelihood:                -231.93
No. Observations:                 240   AIC:                             471.9
Df Residuals:                     236   BIC:                             485.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.9503      0.041     22.917   

# 터널형

In [104]:
for case in tunnel_list :
  df = pd.read_csv(case, encoding='cp949')
  name = case.split('/')[-1].split('.')[0]
  df = preprocess_df(df, drop_col=drop_col, labeling_col=onehot_col, scaling_col=scaling_col, scaling_func=StandardScaler)
 
  # x, y
  x=df.drop('생태통로_효율성', axis=1)
  y=df['생태통로_효율성']

  # train & test set
  train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.2, stratify=df['생태통로_효율성'])

  # logistic regression
  lr_clf =logistic_regression(train_x, train_y, optimizing=True)
  pred_y = lr_clf.predict(test_x)

  # evaluation
  evalutation_lf(test_y, pred_y)

  # summary
  summary_lr(train_x, train_y)

  # rfecv
  selected_x = rfecv_lr(lr_clf, train_x, train_y)

  # RFECV로 선택된 변수로 logistic regression
  lr_clf = logistic_regression(train_x[selected_x], train_y, optimizing=True)
  pred_y = lr_clf.predict(test_x[selected_x])

  # evaluation
  evalutation_lf(test_y, pred_y)

  # summary
  summary_lr(train_x[selected_x], train_y)

GridSearch 수행시간 :  6.430907249450684
최적 하이퍼 파라미터 : {'C': 0.7000000000000001, 'penalty': 'l2', 'random_state': 3}


Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.52,0.398551


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.398
Model:                            OLS   Adj. R-squared:                  0.229
Method:                 Least Squares   F-statistic:                     2.356
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00363
Time:                        09:07:34   Log-Likelihood:                -102.59
No. Observations:                  97   AIC:                             249.2
Df Residuals:                      75   BIC:                             305.8
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.6758      1.489  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.56,0.412698


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.131
Model:                            OLS   Adj. R-squared:                  0.122
Method:                 Least Squares   F-statistic:                     14.30
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000273
Time:                        09:07:39   Log-Likelihood:                -120.37
No. Observations:                  97   AIC:                             244.7
Df Residuals:                      95   BIC:                             249.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.1837      0.113     10.496      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.675,0.592716


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.536
Model:                            OLS   Adj. R-squared:                  0.460
Method:                 Least Squares   F-statistic:                     7.034
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           1.12e-13
Time:                        09:07:48   Log-Likelihood:                -108.08
No. Observations:                 157   AIC:                             262.2
Df Residuals:                     134   BIC:                             332.5
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.4538      0.951  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.675,0.59156


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.520
Model:                            OLS   Adj. R-squared:                  0.450
Method:                 Least Squares   F-statistic:                     7.371
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           1.06e-13
Time:                        09:07:57   Log-Likelihood:                -110.71
No. Observations:                 157   AIC:                             263.4
Df Residuals:                     136   BIC:                             327.6
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.5765      0.954  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.6,0.491917


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.433
Model:                            OLS   Adj. R-squared:                  0.340
Method:                 Least Squares   F-statistic:                     4.651
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           9.70e-09
Time:                        09:08:04   Log-Likelihood:                -123.81
No. Observations:                 157   AIC:                             293.6
Df Residuals:                     134   BIC:                             363.9
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.1197      1.227  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.7,0.687037


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.237
Model:                            OLS   Adj. R-squared:                  0.206
Method:                 Least Squares   F-statistic:                     7.752
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           2.84e-07
Time:                        09:08:11   Log-Likelihood:                -147.14
No. Observations:                 157   AIC:                             308.3
Df Residuals:                     150   BIC:                             329.7
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               1.0477      0.233     

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.775,0.712753


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.412
Model:                            OLS   Adj. R-squared:                  0.316
Method:                 Least Squares   F-statistic:                     4.273
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           6.68e-08
Time:                        09:08:18   Log-Likelihood:                -126.62
No. Observations:                 157   AIC:                             299.2
Df Residuals:                     134   BIC:                             369.5
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.1551      0.853  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.725,0.658382


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.246
Model:                            OLS   Adj. R-squared:                  0.221
Method:                 Least Squares   F-statistic:                     9.863
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           3.55e-08
Time:                        09:08:24   Log-Likelihood:                -146.16
No. Observations:                 157   AIC:                             304.3
Df Residuals:                     151   BIC:                             322.7
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.2893      0.082  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.52,0.459091


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.457
Model:                            OLS   Adj. R-squared:                  0.305
Method:                 Least Squares   F-statistic:                     3.010
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000245
Time:                        09:08:31   Log-Likelihood:                -97.517
No. Observations:                  97   AIC:                             239.0
Df Residuals:                      75   BIC:                             295.7
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.1420      1.850  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.52,0.390013


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.187
Model:                            OLS   Adj. R-squared:                  0.178
Method:                 Least Squares   F-statistic:                     21.81
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           9.88e-06
Time:                        09:08:35   Log-Likelihood:                -117.14
No. Observations:                  97   AIC:                             238.3
Df Residuals:                      95   BIC:                             243.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.2088      0.105     11.490      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.725,0.709259


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.417
Model:                            OLS   Adj. R-squared:                  0.322
Method:                 Least Squares   F-statistic:                     4.366
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           4.15e-08
Time:                        09:08:43   Log-Likelihood:                -125.92
No. Observations:                 157   AIC:                             297.8
Df Residuals:                     134   BIC:                             368.1
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.7694      0.982  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.625,0.597267


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.290
Model:                            OLS   Adj. R-squared:                  0.247
Method:                 Least Squares   F-statistic:                     6.682
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           5.45e-08
Time:                        09:08:51   Log-Likelihood:                -141.42
No. Observations:                 157   AIC:                             302.8
Df Residuals:                     147   BIC:                             333.4
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.6353      0.157  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.8,0.775744


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.379
Model:                            OLS   Adj. R-squared:                  0.277
Method:                 Least Squares   F-statistic:                     3.718
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           1.20e-06
Time:                        09:08:58   Log-Likelihood:                -130.94
No. Observations:                 157   AIC:                             307.9
Df Residuals:                     134   BIC:                             378.2
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.4862      1.053  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.8,0.775744


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.379
Model:                            OLS   Adj. R-squared:                  0.277
Method:                 Least Squares   F-statistic:                     3.718
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           1.20e-06
Time:                        09:09:07   Log-Likelihood:                -130.94
No. Observations:                 157   AIC:                             307.9
Df Residuals:                     134   BIC:                             378.2
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.4862      1.053  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.44,0.385741


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.424
Model:                            OLS   Adj. R-squared:                  0.262
Method:                 Least Squares   F-statistic:                     2.626
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00120
Time:                        09:09:13   Log-Likelihood:                -100.44
No. Observations:                  97   AIC:                             244.9
Df Residuals:                      75   BIC:                             301.5
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.0178      1.427  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.6,0.444005


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.114
Model:                            OLS   Adj. R-squared:                  0.104
Method:                 Least Squares   F-statistic:                     12.19
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000729
Time:                        09:09:17   Log-Likelihood:                -121.31
No. Observations:                  97   AIC:                             246.6
Df Residuals:                      95   BIC:                             251.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.1611      0.113     10.261      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.8,0.736721


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.414
Model:                            OLS   Adj. R-squared:                  0.317
Method:                 Least Squares   F-statistic:                     4.296
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           5.95e-08
Time:                        09:09:24   Log-Likelihood:                -126.45
No. Observations:                 157   AIC:                             298.9
Df Residuals:                     134   BIC:                             369.2
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.0114      0.858  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.775,0.719469


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.175
Model:                            OLS   Adj. R-squared:                  0.153
Method:                 Least Squares   F-statistic:                     8.042
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           6.58e-06
Time:                        09:09:30   Log-Likelihood:                -153.28
No. Observations:                 157   AIC:                             316.6
Df Residuals:                     152   BIC:                             331.8
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               1.2978      0.084     

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.48,0.414314


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.408
Model:                            OLS   Adj. R-squared:                  0.242
Method:                 Least Squares   F-statistic:                     2.461
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00236
Time:                        09:09:37   Log-Likelihood:                -101.74
No. Observations:                  97   AIC:                             247.5
Df Residuals:                      75   BIC:                             304.1
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.1392      1.484  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.44,0.305556


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.390
Model:                            OLS   Adj. R-squared:                  0.239
Method:                 Least Squares   F-statistic:                     2.587
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00180
Time:                        09:09:46   Log-Likelihood:                -103.22
No. Observations:                  97   AIC:                             246.4
Df Residuals:                      77   BIC:                             297.9
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.4231      1.464  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.52,0.384717


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.431
Model:                            OLS   Adj. R-squared:                  0.272
Method:                 Least Squares   F-statistic:                     2.710
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000846
Time:                        09:09:55   Log-Likelihood:                -99.783
No. Observations:                  97   AIC:                             243.6
Df Residuals:                      75   BIC:                             300.2
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.4652      1.766  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.56,0.406208


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.133
Model:                            OLS   Adj. R-squared:                  0.124
Method:                 Least Squares   F-statistic:                     14.63
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000234
Time:                        09:09:59   Log-Likelihood:                -120.22
No. Observations:                  97   AIC:                             244.4
Df Residuals:                      95   BIC:                             249.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.1975      0.115     10.459      0.0

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.64,0.476383


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.388
Model:                            OLS   Adj. R-squared:                  0.216
Method:                 Least Squares   F-statistic:                     2.262
Date:                Fri, 19 Aug 2022   Prob (F-statistic):            0.00534
Time:                        09:10:06   Log-Likelihood:                -103.37
No. Observations:                  97   AIC:                             250.7
Df Residuals:                      75   BIC:                             307.4
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.2676      1.496  

Unnamed: 0,Model,Accuracy,F1Score
0,Logistic Regression RFECV,0.6,0.44958


                            OLS Regression Results                            
Dep. Variable:               생태통로_효율성   R-squared:                       0.330
Model:                            OLS   Adj. R-squared:                  0.235
Method:                 Least Squares   F-statistic:                     3.455
Date:                Fri, 19 Aug 2022   Prob (F-statistic):           0.000374
Time:                        09:10:15   Log-Likelihood:                -107.71
No. Observations:                  97   AIC:                             241.4
Df Residuals:                      84   BIC:                             274.9
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.3599      0.973  