In [30]:
import platform

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import rcParams

In [None]:
# Keep Korean text and minus signs from rendering as broken glyphs in matplotlib.

# Choose the Korean font by operating system so nobody has to comment lines
# in and out by hand. `platform.system()` returns 'Windows' on Windows and
# 'Darwin' on macOS; any other value falls back to the font this notebook
# originally hard-coded.
_KOREAN_FONT_BY_OS = {
    'Windows': 'Malgun Gothic',
    'Darwin': 'AppleGothic',
}
rcParams['font.family'] = _KOREAN_FONT_BY_OS.get(platform.system(), 'Malgun Gothic')

# Draw negative numbers with a plain hyphen rather than the Unicode minus sign.
rcParams['axes.unicode_minus'] = False

In [None]:
### Data-check class

class DataCheck() :
    """Print quick text summaries of a DataFrame.

    The frame given at construction is kept as ``raw_df`` and is used by every
    method whenever no frame is passed explicitly.
    """

    def __init__(self, df) :
        """Remember ``df`` (by reference) as the default frame to inspect."""
        self.raw_df = df

    def print_info(self, df = None) :
        """Print ``df.info()`` and show ``df.describe()``.

        Parameters
        ----------
        df : DataFrame, optional
            Frame to summarise; defaults to the frame stored at construction.

        Notes
        -----
        Uses the ``display`` function that IPython provides in notebook
        sessions; it is not imported by this file, so this method is expected
        to run inside a notebook kernel.
        """
        if df is None :         # fall back to the stored frame
            df = self.raw_df

        print('------ Data Info -----')
        df.info()
        print('\n----- Data Describe -----')
        display(df.describe())

    def print_value_counts(self, df = None, max_unique = 15) :
        """Print ``value_counts()`` for every low-cardinality column.

        Parameters
        ----------
        df : DataFrame, optional
            Frame to inspect; defaults to the frame stored at construction.
        max_unique : int, optional
            Columns with more than this many distinct values are skipped.
            Defaults to 15, the cutoff that used to be hard-coded.
        """
        if df is None :         # fall back to the stored frame
            df = self.raw_df

        for col in df.columns:
            # Skip columns with too many distinct values to list usefully.
            if df[col].nunique() > max_unique :
                continue
            print(df[col].value_counts())
            print("-" * 20)

In [None]:
### Data-preprocessing class

class DataPreprocessing () :
    """Preprocessing helper that preserves an untouched snapshot of its input."""

    def __init__(self, df) :
        """Store a private copy of ``df`` as the raw (pre-modification) data.

        Copying here means later in-place edits to the caller's frame cannot
        change what ``call_raw_data`` returns.
        """
        self.raw_df = df.copy()

    def call_raw_data(self) :
        """Return the data as it was before any modification.

        A fresh copy is returned on every call, so mutating the result does
        not alter the stored snapshot.
        """
        return self.raw_df.copy()


In [None]:
### Data-visualization class

class DataVisualize:
    """Visualization helper; at present it only stores the frame it is given."""

    def __init__(self, df):
        # Kept by reference (no copy) under the same attribute name the other
        # helper classes use.
        self.raw_df = df

In [None]:
### Load the data

input_file_path = './data/Tree_data.csv'
df = pd.read_csv(input_file_path)

### Create the helper instances (each one receives the same loaded frame)

dc, dp, dv = (
    helper_cls(df)
    for helper_cls in (DataCheck, DataPreprocessing, DataVisualize)
)

In [None]:
# Print the info()/describe() summary of the loaded frame

dc.print_info(df=df)

------ Data Info -----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2783 entries, 0 to 2782
Data columns (total 24 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   No           2783 non-null   int64  
 1   Plot         2783 non-null   int64  
 2   Subplot      2783 non-null   object 
 3   Species      2783 non-null   object 
 4   Light_ISF    2783 non-null   float64
 5   Light_Cat    2783 non-null   object 
 6   Core         2783 non-null   int64  
 7   Soil         2783 non-null   object 
 8   Adult        2783 non-null   object 
 9   Sterile      2783 non-null   object 
 10  Conspecific  2783 non-null   object 
 11  Myco         2783 non-null   object 
 12  SoilMyco     2783 non-null   object 
 13  PlantDate    2783 non-null   object 
 14  AMF          2783 non-null   float64
 15  EMF          1283 non-null   float64
 16  Phenolics    2783 non-null   float64
 17  Lignin       2783 non-null   float64
 18  NSC          2783 non-nul

Unnamed: 0,No,Plot,Light_ISF,Core,AMF,EMF,Phenolics,Lignin,NSC,Census,Time,Event
count,2783.0,2783.0,2783.0,2783.0,2783.0,1283.0,2783.0,2783.0,2783.0,2783.0,2783.0,2782.0
mean,3914.513834,9.561624,0.085707,2016.64894,20.553069,26.47675,1.933105,15.759792,14.219641,15.28207,53.487244,0.570453
std,2253.515063,5.203659,0.025638,0.477387,12.309587,16.63689,1.969842,6.779607,4.298271,9.166555,32.082942,0.4951
min,3.0,1.0,0.032,2016.0,0.0,0.0,-1.35,2.23,4.3,4.0,14.0,0.0
25%,1971.0,5.0,0.066,2016.0,13.4,13.78,0.17,10.355,11.605,7.0,24.5,0.0
50%,3932.0,10.0,0.082,2017.0,18.0,27.72,0.75,14.04,12.66,13.0,45.5,1.0
75%,5879.0,14.0,0.1,2017.0,24.445,35.71,3.78,21.115,17.275,18.0,63.0,1.0
max,7772.0,18.0,0.161,2017.0,100.0,87.5,6.1,32.77,29.45,33.0,115.5,1.0


In [None]:
# Print value counts for each low-cardinality column of the loaded frame

dc.print_value_counts(df=df)

Subplot
A    701
D    666
B    663
C    646
E    107
Name: count, dtype: int64
--------------------
Species
Acer saccharum     751
Prunus serotina    749
Quercus alba       673
Quercus rubra      610
Name: count, dtype: int64
--------------------
Light_Cat
Med     1474
Low     1005
High     304
Name: count, dtype: int64
--------------------
Core
2017    1806
2016     977
Name: count, dtype: int64
--------------------
Soil
Sterile                  423
Prunus serotina          413
Quercus rubra            402
Acer saccharum           397
Populus grandidentata    391
Quercus alba             381
Acer rubrum              376
Name: count, dtype: int64
--------------------
Sterile
Non-Sterile    2360
Sterile         423
Name: count, dtype: int64
--------------------
Conspecific
Heterospecific    1974
Sterilized         423
Conspecific        386
Name: count, dtype: int64
--------------------
Myco
AMF    1500
EMF    1283
Name: count, dtype: int64
--------------------
SoilMyco
AMF        1186
