In [2]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import numpy as np
import warnings
warnings.filterwarnings('ignore')

plt.rcParams['font.family'] ='Malgun Gothic'
plt.rcParams['axes.unicode_minus'] =False

In [3]:
# Load the growth survey table and derive the helper columns used below.
df = pd.read_csv("../output/2324_growth.csv")

# Plot number: IDs 1-10 -> plot 1, IDs 11-20 -> plot 2, and so on.
df['plot'] = ((df['ID'] - 1) // 10) + 1

# The first two characters of `date` are prefixed with '20' to form the year.
df['year'] = ('20' + df['date'].astype(str).str[0:2]).astype(int)

# Where plant height is missing, fall back to culm length + panicle length.
# Vectorised fillna replaces the former row-wise df.apply(axis=1).
df['초장(cm)'] = df['초장(cm)'].fillna(df['간장(cm)'] + df['수장(cm)'])

# Integer code of the growth stage in chronological order
# (stages not in the list get code -1).
df['생육단계_숫자'] = pd.Categorical(
    df['생육단계'],
    categories=['분얼전기', '분얼후기', '개화기', '개화후2주', '개화후4주', '수확기'],
    ordered=True,
).codes

FileNotFoundError: [Errno 2] No such file or directory: '../output/2324_growth.csv'

In [None]:
# Composite "<year>-<plot>" key, built from the string forms of both columns.
df['year-plot'] = df['year'].astype(str).str.cat(df['plot'].astype(str), sep='-')

In [None]:
# Treatment labels derived from the plot number:
#   plots 1-4 -> '광산', all other plots -> '세조'
#   plots 2, 3, 6, 7 -> '추비', all other plots -> '기비'
df['파종'] = np.where(df['plot'] <= 4, '광산', '세조')
df['시비'] = np.where(df['plot'].isin([2, 3, 6, 7]), '추비', '기비')
df['파종_시비'] = df['파종'] + '_' + df['시비']
df

In [None]:
def polynomial_regression(df, x, y, degree):
    """Fit a least-squares polynomial of ``degree`` in ``x`` to ``y``.

    Rows where either column is missing are dropped before fitting.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns ``x`` and ``y``.
    x, y : str
        Names of the predictor and response columns.
    degree : int
        Polynomial degree (>= 1).

    Returns
    -------
    tuple
        ``(intercept, coef_1, ..., coef_degree)`` where ``coef_k`` multiplies
        ``x ** k``. For ``degree=3`` this is the same 4-tuple as before; the
        previous implementation always indexed exactly three coefficients,
        which raised IndexError for ``degree < 3`` and silently dropped the
        higher-order terms for ``degree > 3``.
    """
    data = df[[x, y]].dropna()

    # scikit-learn expects 2-D inputs of shape (n_samples, 1).
    x_real = data[[x]].to_numpy()
    y_real = data[[y]].to_numpy()

    # include_bias=False: the constant term is fitted by LinearRegression.
    polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
    x_poly = polynomial_features.fit_transform(x_real)

    model = LinearRegression()
    model.fit(x_poly, y_real)

    # y_real is 2-D, so intercept_ has shape (1,) and coef_ has shape (1, degree).
    return (model.intercept_[0], *model.coef_[0])

In [None]:
# One panel per year: raw observations (black) plus the fitted cubic (red).
y_feature_name = 'NDVI'
fig, axes = plt.subplots(1, 2, figsize=(12, 6), sharey=True)

for ax, year in zip(axes, [2023, 2024]):
    df_target = df[df['year'] == year]

    # poly_values = (intercept, coefficient of x, of x^2, of x^3)
    poly_values = polynomial_regression(df_target, '생육단계_숫자', y_feature_name, 3)
    intercept, c1, c2, c3 = poly_values

    # Evaluate the cubic at every observation's stage code.
    stage_code = df_target['생육단계_숫자'].astype(float)
    poly_series = stage_code * c1 + stage_code ** 2 * c2 + stage_code ** 3 * c3 + intercept

    sns.scatterplot(x='생육단계', y=y_feature_name, data=df_target, color='black', ax=ax)
    sns.lineplot(x='생육단계', y=poly_series, data=df_target, color='red', ax=ax)
    ax.set_title(f'{year}년')

fig.suptitle(f'생육단계별 {y_feature_name} 추이');

In [None]:
from pandas.api.types import CategoricalDtype

y_variables = ['초장(cm)', 'LAI', 'SPAD', 'NDVI', 'GNDVI', 'RVI', 'CVI', 'NDRE']

# Mean of every variable for each (year-plot, growth stage) combination.
df_mean = df.groupby(['year-plot', '생육단계'])[y_variables].mean().reset_index()

# Split the "<year>-<plot>" key back into its parts; `plot` stays a string.
year_plot_parts = df_mean['year-plot'].str.split('-', expand=True)
df_mean['year'] = year_plot_parts[0].astype(int)
df_mean['plot'] = year_plot_parts[1]

# Ordered categorical so the stages sort and plot chronologically.
df_mean['생육단계'] = df_mean['생육단계'].astype(
    CategoricalDtype(
        categories=['분얼전기', '분얼후기', '개화기', '개화후2주', '개화후4주', '수확기'],
        ordered=True,
    )
)

In [None]:
from pandas.api.types import CategoricalDtype

# Mean of every variable for each (year, growth stage) combination.
stage_dtype = CategoricalDtype(
    categories=['분얼전기', '분얼후기', '개화기', '개화후2주', '개화후4주', '수확기'],
    ordered=True,
)
df_year = df.groupby(['year', '생육단계'])[y_variables].mean().reset_index()
df_year['생육단계'] = df_year['생육단계'].astype(stage_dtype)
df_year

In [None]:
# 3x3 grid with one panel per entry of y_variables, filled row by row.
# Points are per-plot means (df_mean); lines are yearly means (df_year).
fig, axes = plt.subplots(3, 3, figsize=(15, 12))
palette = {2023: 'blue', 2024: 'orange'}

# A single loop replaces the three copy-pasted per-row loops. Only the first
# panel builds a legend; its handles are reused for the figure-level legend.
for idx, (ax, y) in enumerate(zip(axes.ravel(), y_variables)):
    sns.scatterplot(x='생육단계', y=y, style='plot', hue='year', data=df_mean, ax=ax,
                    palette=palette, legend='brief' if idx == 0 else False)
    sns.lineplot(x='생육단계', y=y, hue='year', data=df_year, ax=ax,
                 palette=palette, legend=False)
    ax.set_title(y)

# Take the legend entries from the first panel, then remove that panel's own
# legend so it is not drawn twice (on the panel and in the spare grid cell).
handles, labels = axes[0, 0].get_legend_handles_labels()
panel_legend = axes[0, 0].get_legend()
if panel_legend is not None:
    panel_legend.remove()

# Hide the panels that have no variable assigned (the last one for 8 variables).
for unused_ax in axes.ravel()[len(y_variables):]:
    unused_ax.axis('off')

# Place the shared legend over the empty bottom-right cell.
fig.legend(handles, labels, loc='center', bbox_to_anchor=(0.8, 0.2), ncol=1)

plt.tight_layout()
plt.show()


In [None]:
# 3x3 grid with one panel per entry of y_variables, filled row by row.
# Year is encoded by marker fill (2023 hollow, 2024 filled) and line style
# (2023 dashed, 2024 solid); the line colour depends on the grid row.
fig, axes = plt.subplots(3, 3, figsize=(15, 9))

marker_styles = {2023: 'o', 2024: 'o'}          # marker shape per year
line_styles = {2023: '--', 2024: '-'}           # line style per year
marker_fills = {2023: 'none', 2024: 'black'}    # hollow vs. filled markers
row_line_colors = ['green', 'orange', 'orange'] # line colour per grid row
legend_line_color = 'gray'

# A single loop replaces the three copy-pasted per-row loops.
for idx, y in enumerate(y_variables):
    row, col = divmod(idx, 3)
    ax = axes[row, col]
    for year in [2023, 2024]:
        # Per-plot means as black markers.
        sns.scatterplot(
            x='생육단계',
            y=y,
            data=df_mean[df_mean['year'] == year],
            ax=ax,
            style='plot',
            markers=True,
            color='black',
            edgecolor='black',
            legend=False,
            facecolors=marker_fills[year],
        )
        # Yearly mean as a line.
        sns.lineplot(
            x='생육단계',
            y=y,
            data=df_year[df_year['year'] == year],
            ax=ax,
            color=row_line_colors[row],
            linestyle=line_styles[year],
            linewidth=2
        )
    ax.set_title(y)

# Hide the panels that have no variable assigned (the last one for 8 variables).
for unused_ax in axes.ravel()[len(y_variables):]:
    unused_ax.axis('off')

# Figure-level legend for the year encoding. The handles used to be red, a
# colour that appears nowhere in the panels; since the line colour varies by
# row, a neutral colour is used and only style and marker fill carry meaning.
fig.legend(
    handles=[
        plt.Line2D([0], [0], color=legend_line_color, linestyle=line_styles[year],
                   marker=marker_styles[year], markersize=8,
                   markerfacecolor=marker_fills[year], markeredgecolor='black',
                   label=str(year))
        for year in [2023, 2024]
    ],
    loc='upper center',
    bbox_to_anchor=(0.5, 1.05),  # slightly above the top edge of the figure
    ncol=2                       # entries side by side
)

plt.tight_layout()
# The legend is anchored outside the figure (y = 1.05); bbox_inches='tight'
# expands the saved area so it is not cropped out of the PNG.
plt.savefig('../output/timeseries.png', bbox_inches='tight')


In [None]:
# NOTE(review): this cell draws onto `axes` left over from an earlier cell and
# never shows or saves the result - confirm whether it is still needed.
for ax, y in zip(axes[2], y_variables[6:]):
    sns.scatterplot(x='생육단계', y=y, hue='year-plot', data=df_mean, ax=ax, legend=False)
    ax.set_title(y)

In [None]:
def draw_corr(stage, df, ax):
    """Draw the lower-triangle correlation heatmap for one growth stage.

    Parameters
    ----------
    stage : str
        Value of the '생육단계' column to select; also used as the panel title.
    df : pandas.DataFrame
        Table holding '생육단계' and the variables to correlate.
    ax : matplotlib.axes.Axes
        Axes to draw the heatmap on.
    """
    # Keep this stage's rows and drop columns that are entirely missing in it.
    df_stage = df[df['생육단계'] == stage].dropna(axis=1, how='all')

    # Restrict the fixed variable list to the columns that survived dropna;
    # indexing with a dropped column would raise KeyError.
    cols = ['NDVI', 'CVI', 'GNDVI', 'RVI', 'NDRE', 'SPAD', 'LAI', '초장(cm)']
    cols = [col for col in cols if col in df_stage.columns]
    df_corr = df_stage[cols].corr()

    # Mask the upper triangle including the diagonal. The builtin `bool` is
    # used because the `np.bool` alias was removed in NumPy 1.24.
    mask = np.triu(np.ones_like(df_corr, dtype=bool))

    sns.heatmap(df_corr,
                cmap='RdYlBu_r',
                annot=True,
                mask=mask,
                linewidths=.5,
                vmin=-1, vmax=1,
                ax=ax, cbar=False
                )
    ax.set_title(stage)
    ax.grid(False)

# One correlation heatmap per growth stage on a 2x3 grid, both years pooled.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()  # 2-D grid of axes -> flat sequence

for ax, stage in zip(axes, ['분얼전기', '분얼후기', '개화기', '개화후2주', '개화후4주', '수확기']):
    draw_corr(stage, df, ax)

plt.tight_layout()
plt.show()

In [None]:
# def draw_corr(stage, df_corr):
#     # fig, ax = plt.subplots( figsize=(7,7) )
#     
#     mask = np.zeros_like(df_corr, dtype=np.bool)
#     mask[np.triu_indices_from(mask)] = True
#     
#     sns.heatmap(df_corr, 
#                 cmap = 'RdYlBu_r', 
#                 annot = True,
#                 mask=mask, 
#                 linewidths=.5,
#                 cbar_kws={"shrink": .5},
#                 vmin = -1,vmax = 1
#                )
#     plt.title(f"{stage} 변수 상관관계")
#     plt.grid(False)
#     plt.show()
# for stage in ['분얼전기', '분얼후기', '개화기', '개화후2주', '개화후4주', '수확기']:
    # df_stage = df[df['생육단계'] == stage]
    # df_stage = df_stage.dropna(axis=1, how='all')
    # cols = [col for col in df_stage.columns if col not in ['ID', 'date', 'plot', 'year', '생육단계', '생육단계_숫자']]
    # cols.reverse()
    # df_corr = df_stage[cols].corr()
    
    # draw_corr(stage, df_corr)

In [None]:
# Correlation heatmap over all stages and both years.
# Drop columns that contain no data at all (this rebinds `df` for later cells).
df = df.dropna(axis=1, how='all')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

cols = ['NDVI', 'CVI', 'GNDVI', 'RVI', 'NDRE', 'SPAD', 'LAI', '초장(cm)']
df_corr = df[cols].corr()

# Mask the upper triangle including the diagonal. The builtin `bool` is used
# because the `np.bool` alias was removed in NumPy 1.24.
mask = np.triu(np.ones_like(df_corr, dtype=bool))

# Draw the heatmap.
ax = sns.heatmap(df_corr,
                 cmap='RdYlBu_r',
                 annot=True,
                 mask=mask,
                 linewidths=.5,
                 vmin=-1, vmax=1,
                 cbar=False)

# Colour the variable names on both axes by group. The earlier loop walked
# ax.texts, which holds the numeric cell annotations, so no name ever matched;
# the variable names are the tick labels.
for label in ax.get_xticklabels() + ax.get_yticklabels():
    if label.get_text() in ['NDVI', 'CVI', 'GNDVI', 'RVI', 'NDRE']:
        label.set_color('green')
    elif label.get_text() in ['SPAD', 'LAI', '초장(cm)']:
        label.set_color('orange')

plt.grid(False)
plt.show()


In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import pearsonr, linregress
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


def calculate_r2(x, y):
    """Return the squared Pearson correlation (R^2) of a simple linear fit.

    Pairs in which either value is NaN are dropped before fitting, so that a
    single missing measurement does not turn the whole result into NaN.

    Parameters
    ----------
    x, y : array-like of numbers
        Paired observations of equal length.

    Returns
    -------
    float
        ``rvalue ** 2`` from ``scipy.stats.linregress``, or NaN when fewer
        than two complete pairs remain.
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)

    # Keep only the positions where both values are present.
    valid = ~(np.isnan(x_arr) | np.isnan(y_arr))
    if valid.sum() < 2:
        return float('nan')

    return linregress(x_arr[valid], y_arr[valid]).rvalue ** 2


def upper_r2(x, y, **kwargs):
    """Write the R^2 of ``x`` vs ``y`` in the centre of the current axes.

    Used as an upper-triangle callback in ``pair_plot``; extra keyword
    arguments are accepted and ignored.
    """
    label = f'{calculate_r2(x, y):.2f}'
    plt.gca().annotate(
        label,
        xy=(0.5, 0.5),
        xycoords='axes fraction',
        ha='center',
        va='center',
        fontsize=12,
        color='black',
    )


def lower_scatter_with_reg(x, y, **kwargs):
    """Draw a scatter plot with a red regression line on the current axes.

    Used as a lower-triangle callback in ``pair_plot``; extra keyword
    arguments are accepted and ignored.
    """
    sns.regplot(
        x=x,
        y=y,
        ax=plt.gca(),
        scatter_kws={'s': 10},       # small markers
        line_kws={"color": "red"},
    )


def corrfunc(x, y, **kws):
    """Write the Pearson correlation of ``x`` and ``y`` in the centre of the
    current axes. Extra keyword arguments are accepted and ignored.
    """
    # Off-diagonal entry of the 2x2 correlation matrix.
    corr_matrix = np.corrcoef(x, y)
    r = corr_matrix[0, 1]
    current_ax = plt.gca()
    current_ax.annotate(
        f"{r:.2f}",
        xy=(0.5, 0.5),
        xycoords="axes fraction",
        ha="center",
        va="center",
        fontsize=12,
    )


def lower_scatter_with_reg2(x, y, **kwargs):
    """Draw a scatter plot with a red regression line on the current axes.

    Extra keyword arguments are accepted and ignored.
    NOTE(review): the body matches ``lower_scatter_with_reg`` and nothing in
    the visible cells calls this function - confirm whether it is needed.
    """
    target_ax = plt.gca()
    point_style = {'s': 10}
    fit_style = {"color": "red"}
    sns.regplot(x=x, y=y, ax=target_ax, scatter_kws=point_style, line_kws=fit_style)


def pair_plot(data):
    """Show a pair grid of the vegetation indices and growth measurements.

    Upper triangle: R^2 value; lower triangle: scatter with regression line;
    diagonal: histogram with KDE.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column listed in ``values`` below.
    """
    values = ['NDVI', 'CVI', 'GNDVI', 'RVI', 'NDRE', 'SPAD', 'LAI', '초장(cm)']
    grid = sns.PairGrid(data[values], vars=values)
    grid.map_upper(upper_r2)
    grid.map_lower(lower_scatter_with_reg)
    grid.map_diag(sns.histplot, kde=True, color="gold", bins=10)
    plt.show()
    
        # r2_save_path = os.path.join(save_folder, f"r2_{key}.png")
        # plt.savefig(r2_save_path)
        # print("Save:", r2_save_path)
    
    
# Pair grid over all observations in df.
pair_plot(data=df)

In [None]:


from pandas.api.types import CategoricalDtype

y_variables = ['초장(cm)', 'LAI', 'SPAD', 'NDVI', 'GNDVI', 'RVI', 'CVI', 'NDRE']

# Mean of every variable per (year, growth stage, fertilisation treatment).
# Note: this rebinds df_mean, which earlier held the per-plot means.
stage_dtype = CategoricalDtype(
    categories=['분얼전기', '분얼후기', '개화기', '개화후2주', '개화후4주', '수확기'],
    ordered=True,
)
df_mean = df.groupby(['year', '생육단계', '시비'])[y_variables].mean().reset_index()
df_mean['생육단계'] = df_mean['생육단계'].astype(stage_dtype)
df_mean

In [None]:
growth_stages = ['분얼전기', '분얼후기', '개화기', '개화후2주', '개화후4주', '수확기']

# Make the stage column an ordered categorical, then sort rows by stage.
stage_column = pd.Categorical(df_mean['생육단계'], categories=growth_stages, ordered=True)
df_mean = (
    df_mean
    .assign(**{'생육단계': stage_column})
    .sort_values('생육단계')
)

In [None]:
# Plant height per growth stage: one line per (year, fertilisation) group.
# Colour encodes the fertilisation treatment, line style encodes the year.
colors = {'기비': 'green', '추비': 'orange'}
linestyles = {2023: '--', 2024: '-'}

fig, ax = plt.subplots(figsize=(8, 6))

for (year, fert), group in df_mean.groupby(['year', '시비']):
    ax.plot(
        group['생육단계'],
        group['초장(cm)'],  # swap in another column (e.g. 'LAI') to plot a different variable
        label=f'{year} - {fert}',
        color=colors[fert],
        linestyle=linestyles[year],
        marker='o',  # mark each stage's point
    )

ax.set(xlabel='생육단계', ylabel='초장(cm)', title='생육단계별 초장')
ax.legend()
plt.show()