In [1]:
import matplotlib.pyplot as plt
# Use the NanumGothicCoding font family for all matplotlib text.
# NOTE(review): presumably chosen so the Korean labels used in this notebook
# render correctly; the font has to be installed on the machine — confirm.
plt.rcParams["font.family"] = 'NanumGothicCoding'

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the raw per-crop tables (cabbage, radish, red pepper per the file names,
# which are labelled 2006 to 2015). Every file is decoded as CP949.
# NOTE(review): "chinise" is the spelling used in the path; presumably it
# matches the file name on disk — do not "fix" it without renaming the file.
df_cabbage = pd.read_csv("../data/2006to2015_chinise_cabbage.csv", encoding='cp949')
df_radish = pd.read_csv("../data/2006to2015_radish.csv", encoding='cp949')
df_pepper = pd.read_csv("../data/2006to2015_red_pepper.csv", encoding='cp949')

# Companion "weather" tables for the same three crops, read the same way.
df_cabbage_weather = pd.read_csv("../data/2006to2015_chinise_cabbage_weather.csv", encoding='cp949')
df_radish_weather = pd.read_csv("../data/2006to2015_radish_weather.csv", encoding='cp949')
df_pepper_weather = pd.read_csv("../data/2006to2015_red_pepper_weather.csv", encoding='cp949')

In [39]:
# Work on copies so the frames loaded from disk stay untouched and do not
# have to be read again if the cleaning steps below need to be redone.
df_cabbage_copied = df_cabbage.copy()
df_radish_copied = df_radish.copy()

In [40]:
# Drop unused columns and relabel the remaining ones
def column_drop_and_change(df, species):
    """Drop unused year columns from ``df`` and label the kept ones by variety.

    The frame is expected to carry year headers in the form ``'2006'``,
    ``'2006.1'``, ... ``'2015.7'``. Working on ``df`` in place, the function:

    1. drops the bare year columns ``'2006'`` .. ``'2015'`` plus every
       ``<year><suffix>`` column whose suffix is in the species' drop list
       (suffixed columns that do not exist are simply skipped);
    2. for every remaining column whose last two characters are a known
       suffix, writes the matching variety name into row label ``0`` and
       renames the column to its name without the suffix. Several columns
       therefore end up sharing the same year label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified in place.
    species : int
        ``0`` selects the cabbage rules, ``1`` the radish rules. Any other
        value leaves ``df`` untouched.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as ``df``.

    Raises
    ------
    KeyError
        If one of the bare year columns ``'2006'`` .. ``'2015'`` is missing.
    """
    # species -> (suffix -> variety written into row 0, suffixes to drop)
    rules_by_species = {
        0: (
            {'.1': '노지봄배추', '.3': '고랭지배추', '.5': '노지가을배추'},
            ['.2', '.4', '.6', '.7'],
        ),
        1: (
            {'.1': '노지봄무', '.3': '고랭지무', '.5': '노지가을무', '.7': '노지겨울무'},
            ['.2', '.4', '.6'],
        ),
    }
    if species not in rules_by_species:
        return df
    variety_by_suffix, drop_suffixes = rules_by_species[species]

    years = [str(year) for year in range(2006, 2016)]
    existing_columns = set(df.columns)
    suffixed_to_drop = [
        year + suffix
        for year in years
        for suffix in drop_suffixes
        if year + suffix in existing_columns
    ]
    # Operate on the argument itself rather than on a notebook-level frame, so
    # the function behaves the same whichever frame it is handed.
    df.drop(columns=years + suffixed_to_drop, inplace=True)

    # Write all variety labels first and rename once at the end: renaming
    # creates duplicate column labels, and row 0 is easier to address while
    # every column name is still unique.
    renamed = {}
    for column in list(df.columns):
        variety = variety_by_suffix.get(column[-2:])
        if variety is not None:
            df.loc[0, column] = variety
            renamed[column] = column[:-2]
    df.rename(columns=renamed, inplace=True)

    return df

In [41]:
# Clean both tables (0 = cabbage rules, 1 = radish rules). The function edits
# the frame it is given and returns it, so each *_change_columns name refers to
# the same object as the corresponding *_copied frame.
# NOTE: not safe to re-run on its own — after the first pass the kept columns
# are named like the bare years, so a second pass would drop them. Re-create
# the copies first.
df_cabbage_change_columns = column_drop_and_change(df_cabbage_copied, 0)
df_radish_change_columns = column_drop_and_change(df_radish_copied, 1)

In [42]:
# Also relabel the column that will become the index: put '품종' into its
# row 0 (the row that holds the variety names) and rename the column from
# '시도별' to '연도별'. Both frames are modified in place.
df_cabbage_change_columns.loc[0, '시도별'] = '품종'
df_cabbage_change_columns.rename(columns={'시도별':'연도별'}, inplace=True)
df_radish_change_columns.loc[0, '시도별'] = '품종'
df_radish_change_columns.rename(columns={'시도별':'연도별'}, inplace=True)

In [43]:
# Index by '연도별' and transpose: the values of that column become the column
# labels, and the former column labels (the year names) become the row index.
df_cabbage_transpose = df_cabbage_change_columns.set_index('연도별').T
df_radish_transpose = df_radish_change_columns.set_index('연도별').T

In [44]:
# Data type conversion
def type_conversion(df):
    """Cast every column except the first one to ``float``.

    Each affected column is replaced, one at a time, by the result of
    ``astype(float)``. The frame is modified in place and the same object is
    returned.
    """
    for position, column_name in enumerate(df.columns):
        if position == 0:
            # The leading column is left as it is.
            continue
        as_float = df[column_name].astype(float)
        df[column_name] = as_float

    return df

In [45]:
# Remove the '세종특별자치시' column from the cabbage table.
# The result is bound back to the same name, so on a second run of this cell
# the column is already gone; errors='ignore' turns that case into a no-op
# instead of a KeyError.
df_cabbage_transpose = df_cabbage_transpose.drop('세종특별자치시', axis=1, errors='ignore')

In [46]:
# Cast every column after the first to float in both transposed tables.
# type_conversion edits its argument and returns it, so the names are unchanged.
df_cabbage_transpose = type_conversion(df_cabbage_transpose)
df_radish_transpose = type_conversion(df_radish_transpose)

In [62]:
# Create the new (initially empty) data frames that create_new_df fills in.
# Columns, in order: production year, variety, production per 10a, region.
df_cabbage_new = pd.DataFrame(columns=['생산 연도', '품종', '10a당 생산량', '지역'])
df_radish_new = pd.DataFrame(columns=['생산 연도', '품종', '10a당 생산량', '지역'])

In [63]:
def create_new_df(df_new, species, df_source=None):
    """Fill ``df_new`` with one row per (source row, region) pair.

    Each row of the source table is looked at in order. If its ``'품종'``
    value is one of the varieties known for ``species``, one output row is
    written for every region listed for that variety, holding (in this order)
    the source row's index label, the variety, the source value found in that
    region's column, and the region name.

    Rows are written into ``df_new`` with ``.loc`` under the integer labels
    0, 1, 2, ...; labels that already exist are overwritten, so filling the
    same frame twice from the same source gives the same result.

    Parameters
    ----------
    df_new : pandas.DataFrame
        Four-column frame that receives the rows. It is modified in place.
    species : int
        ``0`` selects the cabbage varieties/regions, ``1`` the radish ones.
        Any other value leaves ``df_new`` untouched.
    df_source : pandas.DataFrame, optional
        Table to read from. It needs a ``'품종'`` column and one column per
        region listed for the varieties it contains. When omitted, the
        notebook-level ``df_cabbage_transpose`` (species 0) or
        ``df_radish_transpose`` (species 1) is used.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as ``df_new``.

    Raises
    ------
    KeyError
        If the source has no ``'품종'`` column or lacks a region column that a
        matching variety requires.
    """
    # species -> variety -> regions to extract for that variety
    regions_by_species = {
        0: {
            '노지봄배추': ['강원도', '충청남도', '전라남도', '전라북도', '경상북도'],
            '고랭지배추': ['강원도', '전라북도', '경상북도', '경상남도'],
            '노지가을배추': ['충청남도', '충청북도', '전라남도', '전라북도'],
        },
        1: {
            '노지봄무': ['경기도', '강원도', '충청남도', '전라남도', '경상남도'],
            '고랭지무': ['강원도', '충청북도', '전라북도', '경상북도'],
            '노지가을무': ['경기도', '강원도', '충청남도', '전라북도', '경상북도'],
            '노지겨울무': ['제주도'],
        },
    }
    if species not in regions_by_species:
        return df_new

    if df_source is None:
        # Default to the notebook-level transposed tables.
        df_source = df_cabbage_transpose if species == 0 else df_radish_transpose

    regions_by_variety = regions_by_species[species]
    next_label = 0
    for position in range(len(df_source.index)):
        # Materialise the row once; it is reused for the variety and for
        # every region value.
        source_row = df_source.iloc[position]
        variety = source_row['품종']
        row_label = df_source.index[position]
        for region in regions_by_variety.get(variety, ()):
            df_new.loc[next_label] = [row_label, variety, source_row[region], region]
            next_label += 1

    return df_new

In [64]:
# Fill the two empty frames (0 = cabbage, 1 = radish). create_new_df reads the
# transposed tables and writes its rows into the frame it is given.
df_cabbage_new = create_new_df(df_cabbage_new, 0)
df_radish_new = create_new_df(df_radish_new, 1)

In [65]:
# Display the reshaped cabbage table.
df_cabbage_new

Unnamed: 0,생산 연도,품종,10a당 생산량,지역
0,2006,노지봄배추,4068.0,강원도
1,2006,노지봄배추,4428.0,충청남도
2,2006,노지봄배추,5781.0,전라남도
3,2006,노지봄배추,4462.0,전라북도
4,2006,노지봄배추,4607.0,경상북도
...,...,...,...,...
125,2015,고랭지배추,5317.0,경상남도
126,2015,노지가을배추,11161.0,충청남도
127,2015,노지가을배추,10961.0,충청북도
128,2015,노지가을배추,12265.0,전라남도


In [66]:
# Display the reshaped radish table.
df_radish_new

Unnamed: 0,생산 연도,품종,10a당 생산량,지역
0,2006,노지봄무,3478.0,경기도
1,2006,노지봄무,3058.0,강원도
2,2006,노지봄무,3665.0,충청남도
3,2006,노지봄무,4370.0,전라남도
4,2006,노지봄무,3980.0,경상남도
...,...,...,...,...
140,2015,노지가을무,7153.0,강원도
141,2015,노지가을무,9360.0,충청남도
142,2015,노지가을무,10242.0,전라북도
143,2015,노지가을무,7574.0,경상북도
