In [12]:
import json
import warnings

import matplotlib.pyplot as plt
import pandas as pd

---
# 데이터 로드 및 전처리
---

In [4]:
def _parse_rate_payload(json_str):
    """Decode one raw ``data`` cell into a DataFrame of its ``result`` records.

    The export wraps the JSON document in literal double quotes
    (``"{"result":[...]}"``), so those are removed before decoding.

    Args:
        json_str: Raw text of the ``data`` field for one CSV row.

    Returns:
        A DataFrame built from the ``result`` entry, or ``None`` when the cell
        is missing, is not valid JSON, or has no usable ``result`` entry.
    """
    try:
        # Remove the quote in front of the JSON object.
        json_str = json_str.replace('"{"result":', '{"result":')
        # Remove the quote behind it, only when the text ends with '}]}"'.
        if json_str.endswith('}]}"'):
            json_str = json_str[:-1]
        return pd.DataFrame(json.loads(json_str)['result'])
    except (AttributeError, TypeError, KeyError, ValueError):
        # AttributeError: the cell is NaN/None rather than a string.
        # ValueError: invalid JSON (JSONDecodeError) or a non-tabular result.
        # KeyError / TypeError: the document has no 'result' entry.
        return None


def load_data(path='../sql_data/mama.csv'):
    """Load the exchange-rate export and flatten it into one DataFrame.

    Each line of the file is ``<createdAt>,<payload>``, where the payload is a
    quoted JSON document holding a ``result`` list. The file is read with a tab
    separator so every line stays in a single column, then split on the first
    comma only, because the JSON payload itself contains commas.

    Args:
        path: Location of the export. Defaults to the path the notebook has
            always used, so ``load_data()`` keeps working unchanged.

    Returns:
        A DataFrame with one row per ``result`` record plus a ``createdAt``
        column shifted by +9 hours (UTC -> KST, per the original note).

    Raises:
        ValueError: If no row of the file could be parsed.

    Warns:
        UserWarning: If some rows were skipped because their payload could not
            be parsed; the message carries the skipped-row count.
    """
    raw = pd.read_csv(path, sep='\t', dtype=str)
    raw.columns = ['createdAt,data']
    rows = raw['createdAt,data'].str.split(',', n=1, expand=True)
    rows.columns = ['createdAt', 'data']

    frames = []
    skipped = 0
    for created_at, payload in zip(rows['createdAt'], rows['data']):
        parsed = _parse_rate_payload(payload)
        if parsed is None:
            skipped += 1
            continue
        # Stamp every record of this snapshot with the row's timestamp.
        parsed['createdAt'] = created_at
        frames.append(parsed)

    if not frames:
        raise ValueError(
            f'load_data: no parseable rows found in {path!r} '
            f'({skipped} rows skipped)'
        )
    if skipped:
        warnings.warn(
            f'load_data: skipped {skipped} of {len(rows)} rows '
            'with an unparseable payload',
            stacklevel=2,
        )

    final_df = pd.concat(frames, ignore_index=True)
    # Convert once for the whole table instead of once per source row.
    final_df['createdAt'] = (
        pd.to_datetime(final_df['createdAt'], format='%Y-%m-%d %H:%M:%S')
        + pd.Timedelta(hours=9)  # UTC -> KST
    )
    return final_df

In [8]:
# Parse the exported file into a single DataFrame, then show summary
# statistics for its columns.
df = load_data()
df.describe()

Unnamed: 0,cashSellingPrice,cashBuyingPrice,ttBuyingPrice,ttSellingPrice,currencyUnit,basePrice,createdAt
count,741384.0,741384.0,741384.0,741384.0,741384.0,741384.0,741384
mean,587.819453,649.157568,531.38732,528.506112,8.071562,632.249202,2025-01-19 19:34:17.108333056
min,0.0,0.0,0.0,0.0,1.0,0.41,2025-01-03 02:00:57
25%,8.05,9.83,3.6,0.0,1.0,28.87,2025-01-13 23:34:10
50%,186.815,206.345,125.02,69.94,1.0,200.31,2025-01-20 02:44:10
75%,914.63,947.21,809.53,825.87,1.0,930.92,2025-01-26 05:52:09
max,4397.4,5090.46,4731.99,4827.57,100.0,4779.78,2025-02-01 08:58:09
std,928.92872,1054.682761,969.927311,994.827912,25.496638,991.349329,


In [10]:
# Keep only the USD rows, restricted to the currency code, base price and
# timestamp columns.
is_usd = df['currencyCode'] == 'USD'
usd = df.loc[is_usd, ['currencyCode', 'basePrice', 'createdAt']]
usd

Unnamed: 0,currencyCode,basePrice,createdAt
0,USD,1473.0,2025-01-03 02:00:57
42,USD,1473.0,2025-01-03 02:05:09
84,USD,1473.0,2025-01-03 02:10:09
126,USD,1473.0,2025-01-03 02:15:09
168,USD,1473.0,2025-01-03 02:20:10
...,...,...,...
741174,USD,1457.5,2025-02-01 08:50:10
741216,USD,1457.5,2025-02-01 08:52:09
741258,USD,1457.5,2025-02-01 08:54:09
741300,USD,1457.5,2025-02-01 08:56:09


In [11]:
# Keep only the JPY rows, restricted to the currency code, base price and
# timestamp columns.
is_jpy = df['currencyCode'] == 'JPY'
jpy = df.loc[is_jpy, ['currencyCode', 'basePrice', 'createdAt']]
jpy

Unnamed: 0,currencyCode,basePrice,createdAt
1,JPY,934.38,2025-01-03 02:00:57
43,JPY,934.38,2025-01-03 02:05:09
85,JPY,934.38,2025-01-03 02:10:09
127,JPY,934.38,2025-01-03 02:15:09
169,JPY,934.38,2025-01-03 02:20:10
...,...,...,...
741175,JPY,939.35,2025-02-01 08:50:10
741217,JPY,939.35,2025-02-01 08:52:09
741259,JPY,939.35,2025-02-01 08:54:09
741301,JPY,939.35,2025-02-01 08:56:09


---
# 시계열 시각화
---