# 삼성전자 주가 데이터로 데이터 분석 실습하기

In [None]:
# 여러 금융 데이터를 pandas의 DataFrame으로 다룰 수 있게 해주는 라이브러리
! pip install pandas-datareader

In [None]:
# yfinance는 주식 및 금융 데이터를 Yahoo Finance에서 직접 가져오는 Python 라이브러리
! pip install yfinance

In [28]:
# https://finance.yahoo.com/
import pandas as pd
import yfinance as yfin

In [29]:
# 삼성전자(005930.KS) 주식 데이터를 Yahoo Finance에서 가져오기

ticker = '005930.KS'      # 삼성전자 티커 (한국 거래소)
start_date = '2024-01-01' # 불러올 데이터 기간의 시작일자
end_date = '2025-01-01'   # 불러올 데이터 기간의 끝일자

# 주식 데이터 다운로드
df = yfin.download(ticker, start = start_date, end = end_date)

[*********************100%***********************]  1 of 1 completed


In [30]:
df.info()

# 총 5개의 컬럼과 244개의 데이터 수집

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 244 entries, 2024-01-02 to 2024-12-30
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   (Close, 005930.KS)   244 non-null    float64
 1   (High, 005930.KS)    244 non-null    float64
 2   (Low, 005930.KS)     244 non-null    float64
 3   (Open, 005930.KS)    244 non-null    float64
 4   (Volume, 005930.KS)  244 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 11.4 KB


In [31]:
# 주가 데이터를 CSV 파일로 저장
df.to_csv('data2.csv', index=True)

In [38]:
# 내용 확인(첫 5줄)
df.head()

Price,Close,High,Low,Open,Volume
Ticker,005930.KS,005930.KS,005930.KS,005930.KS,005930.KS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-01-02,78447.890625,78644.995878,77068.153855,77068.153855,17142847
2024-01-03,75885.515625,77659.462744,75885.515625,77363.80489,21753644
2024-01-04,75491.3125,76181.180891,74998.549364,74998.549364,15324439
2024-01-05,75491.3125,75984.075636,75294.207245,75589.865127,11304316
2024-01-08,75392.757812,76378.284058,75294.205188,75885.520935,11088724


In [32]:
# 데이터 프레임의 평균, 표준편차 등 주요 통계 지표 확인
df.describe()

# Ticker: 005930.KS는 삼성전자의 주식 코드
# Open: 그 날 주식 거래 시작 가격
# High: 그 날 주식의 최고 가격
# Low: 그 날 주식의 최저 가격
# Close: 그 날 주식의 종가(마지막 거래 가격)
# Volume: 그 날 거래된 삼성전자 주식의 수

Price,Close,High,Low,Open,Volume
Ticker,005930.KS,005930.KS,005930.KS,005930.KS,005930.KS
count,244.0,244.0,244.0,244.0,244.0
mean,71040.110448,71901.674625,70386.794946,71180.318776,21696580.0
std,9823.50418,9796.151104,9748.877544,9858.475159,9397620.0
min,49900.0,51800.0,49900.0,50200.0,2957915.0
25%,61450.0,62759.691385,61250.0,61739.294466,14913470.0
50%,73323.15625,74111.578116,72633.280328,73671.455015,19597440.0
75%,77925.183594,78768.186645,77365.906988,78297.761061,25876800.0
max,87310.109375,88304.531036,86614.015109,88006.204918,57691270.0


In [33]:
# 종가 기준 최대값 구하기
# 삼성전자의 1년 간 종가의 최고값
df['Close'].max()

Ticker
005930.KS    87310.109375
dtype: float64

In [34]:
# 종가 기준 최소값
# 삼성전자의 1년 간 종가의 최소값
df['Close'].min()

Ticker
005930.KS    49900.0
dtype: float64

In [35]:
# 종가 기준 평균값
df['Close'].mean()

Ticker
005930.KS    71040.110448
dtype: float64

In [36]:
# 특정 컬럼만 선택
df[['High', 'Low']]

Price,High,Low
Ticker,005930.KS,005930.KS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2024-01-02,78644.995878,77068.153855
2024-01-03,77659.462744,75885.515625
2024-01-04,76181.180891,74998.549364
2024-01-05,75984.075636,75294.207245
2024-01-08,76378.284058,75294.205188
...,...,...
2024-12-23,54000.000000,53300.000000
2024-12-24,54500.000000,53600.000000
2024-12-26,54600.000000,53500.000000
2024-12-27,54100.000000,53200.000000


In [46]:
# 시작가 대비 종가가 높았던 날의 데이터 선택
df[df[('Open', '005930.KS')] < df[('Close', '005930.KS')]]

Price,Close,High,Low,Open,Volume
Ticker,005930.KS,005930.KS,005930.KS,005930.KS,005930.KS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-01-02,78447.890625,78644.995878,77068.153855,77068.153855,17142847
2024-01-04,75491.312500,76181.180891,74998.549364,74998.549364,15324439
2024-01-11,72140.523438,72534.733948,71647.760299,71844.865555,57691266
2024-01-12,72041.968750,73027.494998,71746.310876,71943.416125,13038939
2024-01-15,72830.390625,72928.943251,72140.522243,72140.522243,2957915
...,...,...,...,...,...
2024-12-18,54900.000000,55400.000000,54000.000000,54100.000000,13698937
2024-12-20,53000.000000,53100.000000,51900.000000,52700.000000,24674774
2024-12-23,53500.000000,54000.000000,53300.000000,53400.000000,13672650
2024-12-24,54400.000000,54500.000000,53600.000000,53700.000000,11634677


In [54]:
# 종가 기준 주가가 가장 높았던 날의 데이터 선택
df[df['Close', '005930.KS'] == max(df['Close', '005930.KS'])]

Price,Close,High,Low,Open,Volume
Ticker,005930.KS,005930.KS,005930.KS,005930.KS,005930.KS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-07-09,87310.109375,87707.877527,86415.131033,87310.109375,21336201
2024-07-10,87310.109375,87508.993451,86614.015109,87111.225299,17813847


In [56]:
# 종가 기준 주가가 90,000원 이상이었던 날짜를 반환
df[df['Close'] > 90000].index

DatetimeIndex(['2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05',
               '2024-01-08', '2024-01-09', '2024-01-10', '2024-01-11',
               '2024-01-12', '2024-01-15',
               ...
               '2024-12-16', '2024-12-17', '2024-12-18', '2024-12-19',
               '2024-12-20', '2024-12-23', '2024-12-24', '2024-12-26',
               '2024-12-27', '2024-12-30'],
              dtype='datetime64[ns]', name='Date', length=244, freq=None)

In [60]:
# 2024년 1월 15일의 고가와 저가 값을 반환
# loc = 라벨 기반 인덱싱
df.loc['2024-01-15', ['High', 'Low']]

Price  Ticker   
High   005930.KS    72928.943251
Low    005930.KS    72140.522243
Name: 2024-01-15 00:00:00, dtype: float64

In [63]:
# 종가 데이터를 가격이 높은 순서대로 정렬
df_sorted = df.sort_values(by=('Close', '005930.KS'), ascending=False)
df_sorted

Price,Close,High,Low,Open,Volume
Ticker,005930.KS,005930.KS,005930.KS,005930.KS,005930.KS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-07-09,87310.109375,87707.877527,86415.131033,87310.109375,21336201
2024-07-10,87310.109375,87508.993451,86614.015109,87111.225299,17813847
2024-07-16,87210.671875,87508.998005,86216.251443,86415.135530,16166688
2024-07-11,87111.226562,88304.531036,86216.248207,88006.204918,24677608
2024-07-08,86912.343750,88105.648241,86415.133545,87409.553955,24035809
...,...,...,...,...,...
2024-12-04,53100.000000,53400.000000,52000.000000,52000.000000,29004766
2024-12-20,53000.000000,53100.000000,51900.000000,52700.000000,24674774
2024-11-12,53000.000000,54600.000000,53000.000000,54600.000000,37962881
2024-11-13,50600.000000,53000.000000,50500.000000,52000.000000,52527995
