# 중복 데이터 처리

In [1]:
import pandas as pd

In [3]:
# Small demo frame for duplicate handling: row 1 repeats row 0 in every column.
df = pd.DataFrame(
    dict(
        c1=list('aabab'),
        c2=[1, 1, 1, 2, 2],
        c3=[1, 1, 2, 2, 2],
    )
)
df

Unnamed: 0,c1,c2,c3
0,a,1,1
1,a,1,1
2,b,1,2
3,a,2,2
4,b,2,2


In [4]:
# Flag rows that repeat an earlier row across all columns (first occurrence stays False).
df.duplicated(keep='first')

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [5]:
# Same check restricted to the single column 'c1'.
df.c1.duplicated(keep='first')

0    False
1     True
2    False
3     True
4     True
Name: c1, dtype: bool

In [6]:
# Return a copy without fully duplicated rows; df itself is not modified.
df.drop_duplicates(keep='first')

Unnamed: 0,c1,c2,c3
0,a,1,1
2,b,1,2
3,a,2,2
4,b,2,2


In [8]:
# Treat rows as duplicates when they agree on 'c1' and 'c2' only ('c3' is ignored).
df.drop_duplicates(subset=['c1', 'c2'], keep='first', ignore_index=False)

Unnamed: 0,c1,c2,c3
0,a,1,1
2,b,1,2
3,a,2,2
4,b,2,2


# 데이터 표준화

In [9]:
# Load the auto-mpg data set; header=None means no row is consumed as column names.
df = pd.read_csv(
    'https://raw.githubusercontent.com/leekyuyoung20221226/python/main/data/auto-mpg.csv',
    header=None,
)

In [10]:
# Name the nine columns of the header-less file.
# NOTE(review): 'mpeg' appears to hold miles-per-gallon ("mpg"); later cells rely on this spelling.
df.columns = [
    'mpeg',
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model year',
    'origin',
    'name',
]

In [11]:
# Display the frame to confirm the new column names.
df

Unnamed: 0,mpeg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.00,2790.0,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52.00,2130.0,24.6,82,2,vw pickup
395,32.0,4,135.0,84.00,2295.0,11.6,82,1,dodge rampage
396,28.0,4,120.0,79.00,2625.0,18.6,82,1,ford ranger


In [12]:
# Conversion factor from miles per gallon to kilometres per litre:
# kilometres in one mile divided by litres in one US gallon.
mpg_to_kpl = 1.6093 / 3.78541
# Add the converted fuel economy as a new column, rounded to two decimals.
df['kpl'] = df['mpeg'].mul(mpg_to_kpl).round(2)

In [13]:
# Show the first five rows, including the new 'kpl' column.
df.head(5)

Unnamed: 0,mpeg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name,kpl
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu,7.65
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320,6.38
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite,7.65
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst,6.8
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino,7.23


# 자료형 변환
    숫자가 문자열(object)로 저장된 경우 숫자형으로 변환한다.
    각 열의 자료형은 info() 메서드나 dtypes 속성으로 확인한다.

In [14]:
# List the dtype of every column; look for numeric columns stored as object.
df.dtypes

mpeg            float64
cylinders         int64
displacement    float64
horsepower       object
weight          float64
acceleration    float64
model year        int64
origin            int64
name             object
kpl             float64
dtype: object

In [15]:
# 'model year' and 'origin' are categorical data.
# 'horsepower' is numeric.
# Tip: when a CSV column that should be numeric contains even one string value,
# the whole column is loaded as object.
# How to check:

In [16]:
# Inspect the distinct raw values to spot non-numeric placeholders.
df.horsepower.unique()

array(['130.0', '165.0', '150.0', '140.0', '198.0', '220.0', '215.0',
       '225.0', '190.0', '170.0', '160.0', '95.00', '97.00', '85.00',
       '88.00', '46.00', '87.00', '90.00', '113.0', '200.0', '210.0',
       '193.0', '?', '100.0', '105.0', '175.0', '153.0', '180.0', '110.0',
       '72.00', '86.00', '70.00', '76.00', '65.00', '69.00', '60.00',
       '80.00', '54.00', '208.0', '155.0', '112.0', '92.00', '145.0',
       '137.0', '158.0', '167.0', '94.00', '107.0', '230.0', '49.00',
       '75.00', '91.00', '122.0', '67.00', '83.00', '78.00', '52.00',
       '61.00', '93.00', '148.0', '129.0', '96.00', '71.00', '98.00',
       '115.0', '53.00', '81.00', '79.00', '120.0', '152.0', '102.0',
       '108.0', '68.00', '58.00', '149.0', '89.00', '63.00', '48.00',
       '66.00', '139.0', '103.0', '125.0', '133.0', '138.0', '135.0',
       '142.0', '77.00', '62.00', '132.0', '84.00', '64.00', '74.00',
       '116.0', '82.00'], dtype=object)

In [18]:
import numpy as np

# Turn the '?' placeholder strings in 'horsepower' into NaN so they can be
# detected and dropped as missing values.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df['horsepower']: the in-place form acts on an
# intermediate object, triggers a chained-assignment warning in pandas 2.x,
# and leaves df unchanged when Copy-on-Write is enabled.
df['horsepower'] = df['horsepower'].replace('?', np.nan)

In [19]:
# Show the rows whose horsepower is missing.
df.loc[df['horsepower'].isna()]

Unnamed: 0,mpeg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name,kpl
32,25.0,4,98.0,,2046.0,19.0,71,1,ford pinto,10.63
126,21.0,6,200.0,,2875.0,17.0,74,1,ford maverick,8.93
330,40.9,4,85.0,,1835.0,17.3,80,2,renault lecar deluxe,17.39
336,23.6,4,140.0,,2905.0,14.3,80,1,ford mustang cobra,10.03
354,34.5,4,100.0,,2320.0,15.8,81,2,renault 18i,14.67
374,23.0,4,151.0,,3035.0,20.5,82,1,amc concord dl,9.78


In [20]:
# Remove, in place, every row that has no horsepower value.
df.dropna(axis='index', how='any', subset=['horsepower'], inplace=True)

In [21]:
# Re-check the distinct values after dropping the missing rows.
df['horsepower'].unique()

array(['130.0', '165.0', '150.0', '140.0', '198.0', '220.0', '215.0',
       '225.0', '190.0', '170.0', '160.0', '95.00', '97.00', '85.00',
       '88.00', '46.00', '87.00', '90.00', '113.0', '200.0', '210.0',
       '193.0', '100.0', '105.0', '175.0', '153.0', '180.0', '110.0',
       '72.00', '86.00', '70.00', '76.00', '65.00', '69.00', '60.00',
       '80.00', '54.00', '208.0', '155.0', '112.0', '92.00', '145.0',
       '137.0', '158.0', '167.0', '94.00', '107.0', '230.0', '49.00',
       '75.00', '91.00', '122.0', '67.00', '83.00', '78.00', '52.00',
       '61.00', '93.00', '148.0', '129.0', '96.00', '71.00', '98.00',
       '115.0', '53.00', '81.00', '79.00', '120.0', '152.0', '102.0',
       '108.0', '68.00', '58.00', '149.0', '89.00', '63.00', '48.00',
       '66.00', '139.0', '103.0', '125.0', '133.0', '138.0', '135.0',
       '142.0', '77.00', '62.00', '132.0', '84.00', '64.00', '74.00',
       '116.0', '82.00'], dtype=object)

In [22]:
# List the distinct codes stored in 'origin'.
df['origin'].unique()

array([1, 3, 2], dtype=int64)

In [23]:
# Translate the numeric origin codes into readable labels, stored in a new column.
df['origin_obj'] = df['origin'].replace([1, 2, 3], ['USA', 'EU', 'JPN'])

In [24]:
# Preview the first five rows with the new 'origin_obj' column.
df.head(5)

Unnamed: 0,mpeg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name,kpl,origin_obj
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu,7.65,USA
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320,6.38,USA
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite,7.65,USA
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst,6.8,USA
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino,7.23,USA


In [25]:
# Store the origin labels with the (unordered) categorical dtype.
df['origin_obj'] = df['origin_obj'].astype(pd.CategoricalDtype())

In [26]:
# Print per-column non-null counts and dtypes to verify the conversions so far.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 397
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   mpeg          392 non-null    float64 
 1   cylinders     392 non-null    int64   
 2   displacement  392 non-null    float64 
 3   horsepower    392 non-null    object  
 4   weight        392 non-null    float64 
 5   acceleration  392 non-null    float64 
 6   model year    392 non-null    int64   
 7   origin        392 non-null    int64   
 8   name          392 non-null    object  
 9   kpl           392 non-null    float64 
 10  origin_obj    392 non-null    category
dtypes: category(1), float64(5), int64(3), object(2)
memory usage: 34.2+ KB


In [27]:
# Draw three random 'model year' values (result changes on every run).
df['model year'].sample(n=3)

279    78
313    80
117    73
Name: model year, dtype: int64

In [34]:
# Convert the horsepower strings to floating-point numbers.
df['horsepower'] = df['horsepower'].astype(float)

# 범주형(카테고리) 데이터 처리
    연속형 값을 구간으로 나눌 때 경계값을 직접 정하기 애매하면, np.histogram으로 구간 경계를 계산해서 사용한다.

In [35]:
# Split the horsepower range into three equal-width bins:
# count holds the number of cars per bin, bin_divide the four bin edges.
count, bin_divide = np.histogram(df['horsepower'], bins=3)

In [37]:
# Label every car with the horsepower bin it falls into, using the edges
# computed by np.histogram; include_lowest=True keeps the minimum value
# inside the first bin.
bin_names = ['저출력', '보통출력', '고출력']
df['hp_bin'] = pd.cut(
    df['horsepower'],
    bin_divide,
    labels=bin_names,
    include_lowest=True,
)
df.head()

Unnamed: 0,mpeg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,name,kpl,origin_obj,hp_bin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu,7.65,USA,보통출력
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320,6.38,USA,보통출력
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite,7.65,USA,보통출력
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst,6.8,USA,보통출력
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino,7.23,USA,보통출력


In [38]:
# List the categories that actually occur in 'hp_bin'.
df['hp_bin'].unique()

['보통출력', '고출력', '저출력']
Categories (3, object): ['저출력' < '보통출력' < '고출력']

In [39]:
# Compare each horsepower value with the bin label it received.
df.loc[:, ['horsepower', 'hp_bin']]

Unnamed: 0,horsepower,hp_bin
0,130.0,보통출력
1,165.0,보통출력
2,150.0,보통출력
3,150.0,보통출력
4,140.0,보통출력
...,...,...
393,86.0,저출력
394,52.0,저출력
395,84.0,저출력
396,79.0,저출력


# 더미변수
    hp_bin 같은 범주형 열은 수치가 아니기 때문에 머신러닝에 바로 사용할 수 없다.
    데이터의 속성(값 자체)은 제거하고, 어떤 특성(Feature)이 있는지 없는지 여부만 표시한다.
    각 특성의 유무를 0과 1로 표현한다. 예)
    010
    100
    001

In [41]:
# One indicator column per horsepower bin.
# dtype is pinned to 'uint8' so the columns contain 0/1 on every pandas
# version: releases before 2.0 default to uint8, while 2.0+ default to bool
# (True/False).
horsepower_dums = pd.get_dummies(df['hp_bin'], dtype='uint8')

In [43]:
# Install scikit-learn from inside the notebook through a shell escape.
# NOTE(review): `!pip` runs whichever pip is first on PATH, which may not
# belong to the running kernel; `%pip install scikit-learn` is the usual way
# to target the kernel's own environment — confirm before changing.
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.2.0-cp310-cp310-win_amd64.whl (8.2 MB)
     ---------------------------------------- 8.2/8.2 MB 13.2 MB/s eta 0:00:00
Collecting joblib>=1.1.1
  Using cached joblib-1.2.0-py3-none-any.whl (297 kB)
Collecting threadpoolctl>=2.0.0
  Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Collecting scipy>=1.3.2
  Downloading scipy-1.10.0-cp310-cp310-win_amd64.whl (42.5 MB)
     ---------------------------------------- 42.5/42.5 MB 8.4 MB/s eta 0:00:00
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.2.0 scikit-learn-1.2.0 scipy-1.10.0 threadpoolctl-3.1.0


In [44]:
from sklearn import preprocessing

In [45]:
# Create the two encoders used below: one mapping labels to integer codes,
# one turning integer codes into one-hot vectors.
label_encoder, onehot_encoder = (
    preprocessing.LabelEncoder(),
    preprocessing.OneHotEncoder(),
)

In [46]:
# Learn the set of bin labels, then encode every row as an integer code.
label_encoder.fit(df['hp_bin']).transform(df['hp_bin'])

array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 1, 1, 0, 0, 0,
       1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 1, 1, 1,
       0, 1, 1, 0, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, 2, 2, 2, 2,
       2, 2, 2, 2, 1, 0, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2,
       1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,
       2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2,

In [47]:
# First step towards the 2-D array the one-hot encoder needs:
# keep the integer label codes of 'hp_bin' and peek at the first five.
oneshot_labeled = label_encoder.fit_transform(df.hp_bin)
oneshot_labeled[0:5]

array([1, 1, 1, 1, 1])

In [49]:
# Reshape the 1-D codes into a single-column 2-D array (one row per car).
oneshot_reshaped = np.reshape(oneshot_labeled, (-1, 1))
oneshot_reshaped[0:5]

array([[1],
       [1],
       [1],
       [1],
       [1]])

In [50]:
# Number of rows in the reshaped array.
oneshot_reshaped.shape[0]

392

In [52]:
# One-hot encode the integer codes, then print the encoded result and its type.
oneshot_fitted = onehot_encoder.fit(oneshot_reshaped).transform(oneshot_reshaped)
print(oneshot_fitted)
print(type(oneshot_fitted))

  (0, 1)	1.0
  (1, 1)	1.0
  (2, 1)	1.0
  (3, 1)	1.0
  (4, 1)	1.0
  (5, 0)	1.0
  (6, 0)	1.0
  (7, 0)	1.0
  (8, 0)	1.0
  (9, 0)	1.0
  (10, 0)	1.0
  (11, 1)	1.0
  (12, 1)	1.0
  (13, 0)	1.0
  (14, 2)	1.0
  (15, 2)	1.0
  (16, 2)	1.0
  (17, 2)	1.0
  (18, 2)	1.0
  (19, 2)	1.0
  (20, 2)	1.0
  (21, 2)	1.0
  (22, 2)	1.0
  (23, 1)	1.0
  (24, 2)	1.0
  :	:
  (367, 2)	1.0
  (368, 2)	1.0
  (369, 2)	1.0
  (370, 2)	1.0
  (371, 2)	1.0
  (372, 2)	1.0
  (373, 2)	1.0
  (374, 2)	1.0
  (375, 2)	1.0
  (376, 2)	1.0
  (377, 2)	1.0
  (378, 2)	1.0
  (379, 2)	1.0
  (380, 1)	1.0
  (381, 2)	1.0
  (382, 2)	1.0
  (383, 1)	1.0
  (384, 2)	1.0
  (385, 2)	1.0
  (386, 2)	1.0
  (387, 2)	1.0
  (388, 2)	1.0
  (389, 2)	1.0
  (390, 2)	1.0
  (391, 2)	1.0
<class 'scipy.sparse._csr.csr_matrix'>


# 정규화

In [53]:
# Scale horsepower by its maximum so the largest value becomes 1.
df['horsepower'] = df['horsepower'].div(df['horsepower'].max())

In [54]:
# Summary statistics of the scaled column; max should now be 1.0.
df.horsepower.describe()

count    392.000000
mean       0.454215
std        0.167353
min        0.200000
25%        0.326087
50%        0.406522
75%        0.547826
max        1.000000
Name: horsepower, dtype: float64

In [55]:
# Min-max scaling: subtract the column minimum from each value, then divide
# the result by (maximum - minimum).
# (column value - column min) / (column max - column min)
# (x - min) / (max - min)

In [57]:
# Extremes of 'weight' and the width of its range, used for min-max scaling.
weight_min, weight_max = df['weight'].min(), df['weight'].max()
weight_minmax = weight_max - weight_min

In [59]:
# Apply (x - min) / (max - min) to 'weight'.
df['weight'] = df['weight'].sub(weight_min).div(weight_minmax)

In [60]:
# Summary statistics of the scaled column; min should be 0.0 and max 1.0.
df.weight.describe()

count    392.000000
mean       0.386897
std        0.240829
min        0.000000
25%        0.173589
50%        0.337539
75%        0.567550
max        1.000000
Name: weight, dtype: float64

# 시계열 데이터
    시간의 개념을 가진 연속적인 데이터로 순서가 있으며 순서가 바뀌면 의미도 바뀐다
    Timestamp: 특정한 시점을 기록
    Period: 두 지점 사이의 일정한 기간

In [62]:
# Load the daily stock price sample and preview the first five rows.
df = pd.read_csv(
    'https://raw.githubusercontent.com/leekyuyoung20221226/python/main/data/stock-data.csv',
)
df.head(5)

Unnamed: 0,Date,Close,Start,High,Low,Volume
0,2018-07-02,10100,10850,10900,10000,137977
1,2018-06-29,10700,10550,10900,9990,170253
2,2018-06-28,10400,10900,10950,10150,155769
3,2018-06-27,10900,10800,11050,10500,133548
4,2018-06-26,10800,10900,11000,10700,63039


In [63]:
# Check the column dtypes; 'Date' is still stored as object (strings) here.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    20 non-null     object
 1   Close   20 non-null     int64 
 2   Start   20 non-null     int64 
 3   High    20 non-null     int64 
 4   Low     20 non-null     int64 
 5   Volume  20 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 1.1+ KB


In [66]:
# Alternative that was tried here: df.Date.astype(np.datetime64)
# Parse the 'Date' strings into datetime values; the result is only displayed,
# not stored back into df.
pd.to_datetime(df.Date)

0    2018-07-02
1    2018-06-29
2    2018-06-28
3    2018-06-27
4    2018-06-26
5    2018-06-25
6    2018-06-22
7    2018-06-21
8    2018-06-20
9    2018-06-19
10   2018-06-18
11   2018-06-15
12   2018-06-14
13   2018-06-12
14   2018-06-11
15   2018-06-08
16   2018-06-07
17   2018-06-05
18   2018-06-04
19   2018-06-01
Name: Date, dtype: datetime64[ns]

In [67]:
# Parse the 'Date' strings into datetime64[ns] values and keep them in a new
# column. pd.to_datetime is used instead of astype(np.datetime64) because
# pandas 2.x refuses to cast to the unit-less numpy datetime64 dtype; this
# also matches how the other cells in this notebook parse dates.
# NOTE(review): the column name 'new_Data' looks like a typo for 'new_Date',
# but later cells reference it, so it is kept.
df['new_Data'] = pd.to_datetime(df['Date'])
# Confirm the new column's dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      20 non-null     object        
 1   Close     20 non-null     int64         
 2   Start     20 non-null     int64         
 3   High      20 non-null     int64         
 4   Low       20 non-null     int64         
 5   Volume    20 non-null     int64         
 6   new_Data  20 non-null     datetime64[ns]
dtypes: datetime64[ns](1), int64(5), object(1)
memory usage: 1.2+ KB


In [69]:
# Make the datetime column the index of df (in place); the original 'Date'
# string column is removed in the next cell.
df.set_index(keys='new_Data', drop=True, inplace=True)

In [70]:
# Remove the now-redundant string column 'Date' (in place).
df.drop(columns='Date', inplace=True)

In [71]:
# Display the frame, now indexed by the parsed dates.
df

Unnamed: 0_level_0,Close,Start,High,Low,Volume
new_Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-07-02,10100,10850,10900,10000,137977
2018-06-29,10700,10550,10900,9990,170253
2018-06-28,10400,10900,10950,10150,155769
2018-06-27,10900,10800,11050,10500,133548
2018-06-26,10800,10900,11000,10700,63039
2018-06-25,11150,11400,11450,11000,55519
2018-06-22,11300,11250,11450,10750,134805
2018-06-21,11200,11350,11750,11200,133002
2018-06-20,11550,11200,11600,10900,308596
2018-06-19,11300,11850,11950,11300,180656


In [72]:
# Convert timestamps to Period values with DatetimeIndex.to_period

In [74]:
# Three date strings parsed into a DatetimeIndex.
dates = [
    '2019-01-01',
    '2020-03-01',
    '2021-06-01',
]
ts_dates = pd.to_datetime(dates)
ts_dates

DatetimeIndex(['2019-01-01', '2020-03-01', '2021-06-01'], dtype='datetime64[ns]', freq=None)

In [75]:
# Daily periods: each timestamp becomes the day that contains it.
ts_dates.to_period('D')

PeriodIndex(['2019-01-01', '2020-03-01', '2021-06-01'], dtype='period[D]')

In [76]:
# Monthly periods: each timestamp becomes the month that contains it.
ts_dates.to_period('M')

PeriodIndex(['2019-01', '2020-03', '2021-06'], dtype='period[M]')

In [77]:
# Yearly periods: each timestamp becomes the calendar year that contains it.
# 'Y' is used instead of 'A' because the 'A' alias is deprecated in
# pandas 2.2+, while 'Y' is accepted by older and newer releases alike.
ts_dates.to_period(freq='Y')

PeriodIndex(['2019', '2020', '2021'], dtype='period[A-DEC]')

In [78]:
# Monthly periods for the stock data's datetime index.
df.index.to_period('M')

PeriodIndex(['2018-07', '2018-06', '2018-06', '2018-06', '2018-06', '2018-06',
             '2018-06', '2018-06', '2018-06', '2018-06', '2018-06', '2018-06',
             '2018-06', '2018-06', '2018-06', '2018-06', '2018-06', '2018-06',
             '2018-06', '2018-06'],
            dtype='period[M]', name='new_Data')

# 시계열 데이터 만들기

Timestamp 배열

In [79]:
# Six timezone-aware timestamps, one at the start of each month ('MS'),
# beginning 2022-01-01 in Asia/Seoul.
ts_ms = pd.date_range(
    '2022-01-01',
    periods=6,
    freq='MS',
    tz='Asia/Seoul',
)
ts_ms

DatetimeIndex(['2022-01-01 00:00:00+09:00', '2022-02-01 00:00:00+09:00',
               '2022-03-01 00:00:00+09:00', '2022-04-01 00:00:00+09:00',
               '2022-05-01 00:00:00+09:00', '2022-06-01 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='MS')

In [80]:
# Six timezone-aware timestamps, one at the end of each month, starting from
# 2022-01-01 in Asia/Seoul.
# The offset object is passed instead of the string alias 'M': that alias is
# deprecated for timestamps in pandas 2.2+ (renamed 'ME'), and 'ME' is not
# understood by older releases, whereas pd.offsets.MonthEnd() works on both.
# NOTE(review): the name ts_ms is reused from the month-start example above.
ts_ms = pd.date_range(
    start='2022-01-01',
    end=None,
    periods=6,
    freq=pd.offsets.MonthEnd(),
    tz='Asia/Seoul',
)
ts_ms

DatetimeIndex(['2022-01-31 00:00:00+09:00', '2022-02-28 00:00:00+09:00',
               '2022-03-31 00:00:00+09:00', '2022-04-30 00:00:00+09:00',
               '2022-05-31 00:00:00+09:00', '2022-06-30 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='M')

Period 배열

In [81]:
# Build a PeriodIndex of three consecutive one-month periods from 2023-01.
pr_m = pd.period_range('2023-01-01', periods=3, freq='M')
pr_m

PeriodIndex(['2023-01', '2023-02', '2023-03'], dtype='period[M]')

In [82]:
# Build a PeriodIndex of three consecutive one-hour periods starting at
# 2023-01-01 00:00.
# The offset object is passed instead of the string alias 'H': the uppercase
# alias is deprecated in pandas 2.2+ (renamed 'h'), whereas pd.offsets.Hour()
# is accepted regardless of version.
# NOTE(review): the name pr_m is reused from the monthly example above.
pr_m = pd.period_range(
    start='2023-01-01',
    end=None,
    periods=3,
    freq=pd.offsets.Hour(),
)
pr_m

PeriodIndex(['2023-01-01 00:00', '2023-01-01 01:00', '2023-01-01 02:00'], dtype='period[H]')

# 시계열 데이터 활용

In [83]:
# Reload the stock price sample to start from the raw columns again.
df = pd.read_csv(
    'https://raw.githubusercontent.com/leekyuyoung20221226/python/main/data/stock-data.csv',
)

year month day 각각 컬럼을 만들어서 날짜를 분리

In [87]:
# Parse 'Date' into datetimes, then split it into separate year / month / day
# columns through the .dt accessor.
df['new_date'] = pd.to_datetime(df.Date)
df['year'], df['month'], df['day'] = (
    df.new_date.dt.year,
    df.new_date.dt.month,
    df.new_date.dt.day,
)
df

Unnamed: 0,Date,Close,Start,High,Low,Volume,new_date,year,month,day
0,2018-07-02,10100,10850,10900,10000,137977,2018-07-02,2018,7,2
1,2018-06-29,10700,10550,10900,9990,170253,2018-06-29,2018,6,29
2,2018-06-28,10400,10900,10950,10150,155769,2018-06-28,2018,6,28
3,2018-06-27,10900,10800,11050,10500,133548,2018-06-27,2018,6,27
4,2018-06-26,10800,10900,11000,10700,63039,2018-06-26,2018,6,26
5,2018-06-25,11150,11400,11450,11000,55519,2018-06-25,2018,6,25
6,2018-06-22,11300,11250,11450,10750,134805,2018-06-22,2018,6,22
7,2018-06-21,11200,11350,11750,11200,133002,2018-06-21,2018,6,21
8,2018-06-20,11550,11200,11600,10900,308596,2018-06-20,2018,6,20
9,2018-06-19,11300,11850,11950,11300,180656,2018-06-19,2018,6,19
