## 5.1 pandas 함수 개요

### 가. pandas

In [1]:
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Build a Series from a plain list; with no index given, pandas labels the
# values with a default integer range starting at 0.
a = Series(data=[1, 3, 5, 7])
print(a)

0    1
1    3
2    5
3    7
dtype: int64


In [3]:
print(a.values)

[1 3 5 7]


In [4]:
print(a.index)

RangeIndex(start=0, stop=4, step=1)


In [5]:
# Same values as before, but labelled with the explicit string index
# 'a'..'d' instead of the default integer range.
a2 = pd.Series(data=[1, 3, 5, 7], index=list('abcd'))
print(a2)

a    1
b    3
c    5
d    7
dtype: int64


## 5.2 데이터 불러오기

In [6]:
# Load the grape dataset from CSV; the path is relative to the working directory.
df = pd.read_csv('./data/EX_GrapeData.csv')

In [7]:
df

Unnamed: 0,continent,brand,size,period,price
0,2,2,10.7,47.65,144
1,2,3,14.0,63.13,215
2,2,2,9.0,58.76,105
3,1,1,8.0,34.88,69
4,2,2,10.0,55.53,134
...,...,...,...,...,...
58,1,1,5.0,16.66,21.5
59,2,1,21.0,43.00,
60,2,2,5.0,12.00,
61,2,3,13.0,20.00,


In [8]:
# Load the same dataset from the Excel workbook, rebinding df (the CSV-loaded
# frame is replaced). Note that price is displayed as float here (144.0),
# whereas the CSV load displayed it as 144.
df = pd.read_excel('./data/EX_GrapeData.xlsx')

In [9]:
df

Unnamed: 0,continent,brand,size,period,price
0,2,2,10.7,47.65,144.0
1,2,3,14.0,63.13,215.0
2,2,2,9.0,58.76,105.0
3,1,1,8.0,34.88,69.0
4,2,2,10.0,55.53,134.0
...,...,...,...,...,...
58,1,1,5.0,16.66,21.5
59,2,1,21.0,43.00,
60,2,2,5.0,12.00,
61,2,3,13.0,20.00,


### 나. DataFrame 확인하기

In [10]:
df.head()

Unnamed: 0,continent,brand,size,period,price
0,2,2,10.7,47.65,144.0
1,2,3,14.0,63.13,215.0
2,2,2,9.0,58.76,105.0
3,1,1,8.0,34.88,69.0
4,2,2,10.0,55.53,134.0


In [11]:
df.tail()

Unnamed: 0,continent,brand,size,period,price
58,1,1,5.0,16.66,21.5
59,2,1,21.0,43.0,
60,2,2,5.0,12.0,
61,2,3,13.0,20.0,
62,2,3,31.0,19.0,


In [12]:
# An integer slice on the frame selects rows by position: rows 1 through 4
# (the stop position 5 is excluded).
df[1:5]

Unnamed: 0,continent,brand,size,period,price
1,2,3,14.0,63.13,215.0
2,2,2,9.0,58.76,105.0
3,1,1,8.0,34.88,69.0
4,2,2,10.0,55.53,134.0


In [13]:
df[:3]

Unnamed: 0,continent,brand,size,period,price
0,2,2,10.7,47.65,144.0
1,2,3,14.0,63.13,215.0
2,2,2,9.0,58.76,105.0


In [14]:
df[60:]

Unnamed: 0,continent,brand,size,period,price
60,2,2,5.0,12.0,
61,2,3,13.0,20.0,
62,2,3,31.0,19.0,


In [15]:
# Single brackets with one column label return that column as a Series.
df['price']

0     144.0
1     215.0
2     105.0
3      69.0
4     134.0
      ...  
58     21.5
59      NaN
60      NaN
61      NaN
62      NaN
Name: price, Length: 63, dtype: float64

In [16]:
# Passing a list of labels (double brackets) returns a DataFrame, even when
# the list holds only one column.
df[['price']]

Unnamed: 0,price
0,144.0
1,215.0
2,105.0
3,69.0
4,134.0
...,...
58,21.5
59,
60,
61,


In [17]:
# df.columns[[0,2,4]] picks the column labels at positions 0, 2 and 4
# (continent, size, price); those labels are then used to select the columns.
df[df.columns[[0,2,4]]]

Unnamed: 0,continent,size,price
0,2,10.7,144.0
1,2,14.0,215.0
2,2,9.0,105.0
3,1,8.0,69.0
4,2,10.0,134.0
...,...,...,...
58,1,5.0,21.5
59,2,21.0,
60,2,5.0,
61,2,13.0,


In [18]:
# Label-based selection: all rows, columns from 'size' through 'price'.
# Unlike positional slices, both endpoints of a label slice are included.
df.loc[:,'size':'price']

Unnamed: 0,size,period,price
0,10.7,47.65,144.0
1,14.0,63.13,215.0
2,9.0,58.76,105.0
3,8.0,34.88,69.0
4,10.0,55.53,134.0
...,...,...,...
58,5.0,16.66,21.5
59,21.0,43.00,
60,5.0,12.00,
61,13.0,20.00,


In [19]:
# Position-based selection: rows 1 through 6 and the first two columns
# (continent, brand); the stop positions 7 and 2 are excluded.
df.iloc[1:7, 0:2]

Unnamed: 0,continent,brand
1,2,3
2,2,2
3,1,1
4,2,2
5,2,2
6,2,2


In [20]:
# Scalar access: the single value at row label 5, column 'price'.
df.at[5, 'price']

129.0

## 5.3 데이터 변환하기

In [21]:
# Work on a copy so the column edits below do not modify df itself.
df_columns = df.copy()

In [22]:
df_columns.columns

Index(['continent', 'brand', 'size', 'period', 'price'], dtype='object')

In [23]:
# Keep only the three numeric measurement columns, dropping continent and brand.
df_columns = df_columns[['size','period','price']]

In [24]:
df_columns

Unnamed: 0,size,period,price
0,10.7,47.65,144.0
1,14.0,63.13,215.0
2,9.0,58.76,105.0
3,8.0,34.88,69.0
4,10.0,55.53,134.0
...,...,...,...
58,5.0,16.66,21.5
59,21.0,43.00,
60,5.0,12.00,
61,13.0,20.00,


In [25]:
# Rename 'period' to 'time'. inplace=True modifies df_columns directly
# instead of returning a renamed copy.
df_columns.rename(columns={'period':'time'}, inplace=True)

In [26]:
df_columns.columns

Index(['size', 'time', 'price'], dtype='object')

In [27]:
# Add a derived column: element-wise size divided by time, row by row.
df_columns['growth'] = df_columns['size'] / df_columns['time']

In [28]:
df_columns.head()

Unnamed: 0,size,time,price,growth
0,10.7,47.65,144.0,0.224554
1,14.0,63.13,215.0,0.221765
2,9.0,58.76,105.0,0.153165
3,8.0,34.88,69.0,0.229358
4,10.0,55.53,134.0,0.180083


In [30]:
# Boolean-mask filter: keep rows where continent is 1 AND brand is 1.
# Each comparison needs its own parentheses because & binds tighter than ==.
df_continent_brand = df[(df['continent'] == 1) & (df['brand'] == 1)]

In [31]:
df_continent_brand.head()

Unnamed: 0,continent,brand,size,period,price
3,1,1,8.0,34.88,69.0
11,1,1,10.4,17.67,54.0
12,1,1,7.4,16.41,39.0
13,1,1,5.4,12.02,29.5
16,1,1,6.0,23.21,42.0


In [33]:
# Boolean-mask filter: keep rows with size of at least 10 AND period of at least 30.
# Each comparison needs its own parentheses because & binds tighter than >=.
df_over_size_period = df[(df['size'] >= 10) & (df['period'] >= 30)]

In [34]:
df_over_size_period.head()

Unnamed: 0,continent,brand,size,period,price
0,2,2,10.7,47.65,144.0
1,2,3,14.0,63.13,215.0
4,2,2,10.0,55.53,134.0
5,2,2,10.5,43.14,129.0
6,2,2,16.0,54.86,155.0


In [35]:
df['brand'].value_counts()

2    24
1    23
3    16
Name: brand, dtype: int64

In [36]:
# Recoding table in the nested form {column: {old_value: new_value}}:
# brands 1 and 2 are merged into group 1, brand 3 becomes group 2.
recode_brand = {"brand": {1:1, 2:1, 3:2}}

In [37]:
# Apply the recoding to the 'brand' column only. replace() returns a new
# frame; df keeps its original brand codes.
df_recode1 = df.replace(recode_brand)

In [38]:
df_recode1.head()

Unnamed: 0,continent,brand,size,period,price
0,2,1,10.7,47.65,144.0
1,2,2,14.0,63.13,215.0
2,2,1,9.0,58.76,105.0
3,1,1,8.0,34.88,69.0
4,2,1,10.0,55.53,134.0


In [39]:
df_recode1['brand'].value_counts()

1    47
2    16
Name: brand, dtype: int64

In [45]:
def brand_groups(series):
    """Collapse a single brand code into a coarser group code.

    Despite the parameter name, the argument is compared directly against
    integers, so it is one brand value rather than a whole Series.

    Returns 1 for brand 1 or 2, 2 for brand 3, and None for any other value.
    """
    if series in (1, 2):
        return 1
    if series == 3:
        return 2
    return None

In [46]:
# Add a recoded column to df itself: apply() calls brand_groups once per
# value of the 'brand' column and collects the results.
df['re_brand'] = df['brand'].apply(brand_groups)

In [47]:
df.head()

Unnamed: 0,continent,brand,size,period,price,re_brand
0,2,2,10.7,47.65,144.0,1
1,2,3,14.0,63.13,215.0,2
2,2,2,9.0,58.76,105.0,1
3,1,1,8.0,34.88,69.0,1
4,2,2,10.0,55.53,134.0,1


In [48]:
# Reload the CSV so df no longer carries the re_brand column added earlier.
# The file name uses the same 'EX_' spelling as the other loads in this
# notebook; the mixed-case 'Ex_' variant only resolves on case-insensitive
# filesystems.
df = pd.read_csv('./data/EX_GrapeData.csv')

In [49]:
df.head()

Unnamed: 0,continent,brand,size,period,price
0,2,2,10.7,47.65,144
1,2,3,14.0,63.13,215
2,2,2,9.0,58.76,105
3,1,1,8.0,34.88,69
4,2,2,10.0,55.53,134


In [50]:
# Convert the frame to a NumPy array; row and column labels are dropped.
# The CSV-loaded price values show up quoted (as strings) in the output, so
# the array holds mixed element types — presumably object dtype; TODO confirm.
df_num = df.to_numpy()

In [51]:
df_num

array([[2, 2, 10.7, 47.65, '144'],
       [2, 3, 14.0, 63.13, '215'],
       [2, 2, 9.0, 58.76, '105'],
       [1, 1, 8.0, 34.88, '69'],
       [2, 2, 10.0, 55.53, '134'],
       [2, 2, 10.5, 43.14, '129'],
       [2, 2, 16.0, 54.86, '155'],
       [2, 1, 15.0, 44.14, '99'],
       [2, 1, 6.5, 17.46, '38.5'],
       [2, 1, 5.0, 21.04, '36.5'],
       [2, 2, 25.0, 109.38, '260'],
       [1, 1, 10.4, 17.67, '54'],
       [1, 1, 7.4, 16.41, '39'],
       [1, 1, 5.4, 12.02, '29.5'],
       [2, 2, 15.4, 49.48, '109'],
       [2, 1, 12.4, 48.74, '89.5'],
       [1, 1, 6.0, 23.21, '42'],
       [1, 1, 9.0, 28.64, '65'],
       [1, 3, 9.0, 44.95, '115'],
       [1, 1, 12.4, 23.77, '49.5'],
       [1, 1, 7.5, 20.21, '36.5'],
       [1, 3, 14.0, 32.62, '109'],
       [1, 1, 7.0, 17.84, '45'],
       [1, 2, 9.0, 22.82, '58'],
       [1, 2, 12.0, 29.48, '89'],
       [1, 1, 5.5, 15.61, '30'],
       [1, 2, 6.0, 13.25, '31'],
       [1, 3, 12.0, 45.78, '119'],
       [2, 1, 5.5, 26.53, '22'],
       ...], dtype=object)

In [52]:
# Wrap the array back into a DataFrame. With no column names supplied, the
# columns get default integer labels 0..4.
df_pd = pd.DataFrame(df_num)

In [54]:
df_pd.head()

Unnamed: 0,0,1,2,3,4
0,2,2,10.7,47.65,144
1,2,3,14.0,63.13,215
2,2,2,9.0,58.76,105
3,1,1,8.0,34.88,69
4,2,2,10.0,55.53,134


In [55]:
# Rebuild the frame from the array again, this time passing the original
# column names explicitly so the labels are restored.
df_pd2 = pd.DataFrame(data=df_num, columns=['continent', 'brand', 'size', 'period', 'price'])
df_pd2.head()

Unnamed: 0,continent,brand,size,period,price
0,2,2,10.7,47.65,144
1,2,3,14.0,63.13,215
2,2,2,9.0,58.76,105
3,1,1,8.0,34.88,69
4,2,2,10.0,55.53,134
