In [2]:
pip install pandas

Collecting pandas
  Downloading pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl (11.3 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m:01[0m:01[0m
[?25hDownloading pytz-2024.1-py2.py3-none-any.whl (505 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m505.5/505.5 kB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.4/345.4 kB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytz, tzdata, pandas
Successful

In [3]:
import pandas as pd

In [7]:
import numpy as np
arr = np.arange(100, 110)
print(arr)

[100 101 102 103 104 105 106 107 108 109]


In [8]:
s = pd.Series(arr)
print(s)

0    100
1    101
2    102
3    103
4    104
5    105
6    106
7    107
8    108
9    109
dtype: int64


In [9]:
s = pd.Series(arr, dtype='int32')
print(s)

0    100
1    101
2    102
3    103
4    104
5    105
6    106
7    107
8    108
9    109
dtype: int32


In [10]:
person = ['kim', 'lee', 'park', 'hong']
s = pd.Series(person)

In [11]:
print(s)

0     kim
1     lee
2    park
3    hong
dtype: object


In [12]:
s = pd.Series([1, 2, 3, 4, 5, '99'])
print(s)

0     1
1     2
2     3
3     4
4     5
5    99
dtype: object


### indexing

In [13]:
person = ['kim', 'lee', 'park', 'hong']
s = pd.Series(person)

In [14]:
s.index

RangeIndex(start=0, stop=4, step=1)

In [15]:
s[1]

'lee'

In [16]:
s[2]

'park'

In [None]:
s[-1]

In [19]:
s = pd.Series(person, index=['a', 'b', 'c', 'd'])
print(s)

a     kim
b     lee
c    park
d    hong
dtype: object


In [20]:
s['a']

'kim'

In [21]:
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [25]:
s.values

array(['kim', 'lee', 'park', 'hong'], dtype=object)

In [26]:
s.ndim

1

In [27]:
s.shape

(4,)

### NaN(Not A Number

In [28]:
s = pd.Series(person + [np.nan])
print(s)

0     kim
1     lee
2    park
3    hong
4     NaN
dtype: object


In [30]:
arr = np.arange(50, 55)
s = pd.Series(arr, dtype='float')
print(s)

0    50.0
1    51.0
2    52.0
3    53.0
4    54.0
dtype: float64


In [35]:
f = ['바나나', np.nan, '사과', '포도', '복숭아']
s = pd.Series(f, index = '가나다라마')

TypeError: Index(...) must be called with a collection of some kind, '가나다라마' was passed

# bool

In [34]:
s[[True, False, True, False, True]]

0    바나나
2     사과
4    복숭아
dtype: object

In [36]:
arr = np.arange(5, 20)
s = pd.Series(arr)
s < 10

0      True
1      True
2      True
3      True
4      True
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
dtype: bool

In [37]:
s[s < 10]

0    5
1    6
2    7
3    8
4    9
dtype: int64

In [38]:
s[s == 5]

0    5
dtype: int64

## 결측치처리

In [77]:
arr = np.arange(9).reshape(3, 3)
print(arr)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [78]:
df = pd.DataFrame(arr)
df

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8


In [79]:
df = pd.DataFrame(
    arr,
    columns = ['가', '나', '다'],
    index = ['a', 'b', 'c'] #보통 인덱스는 왠만하면 바꾸지 않는다
)
df

Unnamed: 0,가,나,다
a,0,1,2
b,3,4,5
c,6,7,8


In [80]:
data = {
    'name': ['kim', 'lee', 'park'],
    'age': [10, 15, 20],
    'location': ['seoul', 'LA', 'tokyo']
}
df = pd.DataFrame(data)
df

Unnamed: 0,name,age,location
0,kim,10,seoul
1,lee,15,LA
2,park,20,tokyo


In [81]:
df.index

RangeIndex(start=0, stop=3, step=1)

In [82]:
df.columns

Index(['name', 'age', 'location'], dtype='object')

In [83]:
df.values

array([['kim', 10, 'seoul'],
       ['lee', 15, 'LA'],
       ['park', 20, 'tokyo']], dtype=object)

In [84]:
df.dtypes

name        object
age          int64
location    object
dtype: object

In [85]:
df.T

Unnamed: 0,0,1,2
name,kim,lee,park
age,10,15,20
location,seoul,LA,tokyo


In [86]:
df.index = list('abc')

In [87]:
df

Unnamed: 0,name,age,location
a,kim,10,seoul
b,lee,15,LA
c,park,20,tokyo


In [88]:
df['name']

a     kim
b     lee
c    park
Name: name, dtype: object

In [89]:
type(df['name'])

pandas.core.series.Series

In [90]:
df[['name','location']]

Unnamed: 0,name,location
a,kim,seoul
b,lee,LA
c,park,tokyo


In [91]:
type(df[['name','location']])

pandas.core.frame.DataFrame

In [92]:
df.rename(columns={'location':'hometown'}, inplace=True)

In [94]:
df

Unnamed: 0,name,age,hometown
a,kim,10,seoul
b,lee,15,LA
c,park,20,tokyo
