# 판다스(pandas)

In [2]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.3.1-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading pandas-2.3.1-cp313-cp313-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ------- -------------------------------- 2.1/11.0 MB 11.1 MB/s eta 0:00:01
   ---------------- ----------------------- 4.5/11.0 MB 11.4 MB/s eta 0:00:01
   ------------------------ --------------- 6.8/11.0 MB 11.5 MB/s eta 0:00:01
   --------------------------------- ------ 9.2/11.0 MB 11.6 MB/s eta 0:00:01
   ---------------------------------------- 11.0/11.0 MB 10.9 MB/s eta 0:00:00
Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Installing collected packages: pytz, pandas

   ---------------------------------------- 0/2 [pytz]
   -------------------- ------------------- 1/2 [pandas]
   -------------------- ------------------- 1/2 [pandas]
   -----------------

In [1]:
import numpy as np
import pandas as pd

In [2]:
# pandas Series
data1 = np.arange(1,6)
print(data1)
data2 = pd.Series(data1)
print(data2)
data2.index, data2.values

[1 2 3 4 5]
0    1
1    2
2    3
3    4
4    5
dtype: int64


(RangeIndex(start=0, stop=5, step=1), array([1, 2, 3, 4, 5]))

In [3]:
data3 = pd.Series([11,22,33,44,55])
data3

0    11
1    22
2    33
3    44
4    55
dtype: int64

In [4]:
dd = {"apple": 4400,"banana": 3500, "kiwi": 2200,"orange": 1700, "mango": 8800}
data4 = pd.Series(dd)
data4

apple     4400
banana    3500
kiwi      2200
orange    1700
mango     8800
dtype: int64

In [5]:
print(type(data4))
data4.name = "과일가격표"
data4.index.name = "과일이름"
data4, data4.index, data4.values
data4

<class 'pandas.core.series.Series'>


과일이름
apple     4400
banana    3500
kiwi      2200
orange    1700
mango     8800
Name: 과일가격표, dtype: int64

In [6]:
data2.index = ["aa","bb","cc","dd","ee"] # 개수가 같아야 함
data2, data2.index, data2.values

(aa    1
 bb    2
 cc    3
 dd    4
 ee    5
 dtype: int64,
 Index(['aa', 'bb', 'cc', 'dd', 'ee'], dtype='object'),
 array([1, 2, 3, 4, 5]))

In [7]:
# 판다스 시리즈 데이터 타입
data5 = pd.Series(np.arange(1,7), dtype = "float",index = ["aa","bb","cc","dd","ee","ff"])
data5, data5.shape, data5.size, data5.ndim


(aa    1.0
 bb    2.0
 cc    3.0
 dd    4.0
 ee    5.0
 ff    6.0
 dtype: float64,
 (6,),
 6,
 1)

In [12]:
# 시리즈 원소에 접근방법
print(data5)
print("data5['dd']:",data5['dd'])
print("data5.loc['dd']",data5.loc['dd'])
print("data5[3]", data5[3]) # deprecated
print("data5.iloc[3]:", data5.iloc[3])

aa    1.0
bb    2.0
cc    3.0
dd    4.0
ee    5.0
ff    6.0
dtype: float64
data5['dd']: 4.0
data5.loc['dd'] 4.0
data5[3] 4.0
data5.iloc[3]: 4.0


  print("data5[3]", data5[3]) # deprecated


# Data Frame

In [14]:
import pandas as pd
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]],index=["a","b","c"], columns=["A","B","c"])
df

Unnamed: 0,A,B,c
a,1,2,3
b,4,5,6
c,7,8,9


In [15]:
num = np.arange(1,10).reshape(3,3)
df = pd.DataFrame(num, index=["a","b","c"], columns=["A","B","c"])
df

Unnamed: 0,A,B,c
a,1,2,3
b,4,5,6
c,7,8,9


In [16]:
dd = {"과일이름":["apple", "banana","kiwi","melon", "orange","mango"],
      "가격":[4400,5000,2500,7000,2000,8800],
      "개수":[3,8,11,22,6,5]}
df2 = pd.DataFrame(dd)
df2

Unnamed: 0,과일이름,가격,개수
0,apple,4400,3
1,banana,5000,8
2,kiwi,2500,11
3,melon,7000,22
4,orange,2000,6
5,mango,8800,5


In [17]:
df2.columns.name = "info"
df2.index.name = ''
df2.columns = ['과일이름','원가','수량']
df2

Unnamed: 0,과일이름,원가,수량
,,,
0.0,apple,4400.0,3.0
1.0,banana,5000.0,8.0
2.0,kiwi,2500.0,11.0
3.0,melon,7000.0,22.0
4.0,orange,2000.0,6.0
5.0,mango,8800.0,5.0


In [18]:
df2.describe()

Unnamed: 0,원가,수량
count,6.0,6.0
mean,4950.0,9.166667
std,2609.022806,6.853223
min,2000.0,3.0
25%,2975.0,5.25
50%,4700.0,7.0
75%,6500.0,10.25
max,8800.0,22.0


In [19]:
df3 = pd.DataFrame(df2, columns = ['과일이름', '품종','원가','할인','수량','합계'])
df3, df3.dtypes

(     과일이름  품종    원가  할인  수량  합계
                                
 0   apple NaN  4400 NaN   3 NaN
 1  banana NaN  5000 NaN   8 NaN
 2    kiwi NaN  2500 NaN  11 NaN
 3   melon NaN  7000 NaN  22 NaN
 4  orange NaN  2000 NaN   6 NaN
 5   mango NaN  8800 NaN   5 NaN,
 과일이름     object
 품종      float64
 원가        int64
 할인      float64
 수량        int64
 합계      float64
 dtype: object)

In [20]:
# df3["품종"] = df3["품종"].astype('object')
df3.loc[:,'품종'] = df3.loc[:,'품종'].astype('object')
df3.loc[:,'품종'] = ['신선한','궁금한','맛있는','최적의','웰빙','유기농']
df3.loc[:,'할인'] = [0.2,0.3,0.2,0.1,0.2,0.3]
df3

  df3.loc[:,'품종'] = df3.loc[:,'품종'].astype('object')


Unnamed: 0,과일이름,품종,원가,할인,수량,합계
,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,
1.0,banana,궁금한,5000.0,0.3,8.0,
2.0,kiwi,맛있는,2500.0,0.2,11.0,
3.0,melon,최적의,7000.0,0.1,22.0,
4.0,orange,웰빙,2000.0,0.2,6.0,
5.0,mango,유기농,8800.0,0.3,5.0,


In [21]:
df3['합계'] = (df3["원가"] * df3["수량"] * (1 - df3["할인"])).astype('int')
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계
,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0
3.0,melon,최적의,7000.0,0.1,22.0,138600.0
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0
5.0,mango,유기농,8800.0,0.3,5.0,30799.0


In [22]:
df3['ETC'] = "-"
df3['Test'] = "test"
print(df3.dtypes)
df3

과일이름     object
품종       object
원가        int64
할인      float64
수량        int64
합계        int64
ETC      object
Test     object
dtype: object


Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC,Test
,,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,-,test
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,-,test
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,-,test
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,-,test
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,-,test
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,-,test


In [23]:
del df3['Test']
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC
,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,-
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,-
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,-
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,-
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,-
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,-


In [24]:
ee =pd.Series([1.3,2.2], index=[2,4])
df3['ETC'] = ee
df3 = df3.fillna('')
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC
,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,1.3
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,2.2
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,


In [25]:
df3['PASS'] = ''
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC,PASS
,,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,,
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,,
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,1.3,
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,,
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,2.2,
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,,


In [26]:
pp = np.where(df3['합계']<=30000,'구매각','-')
df3['PASS'] = pp
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC,PASS
,,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,,구매각
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,,구매각
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,1.3,구매각
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,,-
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,2.2,구매각
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,,-


In [29]:
df3['DV'] = np.where(df3['수량']>=7,'포장','번들')
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC,PASS,DV
,,,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,,구매각,번들
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,,구매각,포장
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,1.3,구매각,포장
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,,-,포장
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,2.2,구매각,번들
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,,-,번들


In [36]:
# df3.iloc[1, 1:4]
# df3.loc[2,:]
df3.loc[df3.loc[:,'과일이름'] == 'banana', '품종':'수량']

Unnamed: 0,품종,원가,할인,수량
,,,,
1.0,궁금한,5000.0,0.3,8.0


In [35]:
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC,PASS,DV
,,,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,,구매각,번들
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,,구매각,포장
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,1.3,구매각,포장
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,,-,포장
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,2.2,구매각,번들
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,,-,번들


In [63]:

# df3.loc['1':'5','과일이름':'할인']
# df3.iloc[1:4,0:4]
# df3.iloc[[0,2,4],[0,2,5,7]]
df3.loc[[1,3,5],['과일이름','원가','합계','PASS']]

Unnamed: 0,과일이름,원가,합계,PASS
,,,,
1.0,banana,5000.0,28000.0,구매각
3.0,melon,7000.0,138600.0,-
5.0,mango,8800.0,30799.0,-


In [62]:
df3

Unnamed: 0,과일이름,품종,원가,할인,수량,합계,ETC,PASS,DV
,,,,,,,,,
0.0,apple,신선한,4400.0,0.2,3.0,10560.0,,구매각,번들
1.0,banana,궁금한,5000.0,0.3,8.0,28000.0,,구매각,포장
2.0,kiwi,맛있는,2500.0,0.2,11.0,22000.0,1.3,구매각,포장
3.0,melon,최적의,7000.0,0.1,22.0,138600.0,,-,포장
4.0,orange,웰빙,2000.0,0.2,6.0,9600.0,2.2,구매각,번들
5.0,mango,유기농,8800.0,0.3,5.0,30799.0,,-,번들


In [86]:
import numpy as np
import pandas as pd
print("인구수 시리즈 데이터")
population_dic = {
    'korea':5182,
    'japan':12622,
    'china':141178,
    'usa' :32976
}
print("GDP 시리즈데이터")
GDP_dic = {
    'korea':169320000,
    'japan':516700000,
    'china':140925000,
    'usa':2041280000
}
population = pd.Series(population_dic)
GDP = pd.Series(GDP_dic)
print(population)
print(GDP)
print("-"*30)
print("데이터 프레임 생성")
country = pd.DataFrame({"population":population,"GDP":GDP})
country

인구수 시리즈 데이터
GDP 시리즈데이터
korea      5182
japan     12622
china    141178
usa       32976
dtype: int64
korea     169320000
japan     516700000
china     140925000
usa      2041280000
dtype: int64
------------------------------
데이터 프레임 생성


Unnamed: 0,population,GDP
korea,5182,169320000
japan,12622,516700000
china,141178,140925000
usa,32976,2041280000


In [87]:
country.loc[['korea'],:]

Unnamed: 0,population,GDP
korea,5182,169320000


In [88]:
country['GDP_per'] = country['GDP'] / country["population"]
country

Unnamed: 0,population,GDP,GDP_per
korea,5182,169320000,32674.642995
japan,12622,516700000,40936.460149
china,141178,140925000,998.207936
usa,32976,2041280000,61901.989326


In [89]:
country.loc['india'] = [142222,1505000,1212121]
country

Unnamed: 0,population,GDP,GDP_per
korea,5182,169320000,32674.64
japan,12622,516700000,40936.46
china,141178,140925000,998.2079
usa,32976,2041280000,61901.99
india,142222,1505000,1212121.0


In [92]:
country.loc['africa'] = pd.Series([123456,7777777],index = ["population","GDP"])
country

Unnamed: 0,population,GDP,GDP_per
korea,5182.0,169320000.0,32674.64
japan,12622.0,516700000.0,40936.46
china,141178.0,140925000.0,998.2079
usa,32976.0,2041280000.0,61901.99
india,142222.0,1505000.0,1212121.0
africa,123456.0,7777777.0,


In [102]:
country.drop('africa', axis = 0)
# country.drop(country[country.isnull().any(axis=1)].index, axis = 0)
country

Unnamed: 0,population,GDP,GDP_per
korea,5182.0,169320000.0,32674.64
japan,12622.0,516700000.0,40936.46
china,141178.0,140925000.0,998.2079
usa,32976.0,2041280000.0,61901.99
india,142222.0,1505000.0,1212121.0
africa,123456.0,7777777.0,
