In [1]:
import numpy as np
import pandas as pd

In [2]:
two_dimensional_list = [['dongwook', 50, 86], ['sineui', 89, 31], ['ikjoong', 68, 91], ['yoonsoo', 88, 75]]

In [3]:
my_df = pd.DataFrame(two_dimensional_list, columns=['name', 'english_score', 'math_score'], index=['a', 'b', 'c', 'd'])

In [4]:
my_df

Unnamed: 0,name,english_score,math_score
a,dongwook,50,86
b,sineui,89,31
c,ikjoong,68,91
d,yoonsoo,88,75


In [5]:
type(my_df)

pandas.core.frame.DataFrame

In [6]:
my_df.columns

Index(['name', 'english_score', 'math_score'], dtype='object')

In [7]:
my_df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [8]:
my_df.dtypes

name             object
english_score     int64
math_score        int64
dtype: object

# From list of lists, array of arrays, list of series

2차원 리스트나 2차원 numpy array로 DataFrame을 만들 수 있습니다. 심지어 pandas Series를 담고 있는 리스트로도 DataFrame을 만들 수 있습니다.

따로 column과 row(index)에 대한 설정이 없으면 그냥 0, 1, 2, ... 순서로 값이 매겨집니다.

In [9]:
two_dimensional_list = [['dongwook', 50, 86], ['sineui', 89, 31], ['ikjoong', 68, 91], ['yoonsoo', 88, 75]]

two_dimensional_array = np.array(two_dimensional_list)

list_of_series=[pd.Series(['dongwook', 50, 86]),
               pd.Series(['sineui', 89, 31]),
               pd.Series(['ikjoong', 68, 91]),
               pd.Series(['yoonsoo', 88, 75])]

In [10]:
# 아래 셋은 모두 동일합니다
df1 = pd.DataFrame(two_dimensional_list)
df2 = pd.DataFrame(two_dimensional_array)
df3 = pd.DataFrame(list_of_series)

print(df1,'\n')
print(df2,'\n')
print(df3,'\n')

          0   1   2
0  dongwook  50  86
1    sineui  89  31
2   ikjoong  68  91
3   yoonsoo  88  75 

          0   1   2
0  dongwook  50  86
1    sineui  89  31
2   ikjoong  68  91
3   yoonsoo  88  75 

          0   1   2
0  dongwook  50  86
1    sineui  89  31
2   ikjoong  68  91
3   yoonsoo  88  75 



# From dict of lists, dict of arrays, dict of series

파이썬 사전(dictionary)으로도 DataFrame을 만들 수 있습니다.

사전의 key로는 column 이름을 쓰고, 그 column에 해당하는 리스트, numpy array, 혹은 pandas Series를 사전의 value로 넣어주면 됩니다.

In [11]:
names = ['dongwook', 'sineui', 'ikjoong', 'yoonsoo']
english_scores = [50, 89, 68, 88]
math_scores = [86, 31, 91, 75]

In [12]:
dict1 = {'name': names, 'english_score': english_scores, 'math_score': math_scores}
dict2 = {'name': np.array(names), 'english_score': np.array(english_scores), 'math_score': np.array(math_scores)}
dict3 = {'name': pd.Series(names), 'english_score': pd.Series(english_scores), 'math_score': pd.Series(math_scores)}

In [13]:
# 아래 셋은 모두 동일합니다
df1 = pd.DataFrame(dict1)
df2 = pd.DataFrame(dict2)
df3 = pd.DataFrame(dict3)

print(df1, '\n')
print(df2, '\n')
print(df3, '\n')

       name  english_score  math_score
0  dongwook             50          86
1    sineui             89          31
2   ikjoong             68          91
3   yoonsoo             88          75 

       name  english_score  math_score
0  dongwook             50          86
1    sineui             89          31
2   ikjoong             68          91
3   yoonsoo             88          75 

       name  english_score  math_score
0  dongwook             50          86
1    sineui             89          31
2   ikjoong             68          91
3   yoonsoo             88          75 



# From list of dicts
리스트가 담긴 사전이 아니라, 사전이 담긴 리스트로도 DataFrame을 만들 수 있습니다.

In [14]:
my_list = [{'name':'dongwook', 'english_score': 50, 'math_score': 86}, 
           {'name':'sineui', 'english_score': 89, 'math_score': 31}, 
           {'name':'ikjoong', 'english_score': 68, 'math_score': 91}, 
           {'name':'yoonsoo', 'english_score': 88, 'math_score': 75}]

In [15]:
df = pd.DataFrame(my_list)

In [16]:
df

Unnamed: 0,name,english_score,math_score
0,dongwook,50,86
1,sineui,89,31
2,ikjoong,68,91
3,yoonsoo,88,75


# pandas의 dtype들

|dtype|설명|
|:-------------------------|:----------------------|
|int64|**정수**|
|float64|**소수**|
|object|**텍스트**|
|bool|**불린(참과 거짓)**|
|datetime64|**날짜와 시간**|
|category|**카테고리**|

In [17]:
iphone_df = pd.read_csv('data/iphone.csv', index_col = 0)

In [18]:
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [19]:
iphone_df.loc['iPhone 8', '메모리']

'2GB'

In [20]:
iphone_df.loc['iPhone X', :]

출시일        2017-11-03
디스플레이             5.8
메모리               3GB
출시 버전        iOS 11.1
Face ID           Yes
Name: iPhone X, dtype: object

In [21]:
type(iphone_df.loc['iPhone X', :])

pandas.core.series.Series

In [22]:
iphone_df.loc['iPhone X']

출시일        2017-11-03
디스플레이             5.8
메모리               3GB
출시 버전        iOS 11.1
Face ID           Yes
Name: iPhone X, dtype: object

In [23]:
iphone_df.loc[:,'출시일']

iPhone 7         2016-09-16
iPhone 7 Plus    2016-09-16
iPhone 8         2017-09-22
iPhone 8 Plus    2017-09-22
iPhone X         2017-11-03
iPhone XS        2018-09-21
iPhone XS Max    2018-09-21
Name: 출시일, dtype: object

In [24]:
iphone_df['출시일']

iPhone 7         2016-09-16
iPhone 7 Plus    2016-09-16
iPhone 8         2017-09-22
iPhone 8 Plus    2017-09-22
iPhone X         2017-11-03
iPhone XS        2018-09-21
iPhone XS Max    2018-09-21
Name: 출시일, dtype: object

In [25]:
iphone_df[['출시일', 'Face ID']]

Unnamed: 0,출시일,Face ID
iPhone 7,2016-09-16,No
iPhone 7 Plus,2016-09-16,No
iPhone 8,2017-09-22,No
iPhone 8 Plus,2017-09-22,No
iPhone X,2017-11-03,Yes
iPhone XS,2018-09-21,Yes
iPhone XS Max,2018-09-21,Yes


In [26]:
iphone_df.loc[['iPhone X', 'iPhone 8']]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No


In [27]:
iphone_df[['Face ID', '출시일', '메모리']]

Unnamed: 0,Face ID,출시일,메모리
iPhone 7,No,2016-09-16,2GB
iPhone 7 Plus,No,2016-09-16,3GB
iPhone 8,No,2017-09-22,2GB
iPhone 8 Plus,No,2017-09-22,3GB
iPhone X,Yes,2017-11-03,3GB
iPhone XS,Yes,2018-09-21,4GB
iPhone XS Max,Yes,2018-09-21,4GB


In [28]:
iphone_df.loc['iPhone 8':'iPhone XS']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes


In [29]:
iphone_df.loc[:'iPhone XS']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes


In [30]:
iphone_df['메모리':'Face ID']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID


In [31]:
iphone_df.loc[:,'메모리':'Face ID']

Unnamed: 0,메모리,출시 버전,Face ID
iPhone 7,2GB,iOS 10.0,No
iPhone 7 Plus,3GB,iOS 10.0,No
iPhone 8,2GB,iOS 11.0,No
iPhone 8 Plus,3GB,iOS 11.0,No
iPhone X,3GB,iOS 11.1,Yes
iPhone XS,4GB,iOS 12.0,Yes
iPhone XS Max,4GB,iOS 12.0,Yes


In [32]:
iphone_df.loc['iPhone 7':'iPhone X','메모리':'Face ID']

Unnamed: 0,메모리,출시 버전,Face ID
iPhone 7,2GB,iOS 10.0,No
iPhone 7 Plus,3GB,iOS 10.0,No
iPhone 8,2GB,iOS 11.0,No
iPhone 8 Plus,3GB,iOS 11.0,No
iPhone X,3GB,iOS 11.1,Yes


In [33]:
iphone_df.loc[[True, False, True, True, False, True, False]]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes


In [34]:
iphone_df.loc[[True, False, False, True, False, False, False]]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No


In [35]:
iphone_df.loc[:,[True, False, False, True, False]]

Unnamed: 0,출시일,출시 버전
iPhone 7,2016-09-16,iOS 10.0
iPhone 7 Plus,2016-09-16,iOS 10.0
iPhone 8,2017-09-22,iOS 11.0
iPhone 8 Plus,2017-09-22,iOS 11.0
iPhone X,2017-11-03,iOS 11.1
iPhone XS,2018-09-21,iOS 12.0
iPhone XS Max,2018-09-21,iOS 12.0


In [36]:
iphone_df['디스플레이'] > 5

iPhone 7         False
iPhone 7 Plus     True
iPhone 8         False
iPhone 8 Plus     True
iPhone X          True
iPhone XS         True
iPhone XS Max     True
Name: 디스플레이, dtype: bool

In [37]:
iphone_df.loc[iphone_df['디스플레이'] > 5]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [38]:
iphone_df.loc[iphone_df['Face ID'] == 'Yes']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [39]:
condition = (iphone_df['디스플레이'] > 5) & (iphone_df['Face ID'] == 'Yes')

In [40]:
iphone_df.loc[condition]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [41]:
condition = (iphone_df['디스플레이'] > 5) | (iphone_df['Face ID'] == 'Yes')

In [42]:
iphone_df.loc[condition]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [43]:
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [44]:
iphone_df.iloc[2,4]

'No'

In [45]:
iphone_df.iloc[[1,3],[1,4]]

Unnamed: 0,디스플레이,Face ID
iPhone 7 Plus,5.5,No
iPhone 8 Plus,5.5,No


In [46]:
iphone_df.iloc[3:,1:4]

Unnamed: 0,디스플레이,메모리,출시 버전
iPhone 8 Plus,5.5,3GB,iOS 11.0
iPhone X,5.8,3GB,iOS 11.1
iPhone XS,5.8,4GB,iOS 12.0
iPhone XS Max,6.5,4GB,iOS 12.0


In [47]:
iphone_df.loc['iPhone 7']

출시일        2016-09-16
디스플레이             4.7
메모리               2GB
출시 버전        iOS 10.0
Face ID            No
Name: iPhone 7, dtype: object

DataFrame 인덱싱을 하는 방법과 종류가 많아서 헷갈리기 쉽습니다.

인덱싱이 익숙해져야 다음 내용을 쉽게 배울 수 있으니, 꼭 숙지하고 넘어가세요!

이름으로 인덱싱하기|기본형태|단축형태
:--------------------|:------------------------------------:|:----------------|
하나의 row 이름|df.loc['row4']||
row이름의 리스트|df.loc[['row4', 'row5', 'row3]]||
row이름의 리스트 슬라이싱|df.loc['row2':'row5']|df['row2':'row5']
하나의 column 이름|df.loc[:,'col1']|df['col1']
column 이름의 리스트|df.loc[:,['col4', 'col6', 'col3']]|df[['col4', 'col6', 'col3']]
column 이름의 리스트 슬라이싱|df.loc[:,'col2':'col5']


위치로 인덱싱하기|기본형태|단축형태
:--------------------|:------------------------------------:|:----------------|
하나의 row 위치|df.iloc[8]||
row 위치의 리스트|df.iloc[[4, 5, 3]]||
row 위치의 리스트 슬라이싱|df.iloc[2:5]|df[2:5]|
하나의 column 위치|df.iloc[:,3]||
column 위치의 리스트|df.iloc[:,[3, 5, 6]]||
column 위치의 리스트 슬라이싱|df.iloc[:,3:7]||

다음 데이터프레임에서 지정하는 데이터를 뽑아내거나 처리하라.

In [48]:
data = {
    "국어": [80, 90, 70, 30],
    "영어": [90, 70, 60, 40],
    "수학": [90, 60, 80, 70],
}
columns = ["국어", "영어", "수학"]
index = ["춘향", "몽룡", "향단", "방자"]
df = pd.DataFrame(data, index=index, columns=columns)

(1) 모든 학생의 수학 점수를 시리즈로 나타낸다.

In [49]:
df["수학"]

춘향    90
몽룡    60
향단    80
방자    70
Name: 수학, dtype: int64

(2) 모든 학생의 국어와 영어 점수를 데이터 프레임으로 나타낸다.

In [50]:
df[["국어", "영어"]]

Unnamed: 0,국어,영어
춘향,80,90
몽룡,90,70
향단,70,60
방자,30,40


(3) 모든 학생의 각 과목 평균 점수를 새로운 열로 추가한다.

In [51]:
df["평균"] = df.mean(axis=1)

In [52]:
df

Unnamed: 0,국어,영어,수학,평균
춘향,80,90,90,86.666667
몽룡,90,70,60,73.333333
향단,70,60,80,70.0
방자,30,40,70,46.666667


(4) 방자의 영어 점수를 80점으로 수정하고 평균 점수도 다시 계산한다.

In [53]:
df["영어"]["방자"] = 80

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["영어"]["방자"] = 80


In [54]:
df

Unnamed: 0,국어,영어,수학,평균
춘향,80,90,90,86.666667
몽룡,90,70,60,73.333333
향단,70,60,80,70.0
방자,30,80,70,46.666667


In [55]:
df["평균"] = df.mean(axis=1)

In [56]:
df

Unnamed: 0,국어,영어,수학,평균
춘향,80,90,90,86.666667
몽룡,90,70,60,73.333333
향단,70,60,80,70.0
방자,30,80,70,56.666667


(5) 춘향의 점수를 데이터프레임으로 나타낸다.

In [57]:
df["춘향":"춘향"]

Unnamed: 0,국어,영어,수학,평균
춘향,80,90,90,86.666667


(6) 향단의 점수를 시리즈로 나타낸다.

In [58]:
df.T["향단"]

국어    70.0
영어    60.0
수학    80.0
평균    70.0
Name: 향단, dtype: float64

In [59]:
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [60]:
iphone_df.loc['iPhone 8','메모리'] = '2.5GB'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2.5GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [61]:
iphone_df.loc['iPhone 8', '출시 버전'] = 'iOS 10.3'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2.5GB,iOS 10.3,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [63]:
iphone_df.loc['iPhone 8'] = ['2016-09-22', '4.7', '2GB', 'iOS 11.0', 'No']
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2016-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [65]:
iphone_df['디스플레이'] = ['4.7 in', '5.5 in', '4.7 in', '5.5 in', '5.8 in', '5.8 in', '6.5 in']
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7 in,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5 in,3GB,iOS 10.0,No
iPhone 8,2016-09-22,4.7 in,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5 in,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8 in,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8 in,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5 in,4GB,iOS 12.0,Yes


In [67]:
iphone_df['Face ID'] = 'Yes'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7 in,2GB,iOS 10.0,Yes
iPhone 7 Plus,2016-09-16,5.5 in,3GB,iOS 10.0,Yes
iPhone 8,2016-09-22,4.7 in,2GB,iOS 11.0,Yes
iPhone 8 Plus,2017-09-22,5.5 in,3GB,iOS 11.0,Yes
iPhone X,2017-11-03,5.8 in,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8 in,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5 in,4GB,iOS 12.0,Yes


In [70]:
iphone_df = pd.read_csv('data/iphone.csv', index_col=0)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [71]:
iphone_df[['디스플레이', 'Face ID']]

Unnamed: 0,디스플레이,Face ID
iPhone 7,4.7,No
iPhone 7 Plus,5.5,No
iPhone 8,4.7,No
iPhone 8 Plus,5.5,No
iPhone X,5.8,Yes
iPhone XS,5.8,Yes
iPhone XS Max,6.5,Yes


In [73]:
iphone_df[['디스플레이', 'Face ID']] = 'x'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,x,2GB,iOS 10.0,x
iPhone 7 Plus,2016-09-16,x,3GB,iOS 10.0,x
iPhone 8,2017-09-22,x,2GB,iOS 11.0,x
iPhone 8 Plus,2017-09-22,x,3GB,iOS 11.0,x
iPhone X,2017-11-03,x,3GB,iOS 11.1,x
iPhone XS,2018-09-21,x,4GB,iOS 12.0,x
iPhone XS Max,2018-09-21,x,4GB,iOS 12.0,x


In [74]:
iphone_df.loc[['iPhone 7', 'iPhone X']]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,x,2GB,iOS 10.0,x
iPhone X,2017-11-03,x,3GB,iOS 11.1,x


In [76]:
iphone_df.loc[['iPhone 7', 'iPhone X']] = 'o'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,o,o,o,o,o
iPhone 7 Plus,2016-09-16,x,3GB,iOS 10.0,x
iPhone 8,2017-09-22,x,2GB,iOS 11.0,x
iPhone 8 Plus,2017-09-22,x,3GB,iOS 11.0,x
iPhone X,o,o,o,o,o
iPhone XS,2018-09-21,x,4GB,iOS 12.0,x
iPhone XS Max,2018-09-21,x,4GB,iOS 12.0,x


In [78]:
iphone_df['iPhone 7':'iPhone X'] = 'o'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,o,o,o,o,o
iPhone 7 Plus,o,o,o,o,o
iPhone 8,o,o,o,o,o
iPhone 8 Plus,o,o,o,o,o
iPhone X,o,o,o,o,o
iPhone XS,2018-09-21,x,4GB,iOS 12.0,x
iPhone XS Max,2018-09-21,x,4GB,iOS 12.0,x


In [81]:
iphone_df = pd.read_csv('data/iphone.csv', index_col = 0)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [83]:
iphone_df[iphone_df['디스플레이'] > 5]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [84]:
iphone_df[iphone_df['디스플레이'] > 5] = 'p'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,p,p,p,p,p
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,p,p,p,p,p
iPhone X,p,p,p,p,p
iPhone XS,p,p,p,p,p
iPhone XS Max,p,p,p,p,p


In [85]:
iphone_df.iloc[[1, 3], [1, 4]]

Unnamed: 0,디스플레이,Face ID
iPhone 7 Plus,p,p
iPhone 8 Plus,p,p


In [86]:
iphone_df.iloc[[1, 3], [1, 4]] = 'v'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,p,v,p,p,v
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,p,v,p,p,v
iPhone X,p,p,p,p,p
iPhone XS,p,p,p,p,p
iPhone XS Max,p,p,p,p,p


In [87]:
iphone_df.loc[['iPhone 7 Plus', 'iPhone 8 Plus'],['디스플레이','Face ID']]

Unnamed: 0,디스플레이,Face ID
iPhone 7 Plus,v,v
iPhone 8 Plus,v,v


In [105]:
iphone_df = pd.read_csv('data/iphone.csv', index_col = 0)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [106]:
iphone_df.loc['iPhone XR'] = ['2018-10-26', 6.1, '3GB', 'iOS 12.0.1', 'Yes']
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes
iPhone XR,2018-10-26,6.1,3GB,iOS 12.0.1,Yes


In [107]:
iphone_df['제조사'] = 'Apple'
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID,제조사
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No,Apple
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No,Apple
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No,Apple
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No,Apple
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes,Apple
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes,Apple
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes,Apple
iPhone XR,2018-10-26,6.1,3GB,iOS 12.0.1,Yes,Apple


In [108]:
# iphone_df.drop('iPhone XR', axis='index', inplace=False)
iphone_df.drop('iPhone XR', axis='index')

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID,제조사
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No,Apple
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No,Apple
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No,Apple
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No,Apple
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes,Apple
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes,Apple
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes,Apple


In [109]:
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID,제조사
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No,Apple
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No,Apple
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No,Apple
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No,Apple
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes,Apple
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes,Apple
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes,Apple
iPhone XR,2018-10-26,6.1,3GB,iOS 12.0.1,Yes,Apple


In [110]:
iphone_df.drop('iPhone XR', axis='index', inplace=True)
# iphone_df.drop('iPhone XR', axis=0, inplace=True)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID,제조사
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No,Apple
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No,Apple
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No,Apple
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No,Apple
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes,Apple
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes,Apple
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes,Apple


In [111]:
# iphone_df.drop('제조사',axis='columns', inplace=True)
iphone_df.drop('제조사',axis=1, inplace=True)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [112]:
iphone_df.drop(['iPhone 7', 'iPhone 8', 'iPhone X'], axis='index', inplace=True)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [145]:
liverpool_df = pd.read_csv("data/liverpool.csv", index_col=0)
liverpool_df

Unnamed: 0,position,born,number,nationality
Roberto Firmino,FW,1991,no. 9,Brazil
Sadio Mane,FW,1992,no. 10,Senegal
Mohamed Salah,FW,1992,no. 11,Egypt
Joe Gomez,DF,1997,no. 12,England
Alisson Becker,GK,1992,no. 13,Brazil


In [146]:
liverpool_df.rename(columns = {'position':'Position'})

Unnamed: 0,Position,born,number,nationality
Roberto Firmino,FW,1991,no. 9,Brazil
Sadio Mane,FW,1992,no. 10,Senegal
Mohamed Salah,FW,1992,no. 11,Egypt
Joe Gomez,DF,1997,no. 12,England
Alisson Becker,GK,1992,no. 13,Brazil


In [147]:
liverpool_df

Unnamed: 0,position,born,number,nationality
Roberto Firmino,FW,1991,no. 9,Brazil
Sadio Mane,FW,1992,no. 10,Senegal
Mohamed Salah,FW,1992,no. 11,Egypt
Joe Gomez,DF,1997,no. 12,England
Alisson Becker,GK,1992,no. 13,Brazil


In [148]:
liverpool_df.rename(columns = {'position':'Position'}, inplace=True)

In [149]:
liverpool_df

Unnamed: 0,Position,born,number,nationality
Roberto Firmino,FW,1991,no. 9,Brazil
Sadio Mane,FW,1992,no. 10,Senegal
Mohamed Salah,FW,1992,no. 11,Egypt
Joe Gomez,DF,1997,no. 12,England
Alisson Becker,GK,1992,no. 13,Brazil


In [150]:
liverpool_df.rename(columns = {'position':'Position', 'born':'Born', 'number':'Number', 'nationality':'Nationality'}, inplace=True)
liverpool_df

Unnamed: 0,Position,Born,Number,Nationality
Roberto Firmino,FW,1991,no. 9,Brazil
Sadio Mane,FW,1992,no. 10,Senegal
Mohamed Salah,FW,1992,no. 11,Egypt
Joe Gomez,DF,1997,no. 12,England
Alisson Becker,GK,1992,no. 13,Brazil


In [151]:
liverpool_df.index.name = "Player Name"
liverpool_df

Unnamed: 0_level_0,Position,Born,Number,Nationality
Player Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Roberto Firmino,FW,1991,no. 9,Brazil
Sadio Mane,FW,1992,no. 10,Senegal
Mohamed Salah,FW,1992,no. 11,Egypt
Joe Gomez,DF,1997,no. 12,England
Alisson Becker,GK,1992,no. 13,Brazil


In [152]:
liverpool_df.set_index('Number')

Unnamed: 0_level_0,Position,Born,Nationality
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no. 9,FW,1991,Brazil
no. 10,FW,1992,Senegal
no. 11,FW,1992,Egypt
no. 12,DF,1997,England
no. 13,GK,1992,Brazil


In [153]:
liverpool_df.index

Index(['Roberto Firmino', 'Sadio Mane', 'Mohamed Salah', 'Joe Gomez',
       'Alisson Becker'],
      dtype='object', name='Player Name')

In [154]:
liverpool_df['Player Name'] = liverpool_df.index
liverpool_df.set_index('Number')

Unnamed: 0_level_0,Position,Born,Nationality,Player Name
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no. 9,FW,1991,Brazil,Roberto Firmino
no. 10,FW,1992,Senegal,Sadio Mane
no. 11,FW,1992,Egypt,Mohamed Salah
no. 12,DF,1997,England,Joe Gomez
no. 13,GK,1992,Brazil,Alisson Becker


In [155]:
liverpool_df

Unnamed: 0_level_0,Position,Born,Number,Nationality,Player Name
Player Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Roberto Firmino,FW,1991,no. 9,Brazil,Roberto Firmino
Sadio Mane,FW,1992,no. 10,Senegal,Sadio Mane
Mohamed Salah,FW,1992,no. 11,Egypt,Mohamed Salah
Joe Gomez,DF,1997,no. 12,England,Joe Gomez
Alisson Becker,GK,1992,no. 13,Brazil,Alisson Becker


In [156]:
liverpool_df.set_index('Number', inplace=True)
liverpool_df

Unnamed: 0_level_0,Position,Born,Nationality,Player Name
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no. 9,FW,1991,Brazil,Roberto Firmino
no. 10,FW,1992,Senegal,Sadio Mane
no. 11,FW,1992,Egypt,Mohamed Salah
no. 12,DF,1997,England,Joe Gomez
no. 13,GK,1992,Brazil,Alisson Becker


In [157]:
laptops_df = pd.read_csv("data/laptops.csv")
laptops_df

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
0,Dell,Inspiron 15-3567,4,hdd,1024,15.6,40000,intel,i5,2.5,intel,,linux,2.50,
1,Apple,MacBook Air,8,ssd,128,13.3,55499,intel,i5,1.8,intel,2.0,mac,1.35,
2,Apple,MacBook Air,8,ssd,256,13.3,71500,intel,i5,1.8,intel,2.0,mac,1.35,
3,Apple,MacBook Pro,8,ssd,128,13.3,96890,intel,i5,2.3,intel,2.0,mac,3.02,
4,Apple,MacBook Pro,8,ssd,256,13.3,112666,intel,i5,2.3,intel,2.0,mac,3.02,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,Asus,A555LF,8,hdd,1024,15.6,39961,intel,i3 4th gen,1.7,nvidia,2.0,windows,2.30,
163,Asus,X555LA-XX172D,4,hdd,500,15.6,28489,intel,i3 4th gen,1.9,intel,,linux,2.30,
164,Asus,X554LD,2,hdd,500,15.6,29199,intel,i3 4th gen,1.9,intel,1.0,linux,2.30,
165,Asus,X550LAV-XX771D,2,hdd,500,15.6,29990,intel,i3 4th gen,1.7,intel,,linux,2.50,


In [160]:
laptops_df.head(3)

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
0,Dell,Inspiron 15-3567,4,hdd,1024,15.6,40000,intel,i5,2.5,intel,,linux,2.5,
1,Apple,MacBook Air,8,ssd,128,13.3,55499,intel,i5,1.8,intel,2.0,mac,1.35,
2,Apple,MacBook Air,8,ssd,256,13.3,71500,intel,i5,1.8,intel,2.0,mac,1.35,


In [161]:
laptops_df.head(7)

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
0,Dell,Inspiron 15-3567,4,hdd,1024,15.6,40000,intel,i5,2.5,intel,,linux,2.5,
1,Apple,MacBook Air,8,ssd,128,13.3,55499,intel,i5,1.8,intel,2.0,mac,1.35,
2,Apple,MacBook Air,8,ssd,256,13.3,71500,intel,i5,1.8,intel,2.0,mac,1.35,
3,Apple,MacBook Pro,8,ssd,128,13.3,96890,intel,i5,2.3,intel,2.0,mac,3.02,
4,Apple,MacBook Pro,8,ssd,256,13.3,112666,intel,i5,2.3,intel,2.0,mac,3.02,
5,Apple,MacBook Pro (TouchBar),16,ssd,512,15.0,226000,intel,i7,2.7,intel,2.0,mac,2.5,
6,Apple,MacBook Pro (TouchBar),16,ssd,512,13.3,158000,intel,i5,2.9,intel,2.0,mac,1.37,


In [162]:
laptops_df.tail(6)

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
161,Asus,X541UA-GO1345D,4,hdd,1024,15.6,27730,intel,i3 6th gen,2.0,intel,,linux,1.84,
162,Asus,A555LF,8,hdd,1024,15.6,39961,intel,i3 4th gen,1.7,nvidia,2.0,windows,2.3,
163,Asus,X555LA-XX172D,4,hdd,500,15.6,28489,intel,i3 4th gen,1.9,intel,,linux,2.3,
164,Asus,X554LD,2,hdd,500,15.6,29199,intel,i3 4th gen,1.9,intel,1.0,linux,2.3,
165,Asus,X550LAV-XX771D,2,hdd,500,15.6,29990,intel,i3 4th gen,1.7,intel,,linux,2.5,
166,Asus,X540LA-XX538T,4,hdd,1024,15.6,30899,intel,i3 5th gen,2.0,intel,,windows,2.3,


In [163]:
laptops_df.shape

(167, 15)

In [164]:
laptops_df.columns

Index(['brand', 'model', 'ram', 'hd_type', 'hd_size', 'screen_size', 'price',
       'processor_brand', 'processor_model', 'clock_speed',
       'graphic_card_brand', 'graphic_card_size', 'os', 'weight', 'comments'],
      dtype='object')

In [177]:
laptops_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 167 entries, 5 to 148
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   brand               167 non-null    object 
 1   model               167 non-null    object 
 2   ram                 167 non-null    int64  
 3   hd_type             167 non-null    object 
 4   hd_size             167 non-null    int64  
 5   screen_size         167 non-null    float64
 6   price               167 non-null    int64  
 7   processor_brand     167 non-null    object 
 8   processor_model     167 non-null    object 
 9   clock_speed         166 non-null    float64
 10  graphic_card_brand  163 non-null    object 
 11  graphic_card_size   81 non-null     float64
 12  os                  167 non-null    object 
 13  weight              160 non-null    float64
 14  comments            55 non-null     object 
dtypes: float64(4), int64(3), object(8)
memory usage: 15.7+ KB

In [170]:
laptops_df.describe()

Unnamed: 0,ram,hd_size,screen_size,price,clock_speed,graphic_card_size,weight
count,167.0,167.0,167.0,167.0,166.0,81.0,160.0
mean,6.898204,768.91018,14.77521,64132.898204,2.321084,52.160494,2.250813
std,3.787479,392.9908,1.376526,42797.67401,0.554187,444.134142,0.648446
min,2.0,32.0,10.1,13872.0,1.1,1.0,0.78
25%,4.0,500.0,14.0,35457.5,1.9,2.0,1.9
50%,8.0,1024.0,15.6,47990.0,2.3,2.0,2.2
75%,8.0,1024.0,15.6,77494.5,2.6,4.0,2.6
max,16.0,2048.0,17.6,226000.0,3.8,4000.0,4.2


In [171]:
laptops_df.sort_values(by='price')

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
148,Acer,Aspire SW3-016,2,ssd,32,10.1,13872,intel,Atom Z8300,1.44,intel,,windows,1.2,
83,Acer,A315-31CDC UN.GNTSI.001,2,ssd,500,15.6,17990,intel,Celeron,1.10,intel,,windows,2.1,
108,Acer,Aspire ES-15 NX.GKYSI.010,4,hdd,500,15.6,17990,amd,A4-7210,1.80,amd,,windows,2.4,
100,Acer,A315-31-P4CRUN.GNTSI.002,4,hdd,500,15.6,18990,intel,pentium,1.10,intel,,windows,,
73,Acer,Aspire ES1-523,4,hdd,1024,15.6,19465,amd,A4-7210,1.80,amd,,linux,2.4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,Microsoft,Surface Book CR9-00013,8,ssd,128,13.5,178799,intel,i5,1.80,intel,,windows,1.5,
31,Acer,Predator 17,16,ssd,256,17.3,178912,intel,i7,2.60,nvidia,,windows,4.2,Integrated Graphics
96,Alienware,AW13R3-7000SLV-PUS,8,ssd,256,13.3,190256,intel,i7,3.00,nvidia,6.0,windows,2.6,13.3 inch FHD (1920 x 1080) IPS Anti-Glare 300...
90,Alienware,15 Notebook,16,hdd,1024,15.6,199000,intel,i7,2.60,nvidia,8.0,windows,3.5,Maximum Display Resolution : 1920 x 1080 pixel


In [172]:
laptops_df.sort_values(by='price', ascending=False)

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
5,Apple,MacBook Pro (TouchBar),16,ssd,512,15.0,226000,intel,i7,2.70,intel,2.0,mac,2.5,
90,Alienware,15 Notebook,16,hdd,1024,15.6,199000,intel,i7,2.60,nvidia,8.0,windows,3.5,Maximum Display Resolution : 1920 x 1080 pixel
96,Alienware,AW13R3-7000SLV-PUS,8,ssd,256,13.3,190256,intel,i7,3.00,nvidia,6.0,windows,2.6,13.3 inch FHD (1920 x 1080) IPS Anti-Glare 300...
31,Acer,Predator 17,16,ssd,256,17.3,178912,intel,i7,2.60,nvidia,,windows,4.2,Integrated Graphics
154,Microsoft,Surface Book CR9-00013,8,ssd,128,13.5,178799,intel,i5,1.80,intel,,windows,1.5,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,Acer,Aspire ES1-523,4,hdd,1024,15.6,19465,amd,A4-7210,1.80,amd,,linux,2.4,
100,Acer,A315-31-P4CRUN.GNTSI.002,4,hdd,500,15.6,18990,intel,pentium,1.10,intel,,windows,,
108,Acer,Aspire ES-15 NX.GKYSI.010,4,hdd,500,15.6,17990,amd,A4-7210,1.80,amd,,windows,2.4,
83,Acer,A315-31CDC UN.GNTSI.001,2,ssd,500,15.6,17990,intel,Celeron,1.10,intel,,windows,2.1,


In [173]:
laptops_df

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
0,Dell,Inspiron 15-3567,4,hdd,1024,15.6,40000,intel,i5,2.5,intel,,linux,2.50,
1,Apple,MacBook Air,8,ssd,128,13.3,55499,intel,i5,1.8,intel,2.0,mac,1.35,
2,Apple,MacBook Air,8,ssd,256,13.3,71500,intel,i5,1.8,intel,2.0,mac,1.35,
3,Apple,MacBook Pro,8,ssd,128,13.3,96890,intel,i5,2.3,intel,2.0,mac,3.02,
4,Apple,MacBook Pro,8,ssd,256,13.3,112666,intel,i5,2.3,intel,2.0,mac,3.02,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,Asus,A555LF,8,hdd,1024,15.6,39961,intel,i3 4th gen,1.7,nvidia,2.0,windows,2.30,
163,Asus,X555LA-XX172D,4,hdd,500,15.6,28489,intel,i3 4th gen,1.9,intel,,linux,2.30,
164,Asus,X554LD,2,hdd,500,15.6,29199,intel,i3 4th gen,1.9,intel,1.0,linux,2.30,
165,Asus,X550LAV-XX771D,2,hdd,500,15.6,29990,intel,i3 4th gen,1.7,intel,,linux,2.50,


In [175]:
laptops_df.sort_values(by='price', ascending=False, inplace=True)
laptops_df

Unnamed: 0,brand,model,ram,hd_type,hd_size,screen_size,price,processor_brand,processor_model,clock_speed,graphic_card_brand,graphic_card_size,os,weight,comments
5,Apple,MacBook Pro (TouchBar),16,ssd,512,15.0,226000,intel,i7,2.70,intel,2.0,mac,2.5,
90,Alienware,15 Notebook,16,hdd,1024,15.6,199000,intel,i7,2.60,nvidia,8.0,windows,3.5,Maximum Display Resolution : 1920 x 1080 pixel
96,Alienware,AW13R3-7000SLV-PUS,8,ssd,256,13.3,190256,intel,i7,3.00,nvidia,6.0,windows,2.6,13.3 inch FHD (1920 x 1080) IPS Anti-Glare 300...
31,Acer,Predator 17,16,ssd,256,17.3,178912,intel,i7,2.60,nvidia,,windows,4.2,Integrated Graphics
154,Microsoft,Surface Book CR9-00013,8,ssd,128,13.5,178799,intel,i5,1.80,intel,,windows,1.5,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,Acer,Aspire ES1-523,4,hdd,1024,15.6,19465,amd,A4-7210,1.80,amd,,linux,2.4,
100,Acer,A315-31-P4CRUN.GNTSI.002,4,hdd,500,15.6,18990,intel,pentium,1.10,intel,,windows,,
108,Acer,Aspire ES-15 NX.GKYSI.010,4,hdd,500,15.6,17990,amd,A4-7210,1.80,amd,,windows,2.4,
83,Acer,A315-31CDC UN.GNTSI.001,2,ssd,500,15.6,17990,intel,Celeron,1.10,intel,,windows,2.1,


In [179]:
laptops_df = pd.read_csv("data/laptops.csv")
laptops_df['brand']

0       Dell
1      Apple
2      Apple
3      Apple
4      Apple
       ...  
162     Asus
163     Asus
164     Asus
165     Asus
166     Asus
Name: brand, Length: 167, dtype: object

In [180]:
laptops_df['brand'].unique()

array(['Dell', 'Apple', 'Acer', 'HP', 'Lenovo', 'Alienware', 'Microsoft',
       'Asus'], dtype=object)

In [181]:
laptops_df['brand'].value_counts()

HP           55
Acer         35
Dell         31
Lenovo       18
Asus          9
Apple         7
Microsoft     6
Alienware     6
Name: brand, dtype: int64

In [182]:
laptops_df['brand'].describe()

count     167
unique      8
top        HP
freq       55
Name: brand, dtype: object

In [184]:
cities_df = pd.read_csv("data/world_cities.csv", index_col = 0)
cities_df

Unnamed: 0,City / Urban area,Country,Population,Land area (in sqKm)
0,Buenos Aires,Argentina,11200000,2266
1,Melbourne,Australia,3162000,2080
2,Sydney,Australia,3502000,1687
3,Brisbane,Australia,1508000,1603
4,Perth,Australia,1177000,964
...,...,...,...,...
245,Canton,USA,267000,372
246,Spokane,USA,335000,371
247,Tashkent,Uzbekistan,2200000,531
248,Ho Chi Minh City,Vietnam,4900000,518


In [185]:
cities_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 249 entries, 0 to 249
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   City / Urban area    249 non-null    object
 1   Country              249 non-null    object
 2   Population           249 non-null    int64 
 3   Land area (in sqKm)  249 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 7.8+ KB


In [186]:
cities_df.describe()

Unnamed: 0,Population,Land area (in sqKm)
count,249.0,249.0
mean,3006538.0,1035.212851
std,4136435.0,1033.487331
min,181000.0,365.0
25%,598000.0,511.0
50%,1500000.0,684.0
75%,3502000.0,1116.0
max,33200000.0,8683.0


In [187]:
cities_df['City / Urban area'].unique()

array(['Buenos Aires', 'Melbourne', 'Sydney', 'Brisbane', 'Perth',
       'Adelaide', 'Gold Coast', 'Vienna', 'Baku/Sumqayit', 'Brussels',
       'Antwerp', 'Sao Paulo', 'Rio de Janeiro', 'Belo Horizonte',
       'Curitiba', 'Brasilia', 'Fortaleza', 'Porto Alegre', 'Campinas',
       'Goiania', 'Recife', 'Phnom Penh', 'Montreal.', 'Toronto',
       'Vancouver', 'Edmonton', 'Calgary', 'Quebec', 'Ottawa/Hull',
       'Winnipeg', 'St. Catharines', 'Santiago', 'Beijing', 'Shanghai',
       'Shenzhen', 'Shenyang', 'Tianjin', 'Dalian', 'Bogota', 'Kinshasa',
       'Lumumbashi', 'Copenhagen', 'Quito', 'Cairo', 'Helsinki', 'Paris',
       'Marseille', 'Bordeaux', 'Lyon', 'Toulouse', 'Nice', 'Toulon',
       'Avignon', 'Valenciennes', 'Douai/Lens', 'Nantes', 'Lille', 'Pau',
       'Tours', 'Bethune', 'Essen/Düsseldorf', 'Berlin', 'Frankfurt',
       'Hamburg', 'Cologne/Bonn', 'Munich', 'Stuttgart', 'Aachen',
       'Accra', 'Athens', 'Budapest', 'Delhi', 'Hyderabad', 'Bangalore',
       'Kolkat

In [188]:
cities_df['City / Urban area'].value_counts()

New York Metro         1
Toledo                 1
Arabia                 1
Fayetteville           1
Abu Dhabi              1
                      ..
Budapest               1
Baltimore              1
Allentown/Bethlehem    1
Lille                  1
Lyon                   1
Name: City / Urban area, Length: 249, dtype: int64

In [189]:
cities_df['City / Urban area'].describe()

count                249
unique               249
top       New York Metro
freq                   1
Name: City / Urban area, dtype: object

In [208]:
cities_df['Country'].value_counts()

USA         105
France       15
Brazil       10
Canada        9
Germany       8
           ... 
Ukraine       1
Lebanon       1
Zimbabwe      1
Sweden        1
Sudan         1
Name: Country, Length: 61, dtype: int64

In [202]:
# 'City / Urban area', 'Country', 'Population', 'Land area (in sqKm)'
cities_df.loc[(cities_df['Population'] / cities_df['Land area (in sqKm)']) > 10000].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19 entries, 32 to 129
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   City / Urban area    19 non-null     object
 1   Country              19 non-null     object
 2   Population           19 non-null     int64 
 3   Land area (in sqKm)  19 non-null     int64 
dtypes: int64(2), object(2)
memory usage: 608.0+ bytes


In [205]:
cities_df['Density'] = cities_df['Population'] / cities_df['Land area (in sqKm)']
cities_df.head(10)

Unnamed: 0,City / Urban area,Country,Population,Land area (in sqKm),Density
0,Buenos Aires,Argentina,11200000,2266,4942.630185
1,Melbourne,Australia,3162000,2080,1520.192308
2,Sydney,Australia,3502000,1687,2075.874333
3,Brisbane,Australia,1508000,1603,940.73612
4,Perth,Australia,1177000,964,1220.954357
5,Adelaide,Australia,1002000,729,1374.485597
6,Gold Coast,Australia,422000,383,1101.827676
7,Vienna,Austria,1550000,453,3421.633554
8,Baku/Sumqayit,Azerbaijan,2100000,544,3860.294118
9,Brussels,Belgium,1570000,712,2205.05618


In [206]:
cities_df[cities_df['Density'] > 10000]

Unnamed: 0,City / Urban area,Country,Population,Land area (in sqKm),Density
32,Beijing,China,8614000,748,11516.042781
33,Shanghai,China,10000000,746,13404.825737
34,Shenzhen,China,8000000,466,17167.381974
36,Tianjin,China,4750000,453,10485.651214
38,Bogota,Colombia,7000000,518,13513.513514
39,Kinshasa,Congo,5000000,469,10660.98081
71,Delhi,India,14300000,1295,11042.471042
73,Bangalore,India,5400000,534,10112.359551
74,Kolkata,India,12700000,531,23917.137476
75,Mumbai,India,14350000,484,29648.760331


In [207]:
cities_df.sort_values(by='Density', ascending=False)

Unnamed: 0,City / Urban area,Country,Population,Land area (in sqKm),Density
75,Mumbai,India,14350000,484,29648.760331
74,Kolkata,India,12700000,531,23917.137476
101,Karachi,Pakistan,9800000,518,18918.918919
99,Lagos,Nigeria,13400000,738,18157.181572
34,Shenzhen,China,8000000,466,17167.381974
...,...,...,...,...,...
195,Chattanooga,USA,344000,751,458.055925
224,Asheville,USA,222000,536,414.179104
57,Pau,France,181000,450,402.222222
221,Hickory,USA,188000,546,344.322344


In [221]:
city_cnt = cities_df['Country'].value_counts()
city_cnt

USA         105
France       15
Brazil       10
Canada        9
Germany       8
           ... 
Ukraine       1
Lebanon       1
Zimbabwe      1
Sweden        1
Sudan         1
Name: Country, Length: 61, dtype: int64

In [222]:
city_cnt[city_cnt == 4]

Italy    4
Name: Country, dtype: int64