In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# 1. Two-dimensional table: the integers 0..19 laid out as 4 rows x 5 columns,
#    with rows labelled a-d and columns labelled v-z.
df = pd.DataFrame(
    data=np.arange(20).reshape(4, 5),
    index=['a', 'b', 'c', 'd'],
    columns=['v', 'w', 'x', 'y', 'z'],
)
df

Unnamed: 0,v,w,x,y,z
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19


## 데이터프레임 생성

In [5]:
# 2. List of dictionaries: each dictionary is one row, and its keys become
#    the column names.
dl = [
    dict(name='James', age=24, job='student'),
    dict(name='Maria', age=36, job='teacher'),
    dict(name='Brian', age=30, job='programmer'),
]
df = pd.DataFrame(data=dl)
df

Unnamed: 0,name,age,job
0,James,24,student
1,Maria,36,teacher
2,Brian,30,programmer


In [7]:
# 3. Several parallel lists, one per column. This form is used more often
#    than the list-of-dictionaries form.
names = ['James', 'Maria', 'Brian']
ages = [24, 36, 30]
jobs = ['student', 'teacher', 'programmer']

# Each keyword becomes a column name; each list supplies that column's values.
df = pd.DataFrame(dict(name=names, age=ages, job=jobs))
df

Unnamed: 0,name,age,job
0,James,24,student
1,Maria,36,teacher
2,Brian,30,programmer


## 데이터프레임 생성 - 응용

In [9]:
# Load seaborn's bundled 'iris' example dataset as a DataFrame.
iris = sns.load_dataset(name='iris')
# Preview the first five rows.
iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [10]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [12]:
# Count how many rows belong to each species.
iris['species'].value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

In [33]:
# Mean sepal length of the setosa rows, selected with a single .loc lookup
# (row mask + column label).
np.mean(iris.loc[iris['species'] == 'setosa', 'sepal_length'])

5.005999999999999

In [39]:
# Build a long-format summary table: one row per (species, feature) pair with
# the mean and standard deviation of that feature within that species.
species_list, feature_list, mean_list, std_list = [], [], [], []

# Take the species from the data instead of hard-coding them; unique() keeps
# the order of first appearance.
for species in iris['species'].unique():
    # Filter once per species and reuse the subset for every feature and for
    # both statistics, rather than rebuilding the boolean mask each time.
    species_rows = iris[iris['species'] == species]
    # Every column except the last one ('species') is a measurement.
    for feature in iris.columns[:-1]:
        values = species_rows[feature]
        species_list.append(species)
        feature_list.append(feature)
        mean_list.append(values.mean())
        # ddof=0 is the population standard deviation, which is what np.std
        # computes by default. pandas' own default is ddof=1 (sample standard
        # deviation), so the choice is spelled out here.
        std_list.append(values.std(ddof=0))

df = pd.DataFrame({
    'species': species_list,
    'feature': feature_list,
    # Round only for presentation, after all statistics are computed.
    'mean': np.round(mean_list, 2),
    'std': np.round(std_list, 2),
})
df

Unnamed: 0,species,feature,mean,std
0,setosa,sepal_length,5.01,0.35
1,setosa,sepal_width,3.43,0.38
2,setosa,petal_length,1.46,0.17
3,setosa,petal_width,0.25,0.1
4,versicolor,sepal_length,5.94,0.51
5,versicolor,sepal_width,2.77,0.31
6,versicolor,petal_length,4.26,0.47
7,versicolor,petal_width,1.33,0.2
8,virginica,sepal_length,6.59,0.63
9,virginica,sepal_width,2.97,0.32


In [34]:
# Instructor's solution
# List the distinct species names in order of first appearance.
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [37]:
# Collect one entry per (species, feature) pair into four parallel lists.
species_list, feature_list, mean_list, std_list = [], [], [], []

for species in iris['species'].unique():
    # Select this species' rows once; every feature below reuses the subset.
    species_rows = iris[iris['species'] == species]
    # Every column except the last one ('species') is a measurement.
    for feature in iris.columns[:-1]:
        s = species_rows[feature]
        species_list += [species]
        feature_list += [feature]
        # Mean is kept to 2 decimal places.
        mean_list += [np.round(s.mean(), 2)]
        # Series.std() defaults to ddof=1 (sample standard deviation);
        # kept to 4 decimal places.
        std_list += [np.round(s.std(), 4)]

In [38]:
# Assemble the four parallel lists into a summary table, one column per list.
df = pd.DataFrame(
    dict(
        species=species_list,
        feature=feature_list,
        mean=mean_list,
        std=std_list,
    )
)
df

Unnamed: 0,species,feature,mean,std
0,setosa,sepal_length,5.01,0.3525
1,setosa,sepal_width,3.43,0.3791
2,setosa,petal_length,1.46,0.1737
3,setosa,petal_width,0.25,0.1054
4,versicolor,sepal_length,5.94,0.5162
5,versicolor,sepal_width,2.77,0.3138
6,versicolor,petal_length,4.26,0.4699
7,versicolor,petal_width,1.33,0.1978
8,virginica,sepal_length,6.59,0.6359
9,virginica,sepal_width,2.97,0.3225


In [41]:
# Promote 'species' and 'feature' to a two-level row index.
# The result is rebound to the name instead of using inplace=True, and the
# guard makes the cell safe to re-run: once the two columns have become the
# index, calling set_index again would raise KeyError.
if list(df.index.names) != ['species', 'feature']:
    df = df.set_index(['species', 'feature'])

In [42]:
# Display the summary table, whose rows are now indexed by (species, feature).
df

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
species,feature,Unnamed: 2_level_1,Unnamed: 3_level_1
setosa,sepal_length,5.01,0.35
setosa,sepal_width,3.43,0.38
setosa,petal_length,1.46,0.17
setosa,petal_width,0.25,0.1
versicolor,sepal_length,5.94,0.51
versicolor,sepal_width,2.77,0.31
versicolor,petal_length,4.26,0.47
versicolor,petal_width,1.33,0.2
virginica,sepal_length,6.59,0.63
virginica,sepal_width,2.97,0.32
