In [1]:
import numpy as np
import pandas as pd

In [2]:
# Four labelled rows, two float columns; NaN marks the missing cells.
data = [
    [1.4, np.nan],
    [7.1, -4.5],
    [np.nan, np.nan],
    [0.75, -1.3],
]
df = pd.DataFrame(data, index=list("abcd"), columns=["one", "two"])

In [3]:
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [4]:
# Column totals: collapse the row axis, one result per column.
df.sum(axis="index")

one    9.25
two   -5.80
dtype: float64

In [5]:
# Row totals: add across the columns, one result per row label.
df.sum(axis="columns")

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [6]:
# Total of the single column "one".
df.loc[:, "one"].sum()

9.25

In [7]:
# Total of the single row labelled "d".
df.loc["d", :].sum()

-0.55

In [8]:
# Row means with NaN skipping turned off: a row containing any NaN yields NaN.
df.mean(axis="columns", skipna=False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [9]:
# Mean of column "one" (NaN entries are skipped by default).
# Reduce only the column that is needed instead of averaging every column of
# the frame and then discarding all but one result.
one_mean = df["one"].mean()

In [10]:
# Smallest value in column "two" (NaN entries are skipped by default).
# Reduce only the column that is needed instead of taking the minimum of
# every column of the frame and then discarding all but one result.
two_min = df["two"].min()

In [11]:
# Replace the missing entries of "one" with the value held in one_mean.
df["one"] = df["one"].fillna(one_mean)

In [12]:
# Replace the missing entries of "two" with the value held in two_min.
df["two"] = df["two"].fillna(two_min)

In [13]:
df

Unnamed: 0,one,two
a,1.4,-4.5
b,7.1,-4.5
c,3.083333,-4.5
d,0.75,-1.3


In [14]:
# Seed NumPy's legacy global generator so this frame, and the later
# np.random calls in this notebook, give the same values on every
# Restart & Run All.
# NOTE(review): the outputs saved in this notebook were produced without a
# seed, so a re-run will show different numbers than the recorded ones.
np.random.seed(20160701)

# 6 consecutive days x 4 columns of standard-normal draws.
df2 = pd.DataFrame(
    np.random.randn(6, 4),
    index=pd.date_range("20160701", periods=6),
    columns=["A", "B", "C", "D"],
)

In [15]:
df2

Unnamed: 0,A,B,C,D
2016-07-01,-1.438099,-1.792523,0.059451,-0.183934
2016-07-02,-0.146297,0.82506,-1.185762,0.426656
2016-07-03,-0.286706,-2.025417,-1.723997,-0.35229
2016-07-04,0.310562,0.161367,-0.626619,0.575863
2016-07-05,-0.85071,0.146366,0.12678,0.275367
2016-07-06,1.007167,-2.006855,0.007875,0.675303


In [17]:
# Correlation coefficient between columns A and B.
df2["A"].corr(df2["B"], method="pearson")

-0.028029599359564054

In [18]:
# Covariance between columns A and C.
df2["A"].cov(other=df2["C"])

-0.09888133678780558

In [19]:
# Correlation coefficients between every pair of columns.
df2.corr(method="pearson")

Unnamed: 0,A,B,C,D
A,1.0,-0.02803,-0.150646,0.704556
B,-0.02803,1.0,-0.061505,0.474243
C,-0.150646,-0.061505,1.0,0.350557
D,0.704556,0.474243,0.350557,1.0


In [20]:
df2.cov() # covariance between every pair of columns

Unnamed: 0,A,B,C,D
A,0.73554,-0.031162,-0.098881,0.251883
B,-0.031162,1.680392,-0.06102,0.256264
C,-0.098881,-0.06102,0.585744,0.111839
D,0.251883,0.256264,0.111839,0.173765


In [21]:
# Put the rows in a random order and the columns in the order D, B, C, A.
# NOTE: this rebinds df2, so re-running the cell reshuffles the rows again.
dates = df2.index
random_dates = np.random.permutation(dates)
df2 = (
    df2
    .reindex(index=random_dates)
    .reindex(columns=["D", "B", "C", "A"])
)

In [22]:
df2 # ways to sort by index

Unnamed: 0,D,B,C,A
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706
2016-07-02,0.426656,0.82506,-1.185762,-0.146297
2016-07-04,0.575863,0.161367,-0.626619,0.310562
2016-07-06,0.675303,-2.006855,0.007875,1.007167
2016-07-05,0.275367,0.146366,0.12678,-0.85071


In [23]:
# Order the rows by their index labels, ascending.
df2.sort_index(axis="index")

Unnamed: 0,D,B,C,A
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099
2016-07-02,0.426656,0.82506,-1.185762,-0.146297
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706
2016-07-04,0.575863,0.161367,-0.626619,0.310562
2016-07-05,0.275367,0.146366,0.12678,-0.85071
2016-07-06,0.675303,-2.006855,0.007875,1.007167


In [26]:
# Order the columns by their labels, ascending; row order is untouched.
df2.sort_index(axis="columns")

Unnamed: 0,A,B,C,D
2016-07-01,-1.438099,-1.792523,0.059451,-0.183934
2016-07-03,-0.286706,-2.025417,-1.723997,-0.35229
2016-07-02,-0.146297,0.82506,-1.185762,0.426656
2016-07-04,0.310562,0.161367,-0.626619,0.575863
2016-07-06,1.007167,-2.006855,0.007875,0.675303
2016-07-05,-0.85071,0.146366,0.12678,0.275367


In [29]:
# Order the rows by their index labels, descending.
df2.sort_index(axis="index", ascending=False)

Unnamed: 0,D,B,C,A
2016-07-06,0.675303,-2.006855,0.007875,1.007167
2016-07-05,0.275367,0.146366,0.12678,-0.85071
2016-07-04,0.575863,0.161367,-0.626619,0.310562
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706
2016-07-02,0.426656,0.82506,-1.185762,-0.146297
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099


In [30]:
# Order the columns by their labels, descending; row order is untouched.
df2.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2016-07-01,-0.183934,0.059451,-1.792523,-1.438099
2016-07-03,-0.35229,-1.723997,-2.025417,-0.286706
2016-07-02,0.426656,-1.185762,0.82506,-0.146297
2016-07-04,0.575863,-0.626619,0.161367,0.310562
2016-07-06,0.675303,0.007875,-2.006855,1.007167
2016-07-05,0.275367,0.12678,0.146366,-0.85071


In [31]:
df2 # ways to sort by value

Unnamed: 0,D,B,C,A
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706
2016-07-02,0.426656,0.82506,-1.185762,-0.146297
2016-07-04,0.575863,0.161367,-0.626619,0.310562
2016-07-06,0.675303,-2.006855,0.007875,1.007167
2016-07-05,0.275367,0.146366,0.12678,-0.85071


In [32]:
# Order the rows by the values in column D, smallest first.
df2.sort_values("D")

Unnamed: 0,D,B,C,A
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099
2016-07-05,0.275367,0.146366,0.12678,-0.85071
2016-07-02,0.426656,0.82506,-1.185762,-0.146297
2016-07-04,0.575863,0.161367,-0.626619,0.310562
2016-07-06,0.675303,-2.006855,0.007875,1.007167


In [33]:
# Order the rows by the values in column D, largest first.
df2.sort_values("D", ascending=False)

Unnamed: 0,D,B,C,A
2016-07-06,0.675303,-2.006855,0.007875,1.007167
2016-07-04,0.575863,0.161367,-0.626619,0.310562
2016-07-02,0.426656,0.82506,-1.185762,-0.146297
2016-07-05,0.275367,0.146366,0.12678,-0.85071
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706


In [34]:
# Add an integer column of random draws from 0..5 and a string label column.
# Both are assigned positionally to the rows in their current order.
df2["E"] = np.random.randint(low=0, high=6, size=6)
df2["F"] = [
    "alpha",
    "beta",
    "gamma",
    "gamma",
    "alpha",
    "gamma",
]

In [35]:
df2

Unnamed: 0,D,B,C,A,E,F
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099,1,alpha
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706,3,beta
2016-07-02,0.426656,0.82506,-1.185762,-0.146297,4,gamma
2016-07-04,0.575863,0.161367,-0.626619,0.310562,1,gamma
2016-07-06,0.675303,-2.006855,0.007875,1.007167,0,alpha
2016-07-05,0.275367,0.146366,0.12678,-0.85071,3,gamma


In [37]:
# Order the rows by E first, breaking ties with F.
df2.sort_values(["E", "F"])

Unnamed: 0,D,B,C,A,E,F
2016-07-06,0.675303,-2.006855,0.007875,1.007167,0,alpha
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099,1,alpha
2016-07-04,0.575863,0.161367,-0.626619,0.310562,1,gamma
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706,3,beta
2016-07-05,0.275367,0.146366,0.12678,-0.85071,3,gamma
2016-07-02,0.426656,0.82506,-1.185762,-0.146297,4,gamma


In [38]:
# Distinct labels present in column F.
pd.unique(df2["F"])

array(['alpha', 'beta', 'gamma'], dtype=object)

In [40]:
# Number of rows carrying each label in F, most frequent first.
df2["F"].value_counts(sort=True, ascending=False)

gamma    3
alpha    2
beta     1
Name: F, dtype: int64

In [42]:
# Boolean mask: True where the label in F is one of the listed values.
df2["F"].isin(["alpha", "beta"])

2016-07-01     True
2016-07-03     True
2016-07-02    False
2016-07-04    False
2016-07-06     True
2016-07-05    False
Name: F, dtype: bool

In [43]:
# Keep only the rows whose label in F is one of the listed values.
df2[df2["F"].isin(["alpha", "beta"])]

Unnamed: 0,D,B,C,A,E,F
2016-07-01,-0.183934,-1.792523,0.059451,-1.438099,1,alpha
2016-07-03,-0.35229,-2.025417,-1.723997,-0.286706,3,beta
2016-07-06,0.675303,-2.006855,0.007875,1.007167,0,alpha


In [49]:
# Four city-labelled rows x three columns of standard-normal draws.
df3 = pd.DataFrame(
    np.random.randn(4, 3),
    index=["Seoul", "Incheon", "Busan", "Daegu"],
    columns=list("bde"),
)

In [50]:
df3

Unnamed: 0,b,d,e
Seoul,1.655895,0.389432,-1.363818
Incheon,1.344993,-0.751464,0.619855
Busan,-0.298493,0.10808,0.121306
Daegu,-1.377058,-1.060205,0.533904


In [45]:
def func(x):
    """Return the spread of ``x``: its largest value minus its smallest.

    ``x`` is anything exposing ``.max()`` and ``.min()``; when passed to
    ``DataFrame.apply`` it receives one column or one row at a time.
    """
    # A named def (rather than a lambda bound to a name) gives the function a
    # real __name__ in tracebacks and room for this docstring.
    return x.max() - x.min()

In [48]:
# Call func once per column: max minus min of each column.
df3.apply(func, axis="index")

b    3.032953
d    1.449637
e    1.983673
dtype: float64

In [51]:
# Call func once per row: max minus min across the columns of each row.
df3.apply(func, axis="columns")

Seoul      3.019713
Incheon    2.096457
Busan      0.419799
Daegu      1.910962
dtype: float64