In [1]:
import pandas as pd
import numpy as np

In [2]:
df1 = pd.DataFrame(
    {
        "国家": ["中国", "美国", "日本"],
        "地区": ["亚洲", "北美", "亚洲"],
        "人口": [13.97, 3.28, 1.26],
        "GDP": [14.34, 21.43, 5.08],
    }
)
df1

Unnamed: 0,国家,地区,人口,GDP
0,中国,亚洲,13.97,14.34
1,美国,北美,3.28,21.43
2,日本,亚洲,1.26,5.08


In [3]:
print(df1.index)

RangeIndex(start=0, stop=3, step=1)


In [4]:
print(df1.info)

<bound method DataFrame.info of    国家  地区     人口    GDP
0  中国  亚洲  13.97  14.34
1  美国  北美   3.28  21.43
2  日本  亚洲   1.26   5.08>


In [5]:
print(df1['人口'])

0    13.97
1     3.28
2     1.26
Name: 人口, dtype: float64


In [6]:
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
df3 = pd.DataFrame(
    {
        "A": 1.0,  # 全都是1.0
        "B": [1, 2, 3, 4],  # 长度必须和行数一样
        "C": pd.Series(
            1, index=list(range(4)), dtype="float32"
        ),  # 生成一个长度为4的list
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
df3

Unnamed: 0,A,B,C,D,E,F
0,1.0,1,1.0,3,test,foo
1,1.0,2,1.0,3,train,foo
2,1.0,3,1.0,3,test,foo
3,1.0,4,1.0,3,train,foo


In [8]:
s = pd.Series([14.34, 21.43, 5.08], name="gdp")

print(type(s))  # pandas.core.series.Series
print(type(df1))  # pandas.core.frame.DataFrame

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>


In [9]:
pd.Series(["a", "b", "c", "d", "e"])
pd.Series(("a", "b", "c", "d", "e"))

0    a
1    b
2    c
3    d
4    e
dtype: object

In [10]:
# 由索引分别为a、b、c、d、e的5个随机浮点数数组组成
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])
s

a    0.115401
b   -1.123114
c   -0.287447
d    0.518791
e   -2.505857
dtype: float64

In [11]:
print(s.index)  # 查看索引

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')


In [12]:
s = pd.Series(np.random.randn(5))  # 未指定索引，则默认从0开始递增
s

0   -1.663959
1    0.016996
2   -0.722663
3    0.652031
4    1.880783
dtype: float64

In [13]:
d = {'b': 1, 'a': 0, 'c': 2}
s = pd.Series(d)
s

b    1
a    0
c    2
dtype: int64

In [14]:
# 如果指定索引，则会按索引顺序，如有无法与索引对应的值，会产生缺失值
s = pd.Series(d, index=['b', 'c', 'd', 'a'])
s

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [17]:
ar1 = np.array(['a','b','c'])
ar1

numpy.ndarray

In [16]:
for a in ar1:
    print(a)

a
b
c
