In [1]:
import pandas as pd

# 一、对象的创建

### 1.1 一维对象的创建

In [None]:
# Dict-based construction: the dict keys become the index labels of the Series.
dict_v = {
    "a": 0,
    "b": 0.25,
    "c": 0.5,
    "d": 0.75,
    "e": 1,
}
print(dict_v)
sr = pd.Series(data=dict_v)  # build the Series straight from the dict
print(sr)  # print() writes the plain string representation
sr  # a bare last expression is shown as the cell's output

In [None]:
# Dict-based construction again: several Series that share one index are
# combined into a DataFrame, one column per dict key.
i = ["1号", "2号", "3号", "4号"]  # shared row labels

v1 = [53, 34, 24, 39]
sr1 = pd.Series(data=v1, index=i)

v2 = ["female", "male", "male", "female"]
sr2 = pd.Series(data=v2, index=i)

df = pd.DataFrame(data={"age": sr1, "gender": sr2})
df

In [None]:
# 数组创建法
import numpy as np
v1 = [1, 2, 3, 4, 5]  # plain Python list
v2 = np.array([1, 2, 3, 4, 5])  # NumPy array
k = ["a", "b", "c", "d", "e"]  # index labels used for sr1

sr1 = pd.Series(data=v1, index=k)  # list data with explicit index labels
sr2 = pd.Series(data=v2)  # array data; index omitted, so it defaults to 0, 1, 2, ...

sr1
# sr2  # uncomment (as the last line) to display the default-indexed Series instead

In [None]:
# 利用双线程同时查看结果但是得不到表格......
import threading
import pandas as pd


def list_func():
    """Build a labelled Series from a Python list and print it."""
    v1 = [1, 2, 3, 4, 5]
    k = ["a","b","c","d","e"]
    sr1 = pd.Series(v1,index=k)
    print(sr1)

def np_func():
    """Build a default-indexed Series from a NumPy array and print it.

    Uses the `np` alias imported in an earlier cell.
    """
    v2 = np.array([1, 2, 3, 4, 5])
    sr2 = pd.Series(v2)
    print(sr2)

# Run both printers on their own threads.
a = threading.Thread(target=list_func)
a.start()
b = threading.Thread(target=np_func)
b.start()

# Wait for both workers before the cell ends; without join() the cell can
# finish while a thread is still printing.
a.join()
b.join()

### 1.2 一维对象的属性（values，index）

In [None]:
# Even though the Series is built from a list, .values comes back as a NumPy array.
k = ["1号", "2号", "3号", "4号"]
v = [345, 6346, 234, 465]
sr = pd.Series(data=v, index=k)  # Series created from the list

print(sr)  # the whole Series
print(sr.values)  # the values attribute
sr.index  # the index attribute (shown as the cell output)

### 1.3 二维对象的创建

- 字典创建法

In [None]:
# First future column: ages, labelled by person.
i = ["1号", "2号", "3号", "4号"]
v1 = [53, 64, 74, 42]
sr1 = pd.Series(data=v1, index=i)
sr1

In [None]:
# Second future column: genders, using the same labels as sr1.
i = ["1号", "2号", "3号", "4号"]
v2 = ["female", "male", "male", "female"]
sr2 = pd.Series(data=v2, index=i)
sr2

In [None]:
# Combine sr1 and sr2 (built in the two previous cells) into a DataFrame, one column per dict key.
df=pd.DataFrame({"年龄":sr1,"性别":sr2})   # create the DataFrame
df
# If the indexes of sr1 and sr2 do not match exactly, the resulting index is their UNION
# (not the intersection), and every label missing from one of the Series is filled with NaN
# in that column. To see this, change "4号" to "6号" in sr2's index and re-run.

- 数组创建法

In [None]:
import numpy as np
# Row data. np.array() coerces the mixed rows to one string dtype, so v is a string array and
# the ages are stored as text. A plain nested list can be passed directly instead, without NumPy:
#   v = [[53,"female"],[64,"male"],[73,"male"],[49,"female"]]
v = np.array([
    [53, "female"],
    [64, "male"],
    [73, "male"],
    [49, "female"],
])
i = ["1号", "2号", "3号", "4号"]  # row labels
c = ["age", "gender"]  # column labels

# Build the DataFrame from the 2-D array
df = pd.DataFrame(data=v, index=i, columns=c)
df

### 1.4 二维对象的属性（values，index，columns）

In [None]:
# Build a small frame from a nested list, then inspect its three attributes.
v = [
    [53, "female"],
    [64, "male"],
    [73, "male"],
    [49, "female"],
]
i = ["1号", "2号", "3号", "4号"]
c = ["age", "gender"]
df = pd.DataFrame(data=v, index=i, columns=c)

print(df.values)  # the values attribute
print(df.index)  # the index attribute (row labels)
print(df.columns)  # the columns attribute (column labels)

In [None]:
# Pull the first column out of the values array of df (the frame left by the previous cell).
arr = df.values
arr = arr[:,0].astype(int)      # take column [0] and convert it to an int array
print(arr)
# An array holds a single element type, so the column taken from the mixed
# age/gender values needs an explicit astype(int) to become an integer array.

# 二、对象的索引
Pandas索引分显式索引和隐式索引。显式用Pandas对象提供的索引，隐式用数组本身自带的从0开始的索引。
若索引为整数，则二者可能混淆，所以有索引器：loc（显式）和 iloc（隐式）

### 2.1 一维对象的索引

In [None]:
# Reading and writing Series elements by label
i = ["1号", "2号", "3号", "4号"]
v = [53, 34, 24, 39]
sr = pd.Series(data=v, index=i)
print(sr, '\n')

print(sr["3号"], '\n')  # read the element labelled "3号"
print(sr[['1号', '3号']], '\n')  # fancy indexing with a list of labels

sr['3号'] = 100  # overwrite one element
print(sr)
# sr.loc["3号"] is the explicit-indexer spelling of the same lookup;
# sr.iloc[2] is the positional one.

In [None]:
v = [53,34,24,39]
i = ["1号","2号","3号","4号"]
sr = pd.Series(v,index=i)
print(sr,'\n')
# Slice access: a label (explicit) slice includes its end label "2号"; a positional (implicit)
# slice follows the NumPy rule and excludes the end position.
print(sr["1号":"2号"],'\n')

# The point being demonstrated: a slice is only a view, so the write through `cut` is
# expected to show up when `sr` is printed.
# NOTE(review): this relies on pandas' pre-Copy-on-Write behaviour; with Copy-on-Write enabled
# `cut` would presumably behave as a copy and `sr` stay unchanged — confirm against the pandas
# version in use.
cut = sr["1号":"2号"]
cut["1号"] = 100
print(sr,'\n')

# Plain assignment only binds a second name to the same object, so the write through `a`
# is visible via `sr`.
a = sr
a["3号"] = 200
print(sr)
# sr.loc["1号":"2号"] is the explicit-indexer spelling of the slice above;
# the positional counterpart is sr.iloc[0:2].

### 2.2 二维对象的索引
在二维对象中访问行（或单个元素）时不能去掉索引器!!!（只按列名取整列时可以直接写 df[列名]）
- 列索引（columns）是 “显式可访问” 的：
DataFrame 把列名设计成了类似 “属性” 的可直接访问对象，df[col_name] 本质上是对列索引的直接操作。这种设计让列操作更便捷，符合用户 “按列名取数据” 的直觉。
- 行索引（index）是 “隐式需要定位” 的：
行没有类似 “属性” 的直接访问方式，必须通过索引器（.loc/.iloc）明确指定位置。这是因为行的标识（索引）可能是整数、字符串等多种类型，且可能存在重复，需要通过 .loc 明确 “按标签定位行” 的逻辑。

In [None]:
# Element access on a DataFrame goes through an indexer; .loc is the label-based one.
# Build the table
i = ["1号", "2号", "3号", "4号"]
v1 = [53, 34, 24, 39]
v2 = ["female", "male", "male", "female"]
sr1 = pd.Series(data=v1, index=i)
sr2 = pd.Series(data=v2, index=i)
df = pd.DataFrame(data={"age": sr1, "gender": sr2})
print(df, '\n')

# Read by label: first a single cell, then a selection of rows and columns given as lists
print(df.loc["1号", "age"], '\n')
print(df.loc[["1号", "3号"], ["gender", "age"]], '\n')

# Write a single cell by label
df.loc["3号", "age"] = 100
print(df)

In [None]:
# Same table as before, this time addressed by position through .iloc.
# Build the table
i = ["1号", "2号", "3号", "4号"]
v1 = [53, 34, 24, 39]
v2 = ["female", "male", "male", "female"]
sr1 = pd.Series(data=v1, index=i)
sr2 = pd.Series(data=v2, index=i)
df = pd.DataFrame(data={"age": sr1, "gender": sr2})
print(df, '\n')

# Positional (implicit) access goes through .iloc
print(df.iloc[0, 1])
print()

# iloc[rows, columns]: rows 0 and 2, columns 1 and 0 (in that order).
# List indexers keep the result a DataFrame.
print(df.iloc[[0, 2], [1, 0]])
print()

# Write a single cell by position
df.iloc[2, 0] = 100
print(df)

In [None]:
# Slicing with the label-based (explicit) indexer
v = np.array([
    [53, "female"],
    [64, "male"],
    [73, "male"],
    [49, "female"],
])
i = ["a", "b", "c", "d"]
c = ["age", "gender"]
df = pd.DataFrame(data=v, index=i, columns=c)
print(df, '\n')

# Rows 'a' through 'b' (both end labels included), column 'age' only
print(df.loc['a':'b', 'age'])
print()

# Row 'c', every column. df.loc['c'] is the short form; the positional indexer
# allows the same shortcut, as NumPy does.
print(df.loc['c', :])
print()

# Every row, column 'age' only. df["age"] is the short form (NumPy has no equivalent).
print(df.loc[:, "age"])

In [None]:
# Slicing with the positional (implicit) indexer
v = np.array([
    [53, "female"],
    [64, "male"],
    [73, "male"],
    [49, "female"],
])
i = ["a", "b", "c", "d"]
c = ["age", "gender"]
df = pd.DataFrame(data=v, index=i, columns=c)
print(df, '\n')

print(df.iloc[0:3, 0])  # slice: row positions 0..2 of the first column
print()
print(df.iloc[:, 1])  # every element of the second column
print()
print(df.iloc[1, :])  # every element of the second row

# 三、对象的变形

### 3.1 对象的转置

In [None]:
# Start from a frame laid out the "wrong" way round: fields as rows, records as columns
v = [
    [23, 24, 25, 26],
    ["male", "female", "male", "female"],
]
i = ["age", "gender"]
c = ["a", "b", "c", "d"]
df = pd.DataFrame(data=v, index=i, columns=c)
print(df, '\n')

# Transpose: rows and columns swap places
df = df.T
df

### 3.2 对象的翻转

In [None]:
v = np.array([
    [53, "female"],
    [64, "male"],
    [73, "male"],
    [49, "female"],
])
i = ["a", "b", "c", "d"]
c = ["age", "gender"]
df = pd.DataFrame(data=v, index=i, columns=c)
print(df, '\n')

# Left-right flip: keep every row, take the columns in reverse order
df = df.iloc[:, ::-1]
print(df, '\n')

# Up-down flip: keep every column, take the rows in reverse order
df = df.iloc[::-1, :]
df

### 3.3 对象的重塑
有关插入新行或新列：
- df.loc[label] 是强制按行标签解析的（无论 label 是否在列名中，都只操作行）
- df[label] 是优先按列名解析的（如果 label 是列名，则返回列；如果不是，可能报错或按其他规则处理）

In [None]:
# Three Series over the same index; the first two seed the frame, sr3 is kept for later.
i = ["a", "b", "c", "d", "e"]

v1 = [10, 12, 14, 16, 17]
sr1 = pd.Series(data=v1, index=i)

v2 = ['male', 'female', 'female', 'male', 'female']
sr2 = pd.Series(data=v2, index=i)

v3 = [1, 2, 3, 4, 5]
sr3 = pd.Series(data=v3, index=i)
# print(sr1, sr2, sr3)  # uncomment to inspect the three Series

df = pd.DataFrame(data={"age": sr1, "gender": sr2})
print(df)  # frame built with the dict method

In [None]:
# Works on df and sr3 from the previous cell.
# Add a third column named "id"
df['id'] = sr3
# Add a new row labelled "f"
df.loc["f"] = [20,"female",6]         # the fully explicit spelling is df.loc["f", :]
print(df)
# Split the first column ("age") back out as a Series
sr4 = df["age"]
# print(sr4)  # uncomment to inspect the extracted column


### 3.4 对象的拼接 pd.concat()函数

In [173]:
# Concatenating one-dimensional objects
k1 = ["a", "b", "c"]
v1 = [10, 12, 14]
sr1 = pd.Series(data=v1, index=k1)

k2 = ["d", "e"]
v2 = [16, 17]
sr2 = pd.Series(data=v2, index=k2)

# Two Series are now available.
# sr1, sr2  # uncomment to inspect them
# Join sr1 and sr2 end to end
pd.concat([sr1, sr2])

In [None]:
# Concatenate sr1 and sr2 from the previous cell, then check the combined index for duplicates
# with the Index.is_unique property.
sr3 = pd.concat([sr1,sr2])
# Show the combined index. A bare `sr3.index` in the middle of a cell is never displayed,
# so print it explicitly.
print(sr3.index)
# True means no label is repeated; False means at least one duplicate label
sr3.index.is_unique

In [None]:
# Concatenating two-dimensional objects: build the three input frames first.

# df1: age / gender for n1..n4
i1 = ['n1', 'n2', 'n3', 'n4']
c1 = ["age", "gender"]
v1 = [[10, "female"], [12, "male"], [14, "female"], [16, "male"]]

# df2: id / ikun for the same four labels
i2 = ['n1', 'n2', 'n3', 'n4']
c2 = ["id", "ikun"]
v2 = [[1, "y"], [2, "n"], [3, 'y'], [4, 'n']]

# df3: all four columns for two further labels
i3 = ['n5', 'n6']
c3 = ["age", "gender", "id", "ikun"]
v3 = [[50, "male", 5, "y"], [25, "female", 6, "y"]]

df1 = pd.DataFrame(data=v1, index=i1, columns=c1)
df2 = pd.DataFrame(data=v2, index=i2, columns=c2)
df3 = pd.DataFrame(data=v3, index=i3, columns=c3)

print(df1, '\n')
print(df2, '\n')
print(df3)
# Three DataFrames are now available.

In [None]:
# Column-wise concatenation: df1 and df2 (built in the previous cell) are placed side by side.
df = pd.concat([df1,df2],axis=1)
df

In [None]:
# Row-wise concatenation: the rows of df3 are appended below the current df.
# Note: df is both input and output here, so re-running this cell on its own appends df3's rows again.
df = pd.concat([df,df3],axis=0)        # axis=0 may be omitted; row-wise is the default direction
df