In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# read files 读取文件

In [2]:
# Load the comma-separated sample file into a DataFrame.
df_csv = pd.read_csv(
    filepath_or_buffer="./data/my_csv.csv",
)

In [3]:
# Show the frame read from my_csv.csv.
df_csv

Unnamed: 0,col1,col2,col3,col4,col5
0,2,a,1.4,apple,2020/1/1
1,3,b,3.4,banana,2020/1/2
2,6,c,2.5,orange,2020/1/5
3,5,d,3.2,lemon,2020/1/7


In [4]:
# Load the text sample file into a DataFrame with read_table.
df_txt = pd.read_table(
    filepath_or_buffer="./data/my_table.txt",
)

In [5]:
# Show the frame read from my_table.txt.
df_txt

Unnamed: 0,col1,col2,col3,col4
0,2,a,1.4,apple 2020/1/1
1,3,b,3.4,banana 2020/1/2
2,6,c,2.5,orange 2020/1/5
3,5,d,3.2,lemon 2020/1/7


In [6]:
# Load the Excel sample workbook into a DataFrame.
df_excel = pd.read_excel(
    io="./data/my_excel.xlsx",
)

In [7]:
# Show the frame read from my_excel.xlsx.
df_excel

Unnamed: 0,col1,col2,col3,col4,col5
0,2,a,1.4,apple,2020/1/1
1,3,b,3.4,banana,2020/1/2
2,6,c,2.5,orange,2020/1/5
3,5,d,3.2,lemon,2020/1/7


## no header

In [8]:
# header=None keeps the file's first line as a data row; columns get integer labels.
pd.read_table(
    filepath_or_buffer="./data/my_table.txt",
    header=None,
)

Unnamed: 0,0,1,2,3
0,col1,col2,col3,col4
1,2,a,1.4,apple 2020/1/1
2,3,b,3.4,banana 2020/1/2
3,6,c,2.5,orange 2020/1/5
4,5,d,3.2,lemon 2020/1/7


## index_col=[  ]

In [10]:
# index_col with a list of names builds a two-level row index from those columns.
pd.read_csv(
    filepath_or_buffer="./data/my_csv.csv",
    index_col=["col1", "col5"],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,col2,col3,col4
col1,col5,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,2020/1/1,a,1.4,apple
3,2020/1/2,b,3.4,banana
6,2020/1/5,c,2.5,orange
5,2020/1/7,d,3.2,lemon


## usecols=[   ]

In [13]:
# usecols keeps only the listed columns.
pd.read_table(
    filepath_or_buffer="./data/my_table.txt",
    usecols=["col1", "col2"],
)

Unnamed: 0,col1,col2
0,2,a
1,3,b
2,6,c
3,5,d


In [14]:
# usecols works the same way for Excel input: only col1 is loaded.
pd.read_excel(
    io="./data/my_excel.xlsx",
    usecols=["col1"],
)

Unnamed: 0,col1
0,2
1,3
2,6
3,5


## parse_dates=[  ]

In [15]:
# parse_dates converts col5 from text such as 2020/1/1 into datetime values.
pd.read_csv(
    filepath_or_buffer="./data/my_csv.csv",
    parse_dates=["col5"],
)

Unnamed: 0,col1,col2,col3,col4,col5
0,2,a,1.4,apple,2020-01-01
1,3,b,3.4,banana,2020-01-02
2,6,c,2.5,orange,2020-01-05
3,5,d,3.2,lemon,2020-01-07


## nrows=n (read only the first n rows)

In [18]:
# nrows=3 reads only the first three data rows.
# parse_dates is deliberately not passed: col4 holds mixed text such as
# "apple 2020/1/1", which cannot be parsed as a date, so the option had no effect.
pd.read_table("./data/my_table.txt", nrows=3)

Unnamed: 0,col1,col2,col3,col4
0,2,a,1.4,apple 2020/1/1
1,3,b,3.4,banana 2020/1/2
2,6,c,2.5,orange 2020/1/5


## sep= and engine= 特殊分隔符，txt 文件


In [19]:
# With the default separator every line ends up in a single column.
pd.read_table(
    filepath_or_buffer="./data/my_table_special_sep.txt",
)

Unnamed: 0,col1 |||| col2
0,TS |||| This is an apple.
1,GQ |||| My name is Bob.
2,WT |||| Well done!
3,PT |||| May I help you?


In [20]:
# The separator is a regular expression, so it is written as a raw string:
# in a plain string "\|" is an invalid escape sequence.
# Multi-character regex separators require engine="python".
# \s* absorbs the spaces around "||||" so column names and values carry no padding.
pd.read_table(
    "./data/my_table_special_sep.txt",
    sep=r"\s*\|{4}\s*",
    engine="python",
)

Unnamed: 0,col1,col2
0,TS,This is an apple.
1,GQ,My name is Bob.
2,WT,Well done!
3,PT,May I help you?


# write files 保存数据

In [22]:
# Create the output folder first: to_csv / to_excel do not create missing
# parent directories and fail if it is absent.
output_dir = Path("./data/chao")
output_dir.mkdir(parents=True, exist_ok=True)

# index=False leaves the row index out of the written files.
df_csv.to_csv(output_dir / "writeFile.csv", index=False)
df_excel.to_excel(output_dir / "writeFile.xlsx", index=False)

## 没有 to_table， 但是可以用 to_csv代替

In [23]:
# DataFrame has no to_table; to_csv with a tab separator is used instead.
df_txt.to_csv(
    path_or_buf="./data/chao/table.txt",
    sep="\t",
    index=False,
)

# 基本数据结构

## Series 一维数据

In [2]:
# A labelled one-dimensional Series: values, a named index, an explicit dtype and a name.
ser1 = pd.Series(
    [26, 17, 29],
    index=pd.Index(["Chao", "Apurva", "Vova"], name="Finland"),
    dtype="object",
    name="My first Series",
)

In [3]:
# Show the Series: index labels on the left, values on the right, name and dtype below.
ser1

Finland
Chao      26
Apurva    17
Vova      29
Name: My first Series, dtype: object

## access Series attributes

In [4]:
# Name given to the Series at construction.
ser1.name

'My first Series'

In [5]:
# Row labels, including the index's own name.
ser1.index

Index(['Chao', 'Apurva', 'Vova'], dtype='object', name='Finland')

In [6]:
# Underlying values as a NumPy array.
ser1.values

array([26, 17, 29], dtype=object)

In [7]:
# Element dtype; 'O' is the object dtype requested at construction.
ser1.dtype

dtype('O')

In [8]:
# Shape as a tuple; a Series has a single dimension.
ser1.shape

(3,)

In [9]:
# Number of elements.
ser1.size

3

In [10]:
# Number of dimensions.
ser1.ndim

1

## DataFrame 二维数据

In [16]:
# Row values, row labels and column labels for the example frame.
data = [
    [100, 200, 300],
    [101, 201, 301],
    [102, 202, 303],
    [103, 203, 303],
    [104, 204, 304],
]
index = pd.Index(list("abcde"))
cols = ["one hundred", "two hundred", "three hundred"]

In [17]:
# Assemble the frame from the values, row labels and column labels defined above.
df1 = pd.DataFrame(
    data,
    columns=cols,
    index=index,
)

In [18]:
# Show the assembled frame.
df1

Unnamed: 0,one hundred,two hundred,three hundred
a,100,200,300
b,101,201,301
c,102,202,303
d,103,203,303
e,104,204,304


## access DataFrame attributes

In [19]:
# Row labels.
df1.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [20]:
# Underlying values as a two-dimensional NumPy array.
df1.values

array([[100, 200, 300],
       [101, 201, 301],
       [102, 202, 303],
       [103, 203, 303],
       [104, 204, 304]], dtype=int64)

In [21]:
# Column labels.
df1.columns

Index(['one hundred', 'two hundred', 'three hundred'], dtype='object')

In [22]:
# Shape as (rows, columns).
df1.shape

(5, 3)

In [23]:
# Number of dimensions.
df1.ndim

2

In [24]:
# Total number of elements (rows x columns).
df1.size

15