# Pandas学习

## 1. 如何将一个列表转换成Pandas数据框

In [1]:
import pandas as pd
my_list = [('join', 25, 'male'), ('lisa', 30, 'female'), ('david', 18, 'male')]
df = pd.DataFrame(my_list,columns=['Name', 'age', 'gender'])

In [2]:
print(df)

    Name  age  gender
0   join   25    male
1   lisa   30  female
2  david   18    male


## 2.如何从一个CSV文件中读取数据到一个Pandas数据框 

In [3]:
pd = pd.read_csv('/home/data/hdfs/anomaly_label.csv', encoding='utf-8')

In [4]:
print(pd)

                         BlockId    Label
0       blk_-1608999687919862906   Normal
1        blk_7503483334202473044   Normal
2       blk_-3544583377289625738  Anomaly
3       blk_-9073992586687739851   Normal
4        blk_7854771516489510256   Normal
...                          ...      ...
575056   blk_1019720114020043203   Normal
575057  blk_-2683116845478050414   Normal
575058   blk_5595059397348477632   Normal
575059   blk_1513937873877967730   Normal
575060  blk_-9128742458709757181  Anomaly

[575061 rows x 2 columns]


## 3. 如何通过pandas创建数据到mysql数据库里面

In [2]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.types import *
df = pd.DataFrame({"班级":["一年级","二年级","三年级","四年级"],
               "男生人数":[25,23,27,30],
               "女生人数":[19,17,20,20]})
engin = create_engine('mysql+mysqlconnector://root:123456@127.0.0.1:3306/test_pandas')
df.to_sql("clsses",engin)

4

## 4. 如何查看一个Pandas数据框的行数和列数

In [4]:
import pandas as pd
df = pd.DataFrame({'A':[1,2,3],'B':[4,5,6],'C':[7,8,9]})
print(df.shape),print(df)

(3, 3)
   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9


(None, None)

## 5.如何查看一个Pandas数据框的列名?

In [1]:
import pandas as pd
data = {'name':['alex','box','chery'],'age':[18,20,12]}
df = pd.DataFrame(data)
df

Unnamed: 0,name,age
0,alex,18
1,box,20
2,chery,12


In [2]:
print(df.columns)

Index(['name', 'age'], dtype='object')


## 6. 如何查看一个Pandas数据框的索引？

In [3]:
import pandas as pd
data = {'name':['alex','box','chery'],'age':[18,20,12]}
df = pd.DataFrame(data)
df

Unnamed: 0,name,age
0,alex,18
1,box,20
2,chery,12


In [4]:
print(df.index)

RangeIndex(start=0, stop=3, step=1)


## 7. 如何导入Pandas库并查看其版本好

In [7]:
import pandas as pd
print(pd.__version__)

2.1.0


## 8. 如何从CSV文件读取数据并创建一个Pandas数据框

In [2]:
import pandas as pd
df = pd.read_csv('../../log/loglizer/data/HDFS/anomaly_label.csv',encoding='utf-8')
df.head(3)

Unnamed: 0,BlockId,Label
0,blk_-1608999687919862906,Normal
1,blk_7503483334202473044,Normal
2,blk_-3544583377289625738,Anomaly


In [3]:
df.tail(3)

Unnamed: 0,BlockId,Label
575058,blk_5595059397348477632,Normal
575059,blk_1513937873877967730,Normal
575060,blk_-9128742458709757181,Anomaly


## 9. 如何查看一个Pandas数据框的数据类型

In [4]:
import pandas as pd
data = {'name':['alex','bob','chery'],'age':[10,12,13]}
df = pd.DataFrame(data)
df

Unnamed: 0,name,age
0,alex,10
1,bob,12
2,chery,13


In [5]:
print(df.dtypes)

name    object
age      int64
dtype: object


## 10.如何查看一个Pandas数据框的数据摘要统计信息?

In [6]:
import pandas as pd
df = pd.DataFrame({'A':[1,2,3,4,5],'B':[2.1,4.2,6.3,8.4,10.5],'C':['a','b','a','b','a']})
df

Unnamed: 0,A,B,C
0,1,2.1,a
1,2,4.2,b
2,3,6.3,a
3,4,8.4,b
4,5,10.5,a


In [7]:
suf = df.describe()
print(suf)

              A          B
count  5.000000   5.000000
mean   3.000000   6.300000
std    1.581139   3.320392
min    1.000000   2.100000
25%    2.000000   4.200000
50%    3.000000   6.300000
75%    4.000000   8.400000
max    5.000000  10.500000


## 11.如何选择一个Pandas数据框的行

In [8]:
import pandas as pd
df = pd.DataFrame({'Name':['Alice','Bob','Charlie'],
                   'Age':[25,30,35],
                   'City':['New York','Paris','London']})
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Paris
2,Charlie,35,London


In [9]:
first_row = df.loc[0]
first_row

Name       Alice
Age           25
City    New York
Name: 0, dtype: object

In [10]:
first_two = df.loc[0:1]
first_two

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Paris


In [14]:
sub = df.loc[[0,2],['Name','Age']]
sub

Unnamed: 0,Name,Age
0,Alice,25
2,Charlie,35


## 12. 如何选择一个Pandas数据框的列

In [15]:
import pandas as pd
df = pd.DataFrame({'Name':['Alice','Bob','Charlie'],
                   'Age':[25,30,35],
                   'City':['New York','Paris','London']})
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Paris
2,Charlie,35,London


In [16]:
df['Name']

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

In [21]:
df[['Name','City']]

Unnamed: 0,Name,City
0,Alice,New York
1,Bob,Paris
2,Charlie,London


In [26]:
df.iloc[:,0:2]

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35


In [None]:
d