## **Introduction to Pandas**

In [1]:
import pandas as pd

### **Data Structure**

In [2]:
# A Series is a one-dimensional labelled array; with no explicit index,
# pandas labels the values 0, 1, 2, ... automatically.
s = pd.Series(data=[3, -5, 7, 4])
s

0    3
1   -5
2    7
3    4
dtype: int64

In [3]:
# Column-oriented input: every key becomes a column and every list holds
# that column's values, one entry per row.
data = dict(
    Name=['Anh', 'Thai', 'Hoa'],
    Country=['Brazil', 'Vietnam', 'Vietnam'],
    Grade=[7.0, 8.0, 9.0],
)
# Passing the column order explicitly keeps it in step with the dict's key order.
df = pd.DataFrame(data, columns=list(data))
df

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [4]:
# Slicing with [] works on row positions: skip the first row, keep the rest.
df[1:]

Unnamed: 0,Name,Country,Grade
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [5]:
# iloc is position-based: row 0 comes back as a Series indexed by column name.
df.iloc[0]

Name          Anh
Country    Brazil
Grade         7.0
Name: 0, dtype: object

In [8]:
# Two positions (row, column) pick out a single scalar cell.
df.iloc[0, 0] 

'Anh'

In [9]:
# loc is label-based: row label 0, column label 'Name'.
df.loc[0, 'Name']

'Anh'

In [10]:
# A list of column labels returns several fields of row 0 together as a Series.
df.loc[0, ['Name', 'Grade']]

Name     Anh
Grade    7.0
Name: 0, dtype: object

In [11]:
# First two rows of the frame.
df.head(2)

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0


In [12]:
# Last two rows of the frame.
df.tail(2)

Unnamed: 0,Name,Country,Grade
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [15]:
# Draw a random 30% of the rows, which comes to a single row for this 3-row frame.
# random_state pins the draw so a fresh Restart & Run All shows the same row every
# time; the row shown may differ from the one an earlier unseeded run produced.
df.sample(frac=0.3, random_state=42)

Unnamed: 0,Name,Country,Grade
1,Thai,Vietnam,8.0


In [16]:
# The two rows with the highest Grade, largest first.
df.nlargest(n=2, columns='Grade')

Unnamed: 0,Name,Country,Grade
2,Hoa,Vietnam,9.0
1,Thai,Vietnam,8.0


In [17]:
# The two rows with the lowest Grade, smallest first.
df.nsmallest(n=2, columns='Grade')

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0


In [18]:
# Bracket lookup with a column label returns that column as a Series.
df['Name']

0     Anh
1    Thai
2     Hoa
Name: Name, dtype: object

In [19]:
# Attribute access returns the Grade column as a Series, just like df['Grade'].
df.Grade 

0    7.0
1    8.0
2    9.0
Name: Grade, dtype: float64

### **Drop**

In [20]:
# Show the full frame for reference before dropping rows and columns.
df 

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [21]:
# Drop rows by index label. The result is a new frame; later cells show that
# df still has all three rows.
df.drop([0, 2])

Unnamed: 0,Name,Country,Grade
1,Thai,Vietnam,8.0


In [22]:
# axis=1 makes drop look for a column label instead of a row label.
df.drop('Name', axis=1)

Unnamed: 0,Country,Grade
0,Brazil,7.0
1,Vietnam,8.0
2,Vietnam,9.0


### **Sorting**

In [23]:
# Sort rows alphabetically by Name; each row keeps its original index label.
df.sort_values('Name')

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
2,Hoa,Vietnam,9.0
1,Thai,Vietnam,8.0


In [24]:
# axis=1 sorts the column labels (Country, Grade, Name) rather than the rows.
df.sort_index(axis=1)

Unnamed: 0,Country,Grade,Name
0,Brazil,7.0,Anh
1,Vietnam,8.0,Thai
2,Vietnam,9.0,Hoa


### **Apply Function**

In [25]:
def f(x):
    """Return x doubled; `*` repeats strings and multiplies numbers."""
    return x * 2

# apply passes each column (a Series) to f, so a whole column is doubled per call.
df.apply(f)

Unnamed: 0,Name,Country,Grade
0,AnhAnh,BrazilBrazil,14.0
1,ThaiThai,VietnamVietnam,16.0
2,HoaHoa,VietnamVietnam,18.0


In [26]:
def f(x):
    """Return x doubled; `*` repeats strings and multiplies numbers."""
    return x * 2

# DataFrame.map calls f on every cell individually (element-wise).
# It requires pandas >= 2.1, where it replaced the deprecated applymap.
df.map(f)

Unnamed: 0,Name,Country,Grade
0,AnhAnh,BrazilBrazil,14.0
1,ThaiThai,VietnamVietnam,16.0
2,HoaHoa,VietnamVietnam,18.0


In [27]:
# NOTE(review): this cell repeats the previous one verbatim -- confirm whether a
# different demonstration was intended here.
def f(x):
    """Return x doubled; `*` repeats strings and multiplies numbers."""
    return x * 2

# DataFrame.map calls f on every cell individually (element-wise).
df.map(f)

Unnamed: 0,Name,Country,Grade
0,AnhAnh,BrazilBrazil,14.0
1,ThaiThai,VietnamVietnam,16.0
2,HoaHoa,VietnamVietnam,18.0


In [30]:
def f(x):
    """Ignore the input and return a fixed four-element list."""
    return [1, 2, 3, 4]

# apply calls f once per column and each returned list becomes that column's
# values, so the result has four rows even though df has three.
df.apply(f)

Unnamed: 0,Name,Country,Grade
0,1,1,1
1,2,2,2
2,3,3,3
3,4,4,4


In [31]:
def f(x):
    """Ignore the input and return a fixed three-element list."""
    return [1, 2, 3]

# map calls f once per cell, so every cell is replaced by the whole list and
# the frame keeps its original 3x3 shape.
df.map(f)

Unnamed: 0,Name,Country,Grade
0,"[1, 2, 3]","[1, 2, 3]","[1, 2, 3]"
1,"[1, 2, 3]","[1, 2, 3]","[1, 2, 3]"
2,"[1, 2, 3]","[1, 2, 3]","[1, 2, 3]"


### **Renaming**

In [32]:
# Relabel the Grade column as Score in the returned frame; later cells show
# that df itself still uses 'Grade'.
df.rename(columns={'Grade': 'Score'})

Unnamed: 0,Name,Country,Score
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


### **Setting the Index**

In [36]:
# Promote Grade from a regular column to the row index; the result is kept
# under a separate name.
new_df = df.set_index('Grade')

In [39]:
# Grade now labels the rows instead of appearing as a column.
new_df

Unnamed: 0_level_0,Name,Country
Grade,Unnamed: 1_level_1,Unnamed: 2_level_1
7.0,Anh,Brazil
8.0,Thai,Vietnam
9.0,Hoa,Vietnam


In [41]:
# With Grade as the index, loc looks a row up by its Grade label.
new_df.loc[7.0]

Name          Anh
Country    Brazil
Name: 7.0, dtype: object

In [43]:
# Rebind new_df, this time indexing the original frame by Name.
new_df = df.set_index('Name')

In [44]:
# Name now labels the rows instead of appearing as a column.
new_df

Unnamed: 0_level_0,Country,Grade
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Anh,Brazil,7.0
Thai,Vietnam,8.0
Hoa,Vietnam,9.0


In [45]:
# With Name as the index, loc looks a row up by its Name label.
new_df.loc['Anh']

Country    Brazil
Grade         7.0
Name: Anh, dtype: object

### **Replacing**

In [46]:
# Display df again as the baseline for the replacements that follow.
df 

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [47]:
# The dict maps old value -> new value: every 'Vietnam' becomes 'VN'.
df.replace({'Vietnam': 'VN'})

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,VN,8.0
2,Hoa,VN,9.0


In [50]:
# replace also works on numeric values: 7.0 becomes 8.0, so two rows now
# share the same grade.
new_df = df.replace({7.0: 8.0})
new_df

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,8.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


### **Group Data**

In [51]:
# Group rows that share a Grade and take the per-column minimum within each
# group (for the string columns shown, that is the alphabetically first value).
new_df.groupby(by='Grade').min()

Unnamed: 0_level_0,Name,Country
Grade,Unnamed: 1_level_1,Unnamed: 2_level_1
8.0,Anh,Brazil
9.0,Hoa,Vietnam


### **Rolling**

In [52]:
# rolling(3) only builds a 3-row window object; nothing is computed until an
# aggregation such as sum() is called on it.
df['Grade'].rolling(3)

Rolling [window=3,center=False,axis=0,method=single]

In [54]:
# center=True labels each window by its middle row, so only row 1 has a full
# 3-value window (7 + 8 + 9 = 24); the two edge rows are NaN.
df['Grade'].rolling(3, center=True).sum()

0     NaN
1    24.0
2     NaN
Name: Grade, dtype: float64

: 