# DataFrames

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Labels for the demo frame: four columns (w-z) and four rows (a-d).
columns = list('wxyz')
index = list('abcd')
# The integers 0..15 laid out row by row in a 4x4 grid.
data = np.arange(16).reshape(4, 4)
data

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [3]:
# Wrap the array in a labelled table using the row and column labels above.
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


## Columns

In [4]:
# Selecting one column label with single brackets yields a Series.
w_series = df['w']
print(w_series)
type(w_series)

a     0
b     4
c     8
d    12
Name: w, dtype: int32


pandas.core.series.Series

In [5]:
# Passing a list of column labels returns a DataFrame with just those columns.
df[['w', 'x']]

Unnamed: 0,w,x
a,0,1
b,4,5
c,8,9
d,12,13


### Creating new columns

In [6]:
# Show the frame before the change.
print(df)
# Element-wise sum of columns 'w' and 'x', stored under the new label 'new'.
df['new'] = df['w'].add(df['x'])
# Show the frame again, now with the extra column.
print(df)

    w   x   y   z
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15
    w   x   y   z  new
a   0   1   2   3    1
b   4   5   6   7    9
c   8   9  10  11   17
d  12  13  14  15   25


### Removing columns

In [7]:
# Remove the 'new' column by label. drop() returns a new frame, so the result
# is reassigned instead of using inplace=True: this avoids silently mutating
# an object that earlier cells have already displayed.
df = df.drop(columns='new')

In [8]:
# Confirm that the 'new' column is gone.
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


## Rows

In [9]:
# Starting point for the row-selection examples.
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [10]:
# Label-based row lookup: a single label returns that row as a Series.
df.loc['a']

w    0
x    1
y    2
z    3
Name: a, dtype: int32

In [11]:
# A list of row labels returns a DataFrame containing those rows.
df.loc[['a', 'b']]

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7


In [12]:
print(df)
# Position-based lookup: integer position 0 is the row labelled 'a'.
df.iloc[0]

    w   x   y   z
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


w    0
x    1
y    2
z    3
Name: a, dtype: int32

### Removing rows

In [13]:
# Starting point for the row-removal example.
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [14]:
# Return a copy of the frame without the row labelled 'c'.
# The result is only displayed, not assigned, so df itself is untouched.
df.drop(index='c')

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
d,12,13,14,15


In [15]:
# The drop result was not assigned, so df still contains row 'c'.
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [16]:
# Select rows 'c' and 'd' together with columns 'y' and 'z' in one .loc call.
# Expected values: [[10, 11], [14, 15]]
df.loc[['c', 'd'], ['y', 'z']]

Unnamed: 0,y,z
c,10,11
d,14,15


## Conditional selection

In [17]:
# Starting point for the conditional-selection examples.
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [18]:
# Element-wise comparison: produces a frame of booleans with the same labels.
df >= 10

Unnamed: 0,w,x,y,z
a,False,False,False,False
b,False,False,False,False
c,False,False,True,True
d,True,True,True,True


In [19]:
# Comparing a single column produces a boolean Series indexed by row label.
df['x'] >= 10

a    False
b    False
c    False
d     True
Name: x, dtype: bool

## Reset Indexing

In [27]:
# Starting point for the index examples.
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [28]:
# Move the current row labels into a regular column named 'index' and number
# the rows 0..3 instead. The result is only displayed here, not assigned.
df.reset_index()

Unnamed: 0,index,w,x,y,z
0,a,0,1,2,3
1,b,4,5,6,7
2,c,8,9,10,11
3,d,12,13,14,15


In [30]:
# The reset_index() result was not assigned, so df keeps its a-d labels.
df

Unnamed: 0,w,x,y,z
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [31]:
# One replacement label per existing row.
list_index = ['aa', 'bb', 'cc', 'dd']
# Store the labels as an ordinary column named 'new_index'.
df['new_index'] = list_index
df

Unnamed: 0,w,x,y,z,new_index
a,0,1,2,3,aa
b,4,5,6,7,bb
c,8,9,10,11,cc
d,12,13,14,15,dd


In [34]:
# Promote the 'new_index' column to be the row index. set_index() returns a
# new frame, so the result is reassigned instead of using inplace=True: this
# avoids silently mutating an object that earlier cells have already displayed.
df = df.set_index('new_index')

In [35]:
# The rows are now labelled aa-dd and 'new_index' is no longer a data column.
df

Unnamed: 0_level_0,w,x,y,z
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aa,0,1,2,3
bb,4,5,6,7
cc,8,9,10,11
dd,12,13,14,15


## DataFrame summaries

In [36]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,w,x,y,z
count,4.0,4.0,4.0,4.0
mean,6.0,7.0,8.0,9.0
std,5.163978,5.163978,5.163978,5.163978
min,0.0,1.0,2.0,3.0
25%,3.0,4.0,5.0,6.0
50%,6.0,7.0,8.0,9.0
75%,9.0,10.0,11.0,12.0
max,12.0,13.0,14.0,15.0


In [37]:
# Print a structural overview: index range, column dtypes, non-null counts
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, aa to dd
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   w       4 non-null      int32
 1   x       4 non-null      int32
 2   y       4 non-null      int32
 3   z       4 non-null      int32
dtypes: int32(4)
memory usage: 96.0+ bytes


##### تمامی حقوق برای رضا علوی محفوظ است