In [1]:
import pandas as pd
import numpy as np

###### Generate two $3 \times 4$ DataFrames, one $6 \times 2$ DataFrame, and one Series from random numbers, plus one DataFrame from a dictionary


In [2]:
# Two 3x4 frames of uniform random draws from [0, 10).
# df_1 receives its row/column labels directly in the constructor.
df_1 = pd.DataFrame(
    np.random.uniform(low=0, high=10, size=(3, 4)),
    index=['x0', 'x1', 'x2'],
    columns=['a', 'b', 'c', 'd'],
)

# df_2 is built unlabeled first; labels are attached afterwards through the
# .columns / .index attributes to show the alternative way of naming axes.
df_2 = pd.DataFrame(np.random.uniform(low=0, high=10, size=(3, 4)))
df_2.columns = ['a', 'b', 'c', 'd']
df_2.index = ['x3', 'x4', 'x5']

# A 6x2 frame with generated row labels y0..y5.
df_3 = pd.DataFrame(
    np.random.uniform(low=0, high=10, size=(6, 2)),
    index=['y%s' % n for n in range(6)],
    columns=['e', 'f'],
)

# A frame from a dict of dicts; orient='index' turns each outer key into a row
# label and each inner key into a column.
df_4 = pd.DataFrame.from_dict(
    {
        "Mostafa": {"Age": 32, "GPA": 4, "books": 100},
        "Ali": {"Age": 33, "GPA": 3.9, "books": 120},
        "Vahid": {"Age": 34, "GPA": 3.8, "books": 90},
    },
    orient='index',
)

# A length-6 Series of random values labelled s0..s5.
se_1 = pd.Series(
    np.random.uniform(low=0, high=10, size=(6,)),
    index=['s%s' % n for n in range(6)],
)

# Print every object, then df_1's column and row labels, one after another.
print(df_1, df_2, df_3, df_4, se_1, df_1.columns, df_1.index, sep='\n')

           a         b         c         d
x0  2.013590  6.757464  0.537072  6.724690
x1  1.408858  7.810152  0.162720  7.134558
x2  5.323144  1.794908  2.581617  8.326221
           a         b         c         d
x3  4.612637  2.218209  7.060667  5.629964
x4  6.425883  8.063890  0.332589  4.959994
x5  5.583034  9.501964  2.861137  7.760770
           e         f
y0  0.327440  1.331517
y1  6.440302  3.495962
y2  2.060589  0.341290
y3  9.370563  4.007071
y4  2.872266  3.621617
y5  0.876414  7.119172
         Age  GPA  books
Ali       33  3.9    120
Mostafa   32  4.0    100
Vahid     34  3.8     90
s0    9.276246
s1    3.882459
s2    0.972475
s3    7.314831
s4    7.221911
s5    6.823548
dtype: float64
Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['x0', 'x1', 'x2'], dtype='object')


#### Data indexing

In [3]:
# Label-based row lookup: .loc selects the row whose index label is 'Mostafa'.
print(df_4.loc['Mostafa'])

Age       32.0
GPA        4.0
books    100.0
Name: Mostafa, dtype: float64


In [4]:
# Position-based row lookup: .iloc[1] is the second row in df_4's current row
# order, so which label it returns depends on how the rows are ordered.
print(df_4.iloc[1])

Age       32.0
GPA        4.0
books    100.0
Name: Mostafa, dtype: float64


In [5]:
# Column selection by name: returns the 'GPA' column as a Series keyed by row label.
print(df_4['GPA'])

Ali        3.9
Mostafa    4.0
Vahid      3.8
Name: GPA, dtype: float64


In [6]:
# Column first, then row label: picks the single 'GPA' value for 'Ali'.
print( df_4['GPA']['Ali'] )

3.9


#### Reindexing and filling

In [7]:
# Three colours labelled 0, 2 and 4; labels 1, 3 and 5 are deliberately absent
# so that reindexing over range(6) has gaps to fill.
se_2 = pd.Series(
    data=['blue', 'purple', 'yellow'],
    index=[0, 2, 4],
)
se_2

0      blue
2    purple
4    yellow
dtype: object

In [8]:
# Conform se_2 to labels 0..5; labels it does not have come back as NaN.
se_2.reindex(index=range(6))

0      blue
1       NaN
2    purple
3       NaN
4    yellow
5       NaN
dtype: object

In [9]:
# Same reindex, but labels missing from se_2 are given the constant 'red'.
se_2.reindex(index=range(6), fill_value='red')

0      blue
1       red
2    purple
3       red
4    yellow
5       red
dtype: object

In [10]:
# Forward fill: each label missing from se_2 takes the value of the nearest
# preceding label that exists.
se_2.reindex(index=range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [11]:
# Same colours, but labelled 0, 3 and 4: the two-label gap (1, 2) after 0 is
# what makes a fill limit visible when reindexing over range(6).
se_3 = pd.Series(
    data=['blue', 'purple', 'yellow'],
    index=[0, 3, 4],
)
se_3

0      blue
3    purple
4    yellow
dtype: object

In [12]:
# Forward fill capped at one consecutive missing label: label 1 is filled
# from label 0, but label 2 is a second consecutive gap and stays NaN.
print(se_3.reindex(index=range(6), method='ffill', limit=1))


0      blue
1      blue
2       NaN
3    purple
4    yellow
5    yellow
dtype: object


In [13]:
# Baseline frame, for comparison with the reindexed results that follow.
print(df_4)
# Row reindex including a label df_4 does not have: the 'Hesam' row is all NaN.
print(df_4.reindex(index=['Ali', 'Mostafa', 'Hesam', 'Vahid']))
# Column reindex: same data with the columns in a new order.
print(df_4.reindex(columns=['GPA', 'Age', 'books']))
# Row reindex using only existing labels: rows are reordered, nothing is missing.
print(df_4.reindex(index=['Mostafa', 'Ali', 'Vahid']))

         Age  GPA  books
Ali       33  3.9    120
Mostafa   32  4.0    100
Vahid     34  3.8     90
          Age  GPA  books
Ali      33.0  3.9  120.0
Mostafa  32.0  4.0  100.0
Hesam     NaN  NaN    NaN
Vahid    34.0  3.8   90.0
         GPA  Age  books
Ali      3.9   33    120
Mostafa  4.0   32    100
Vahid    3.8   34     90
         Age  GPA  books
Mostafa   32  4.0    100
Ali       33  3.9    120
Vahid     34  3.8     90


#### Arithmetic

In [19]:
# Two random frames over the same rows x0..x2. df_6 carries one extra column
# ('e') that df_5 lacks, so arithmetic between them has a label with no partner.
df_5 = pd.DataFrame(
    np.random.uniform(low=0, high=10, size=(3, 4)),
    index=['x0', 'x1', 'x2'],
    columns=['a', 'b', 'c', 'd'],
)
df_6 = pd.DataFrame(
    np.random.uniform(low=0, high=10, size=(3, 5)),
    index=['x0', 'x1', 'x2'],
    columns=['a', 'b', 'c', 'd', 'e'],
)

In [20]:
# Show both operands first.
print(df_5)
print(df_6)
# Plain + aligns on labels; column 'e' exists only in df_6, so its sum is NaN.
print(df_5 + df_6)
# .add with fill_value=0 treats the side that lacks a label as 0, so column
# 'e' keeps df_6's values instead of becoming NaN.
print(df_5.add(df_6, fill_value=0) )  #### try sub, div, mul

           a         b         c         d
x0  1.640431  8.540041  3.737812  6.150799
x1  7.332454  2.786515  5.568070  9.785020
x2  5.533822  0.286955  1.017410  9.622851
           a         b         c         d         e
x0  2.557162  5.031338  9.814089  8.435638  3.491319
x1  9.195587  2.834366  5.805208  6.638963  0.552551
x2  9.220053  3.087629  3.326036  6.825141  5.603103
            a          b          c          d   e
x0   4.197593  13.571379  13.551900  14.586437 NaN
x1  16.528041   5.620881  11.373277  16.423983 NaN
x2  14.753874   3.374584   4.343446  16.447992 NaN
            a          b          c          d         e
x0   4.197593  13.571379  13.551900  14.586437  3.491319
x1  16.528041   5.620881  11.373277  16.423983  0.552551
x2  14.753874   3.374584   4.343446  16.447992  5.603103


#### Operations between DataFrame and Series

In [46]:
# A 3x3 frame holding 0..8 row by row, with default integer row labels 0..2.
df_7 = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
# se_4 is labelled like df_7's columns; se_5 is labelled like df_7's rows.
se_4 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
se_5 = pd.Series([1, 2, 3], index=[0, 1, 2])

print(df_7, se_4, se_5, sep='\n')

# Match se_5 against the row labels: row i has se_5[i] subtracted from every column.
print(df_7.sub(se_5, axis='index'))
# Match se_4 against the column labels: each column has its own se_4 entry subtracted.
print(df_7.sub(se_4, axis='columns'))

   a  b  c
0  0  1  2
1  3  4  5
2  6  7  8
a    1
b    2
c    3
dtype: int64
0    1
1    2
2    3
dtype: int64
   a  b  c
0 -1  0  1
1  1  2  3
2  3  4  5
   a  b  c
0 -1 -1 -1
1  2  2  2
2  5  5  5


#### Function application and mapping

In [3]:
def myfunc(x, y):
    """Return the two-element list ``[x / y, x + 1]``.

    Args:
        x: Numerator of the ratio; also the value that is incremented.
        y: Denominator of the ratio.

    Returns:
        A list whose first item is ``x / y`` and whose second item is ``x + 1``.
    """
    ratio = x / y
    incremented = x + 1
    return [ratio, incremented]

In [4]:
# This could be written more simply with plain column arithmetic; apply is used
# here only to demonstrate row-wise function application.
# df_4 is the frame that holds the "books" and "Age" columns (df_1 only has
# the random columns a-d, so selecting these names from it raises KeyError).
# Each row is unpacked into myfunc as (books, Age); result_type="expand"
# spreads the returned two-element list into two columns so the result can be
# assigned to "var1" and "var2".
df_4[["var1", "var2"]] = df_4[["books", "Age"]].apply(
    lambda row: myfunc(*row), axis=1, result_type="expand"
)

In [5]:
# Display the frame that holds the Age/GPA/books columns (df_4); df_1 only
# contains the random columns a-d.
df_4

Unnamed: 0,Age,GPA,books,var1,var2
Ali,33,3.9,120,3.636364,121.0
Mostafa,32,4.0,100,3.125,101.0
Vahid,34,3.8,90,2.647059,91.0
