# # pandas.Series( data, index, dtype, copy)

###  1.) Create an Empty Series

In [2]:
import pandas as pd
s = pd.Series()
s

Series([], dtype: float64)

### 2.) Create a Series from ndarray

In [3]:
import pandas as pd
import numpy as np

data = np.array(['one','two','three','four'])
s = pd.Series(data)
s

0      one
1      two
2    three
3     four
dtype: object

In [4]:
# change index
s = pd.Series(data, index = [10,11,12,13])
s

10      one
11      two
12    three
13     four
dtype: object

### 3.) Create a Series from dict

In [5]:
da = {'a' : 0., 'b' : 1., 'c' : 2.}
s = pd.Series(da)
s

a    0.0
b    1.0
c    2.0
dtype: float64

In [7]:
# change index
s = pd.Series(da,index=['a' ,'b','c','d'])
s

a    0.0
b    1.0
c    2.0
d    NaN
dtype: float64

### 4.) Create a Series from Scalar

In [8]:
s = pd.Series(10, index=['a','b','c','d'])
s

a    10
b    10
c    10
d    10
dtype: int64

### 5.) Accessing Data from Series with Position

In [9]:
s = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e'])
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [10]:
s[0]

1

In [11]:
s[3:]

d    4
e    5
dtype: int64

In [13]:
s[:3]

a    1
b    2
c    3
dtype: int64

In [14]:
s['a']

1

In [15]:
s[['b','c','e']]

b    2
c    3
e    5
dtype: int64

# #pandas.DataFrame( data, index, columns, dtype, copy)

In [17]:
import pandas as pd
df = pd.DataFrame()
df

### 1.) Create a DataFrame from Lists

In [20]:
data = [1,2,3,4,5,6,7]
df = pd.DataFrame(data)
df.head()

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


In [21]:
data = [['Santosh',25],['Ashok',24],['Ashutosh',22],['Ajay',18]]
df = pd.DataFrame(data,columns=['Name','Age'])
df

Unnamed: 0,Name,Age
0,Santosh,25
1,Ashok,24
2,Ashutosh,22
3,Ajay,18


In [22]:
df = pd.DataFrame(data,columns=['Name','Age'],dtype=float)
df

Unnamed: 0,Name,Age
0,Santosh,25.0
1,Ashok,24.0
2,Ashutosh,22.0
3,Ajay,18.0


### 2.) Create a DataFrame from Dict of ndarrays / Lists

In [23]:
data = {'Name':['Tom', 'Jack', 'Steve', 'Ricky'],'Age':[28,34,29,42]}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Tom,28
1,Jack,34
2,Steve,29
3,Ricky,42


In [24]:
df = pd.DataFrame(data,index=['Rank1','Rank2','Rank3','Rank4'])
df

Unnamed: 0,Name,Age
Rank1,Tom,28
Rank2,Jack,34
Rank3,Steve,29
Rank4,Ricky,42


### 3.) Create a DataFrame from List of Dicts

In [25]:
data = [{'a':1,'b':2,'c':3}, {'a':1,'b':2,}, {'a':4,'b':6,'c':8}]
df = pd.DataFrame(data)
df

Unnamed: 0,a,b,c
0,1,2,3.0
1,1,2,
2,4,6,8.0


In [26]:
df = pd.DataFrame(data,index=['first', 'second', 'third'])
df

Unnamed: 0,a,b,c
first,1,2,3.0
second,1,2,
third,4,6,8.0


In [27]:
df1 = pd.DataFrame(data,index=['first','second', 'third'],columns=['a', 'b', 'c'])
df2 = pd.DataFrame(data,index=['first','second', 'third'],columns=['a', 'b1', 'c'])
df1

Unnamed: 0,a,b,c
first,1,2,3.0
second,1,2,
third,4,6,8.0


In [28]:
df2

Unnamed: 0,a,b1,c
first,1,,3.0
second,1,,
third,4,,8.0


### 4.) Create a DataFrame from Dict of Series

In [29]:
data = {'one': pd.Series([1,2,3],index=['a','b','c']),
        'two': pd.Series([1,2,3,4],index=['a','b','c','d'])
       }
df = pd.DataFrame(data)
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


### 5.) Column Selection

In [30]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [31]:
df['two']

a    1
b    2
c    3
d    4
Name: two, dtype: int64

### 6.) Column Addition

In [32]:
df['three']=pd.Series([10,20,30],index=['a','b','c'])
df

Unnamed: 0,one,two,three
a,1.0,1,10.0
b,2.0,2,20.0
c,3.0,3,30.0
d,,4,


In [33]:
df['four']=df['one']+df['three']
df

Unnamed: 0,one,two,three,four
a,1.0,1,10.0,11.0
b,2.0,2,20.0,22.0
c,3.0,3,30.0,33.0
d,,4,,


### 7.) Column Deletion

In [34]:
del df['one']
df

Unnamed: 0,two,three,four
a,1,10.0,11.0
b,2,20.0,22.0
c,3,30.0,33.0
d,4,,


In [35]:
df.pop('two')
df

Unnamed: 0,three,four
a,10.0,11.0
b,20.0,22.0
c,30.0,33.0
d,,


### 8.) Row Selection, Addition, and Deletion

In [37]:
df.loc['b']

three    20.0
four     22.0
Name: b, dtype: float64

In [38]:
df.iloc[2]

three    30.0
four     33.0
Name: c, dtype: float64

In [39]:
df[2:4]

Unnamed: 0,three,four
c,30.0,33.0
d,,


In [41]:
df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['three','four'],index=['e','f'])
df = df.append(df2)
df

Unnamed: 0,three,four
a,10.0,11.0
b,20.0,22.0
c,30.0,33.0
d,,
e,5.0,6.0
f,7.0,8.0
e,5.0,6.0
f,7.0,8.0


In [42]:
df.drop(['e','f'])

Unnamed: 0,three,four
a,10.0,11.0
b,20.0,22.0
c,30.0,33.0
d,,


# # pandas.Panel(data, items, major_axis, minor_axis, dtype, copy)¶

In [43]:
import pandas as pd
import numpy as np

data = np.random.rand(2,4,5)
df = pd.Panel(data)
df

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  exec(code_obj, self.user_global_ns, self.user_ns)


<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 4 (major_axis) x 5 (minor_axis)
Items axis: 0 to 1
Major_axis axis: 0 to 3
Minor_axis axis: 0 to 4

In [44]:
data = {'item1': pd.DataFrame(np.random.rand(4,3)),
        'item2': pd.DataFrame(np.random.rand(4,2))
       }
df = pd.Panel(data)
df

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 4 (major_axis) x 3 (minor_axis)
Items axis: item1 to item2
Major_axis axis: 0 to 3
Minor_axis axis: 0 to 2

### 1.) Selecting the Data from Panel

In [45]:
df = pd.Panel(data)
df['item1']

Unnamed: 0,0,1,2
0,0.702505,0.067989,0.901406
1,0.77069,0.17796,0.711485
2,0.53812,0.322863,0.091437
3,0.981836,0.583779,0.658607


### 1.) Using major_axis

In [46]:
df.major_xs(1)

Unnamed: 0,item1,item2
0,0.77069,0.674357
1,0.17796,0.213833
2,0.711485,


### 2.) Using minor_axis

In [47]:
df.minor_xs(1)

Unnamed: 0,item1,item2
0,0.067989,0.043567
1,0.17796,0.213833
2,0.322863,0.588507
3,0.583779,0.420292


# # Pandas Basic funcionality

In [49]:
import pandas as pd
import numpy as np

d = {'Name':pd.Series(['Tom','James','Ricky','Vin','Steve','Smith','Jack']),
   'Age':pd.Series([25,26,25,23,30,29,23]),
   'Rating':pd.Series([4.23,3.24,3.98,2.56,3.20,4.6,3.8])}

df = pd.DataFrame(d)
df

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2
5,Smith,29,4.6
6,Jack,23,3.8


### 1.) T (Transpose)

In [50]:
df.T

Unnamed: 0,0,1,2,3,4,5,6
Name,Tom,James,Ricky,Vin,Steve,Smith,Jack
Age,25,26,25,23,30,29,23
Rating,4.23,3.24,3.98,2.56,3.2,4.6,3.8


### 2.) axes

In [51]:
df.axes

[RangeIndex(start=0, stop=7, step=1),
 Index(['Name', 'Age', 'Rating'], dtype='object')]

### 3.) dtypes

In [52]:
df.dtypes

Name       object
Age         int64
Rating    float64
dtype: object

### 4.) empty -return objects are empty are not

In [53]:
df.empty

False

### 5.) ndim -return no. of dimensions of object

In [54]:
df.ndim

2

### 6.) shape - return tuple(row *column)

In [55]:
df.shape

(7, 3)

### 7.) size - return no. of elements

In [56]:
df.size

21

### 8.) values - return ndarray

In [57]:
df.values

array([['Tom', 25, 4.23],
       ['James', 26, 3.24],
       ['Ricky', 25, 3.98],
       ['Vin', 23, 2.56],
       ['Steve', 30, 3.2],
       ['Smith', 29, 4.6],
       ['Jack', 23, 3.8]], dtype=object)

### 9.) head() and tail()

In [58]:
df.head()

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2


In [59]:
df.tail()

Unnamed: 0,Name,Age,Rating
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2
5,Smith,29,4.6
6,Jack,23,3.8


#  # Pandas Statistics

In [60]:
import pandas as pd

d = {'Name':pd.Series(['Tom','James','Ricky','Vin','Steve','Smith','Jack',
   'Lee','David','Gasper','Betina','Andres']),
   'Age':pd.Series([25,26,25,23,30,29,23,34,40,30,51,46]),
   'Rating':pd.Series([4.23,3.24,3.98,2.56,3.20,4.6,3.8,3.78,2.98,4.80,4.10,3.65])}

df = pd.DataFrame(d)
df


Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2
5,Smith,29,4.6
6,Jack,23,3.8
7,Lee,34,3.78
8,David,40,2.98
9,Gasper,30,4.8


### 1.) sum()

In [61]:
df.sum()

Name      TomJamesRickyVinSteveSmithJackLeeDavidGasperBe...
Age                                                     382
Rating                                                44.92
dtype: object

In [62]:
df.sum(axis=1)

0     29.23
1     29.24
2     28.98
3     25.56
4     33.20
5     33.60
6     26.80
7     37.78
8     42.98
9     34.80
10    55.10
11    49.65
dtype: float64

In [63]:
df.sum(axis=0)

Name      TomJamesRickyVinSteveSmithJackLeeDavidGasperBe...
Age                                                     382
Rating                                                44.92
dtype: object

### 2.) mean()

In [64]:
df.mean()

Age       31.833333
Rating     3.743333
dtype: float64

In [65]:
df.mean(axis=1).head()

0    14.615
1    14.620
2    14.490
3    12.780
4    16.600
dtype: float64

### 3.) std()

In [66]:
df.std()

Age       9.232682
Rating    0.661628
dtype: float64

In [67]:
df.std(axis=1).head()

0    14.686608
1    16.093750
2    14.863385
3    14.453263
4    18.950462
dtype: float64

### 4.) median()

In [68]:
df.median()

Age       29.50
Rating     3.79
dtype: float64

In [69]:
df.median(axis=1).head()

0    14.615
1    14.620
2    14.490
3    12.780
4    16.600
dtype: float64

### 5.) mode()¶


In [70]:
df.mode().head()

Unnamed: 0,Name,Age,Rating
0,Andres,23.0,2.56
1,Betina,25.0,2.98
2,David,30.0,3.2
3,Gasper,,3.24
4,Jack,,3.65


### 6.) count() - return no. of non null observation


In [71]:
df.count()

Name      12
Age       12
Rating    12
dtype: int64

In [73]:
df.count(axis=1).head()

0    3
1    3
2    3
3    3
4    3
dtype: int64

### 7.) min() and max()

In [74]:
df.min()

Name      Andres
Age           23
Rating      2.56
dtype: object

In [75]:
df.max()

Name      Vin
Age        51
Rating    4.8
dtype: object

### 8.) cumsum() - comlative sum

In [79]:
df.cumsum()

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,TomJames,51,7.47
2,TomJamesRicky,76,11.45
3,TomJamesRickyVin,99,14.01
4,TomJamesRickyVinSteve,129,17.21
5,TomJamesRickyVinSteveSmith,158,21.81
6,TomJamesRickyVinSteveSmithJack,181,25.61
7,TomJamesRickyVinSteveSmithJackLee,215,29.39
8,TomJamesRickyVinSteveSmithJackLeeDavid,255,32.37
9,TomJamesRickyVinSteveSmithJackLeeDavidGasper,285,37.17


### 9.) cumprod() and abs() - throw exception when DataFrame contain character or string

# # Pandas function

### 1.) table wise function - pipe()

In [80]:
import pandas as pd
import numpy as np

def adder(ele1,ele2):
    return ele1+ele2
df = pd.DataFrame(np.random.rand(5,3), columns=['a','b','c'])
df

Unnamed: 0,a,b,c
0,0.34391,0.871503,0.89832
1,0.356442,0.784137,0.640839
2,0.9972,0.457714,0.543184
3,0.176084,0.061077,0.56852
4,0.708682,0.211544,0.593299


In [81]:
df.pipe(adder,2)

Unnamed: 0,a,b,c
0,2.34391,2.871503,2.89832
1,2.356442,2.784137,2.640839
2,2.9972,2.457714,2.543184
3,2.176084,2.061077,2.56852
4,2.708682,2.211544,2.593299


### 2.) Row or Column Wise Function Application - apply()

In [82]:
df.apply(np.mean)

a    0.516464
b    0.477195
c    0.648833
dtype: float64

In [83]:
df.apply(np.mean, axis = 1)

0    0.704578
1    0.593806
2    0.666033
3    0.268560
4    0.504508
dtype: float64

In [84]:
df.apply(lambda x: x.max() - x.min())

a    0.821115
b    0.810427
c    0.355136
dtype: float64

### 3.) Element Wise Function Application - applymap()

In [85]:
df

Unnamed: 0,a,b,c
0,0.34391,0.871503,0.89832
1,0.356442,0.784137,0.640839
2,0.9972,0.457714,0.543184
3,0.176084,0.061077,0.56852
4,0.708682,0.211544,0.593299


In [86]:
df['a'].map(lambda x: x*100)

0    34.390969
1    35.644246
2    99.719981
3    17.608439
4    70.868196
Name: a, dtype: float64