#                                                     Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Sample inputs reused by the Series examples below: index labels, a plain
# list of values, the same values as a NumPy array, and a label -> value dict.
labels = list('abc')
my_data = [10, 20, 30]
arr = np.array(my_data)
d = dict(a=10, b=20, c=30)

#### `pd.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)` — one-dimensional ndarray with axis labels (including time series).

In [8]:
# Build a Series from a list of values plus an explicit list of index labels.
pd.Series(my_data, index=labels)

a    10
b    20
c    30
dtype: int64

In [11]:
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [14]:
# A Series can hold arbitrary Python objects (here, built-in functions);
# the resulting dtype is `object`.
pd.Series([print, sum, max])

0    <built-in function print>
1      <built-in function sum>
2      <built-in function max>
dtype: object

In [15]:
# Values first, index labels second -- spelled out with keywords for clarity.
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['USA', 'Germany', 'USSR', 'Japan'],
)

In [16]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [17]:
# Values first, index labels second -- spelled out with keywords for clarity.
ser2 = pd.Series(
    data=[1, 2, 5, 4],
    index=['USA', 'Germany', 'Italy', 'Japan'],
)

In [18]:
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

#### Grab information from series

In [19]:
ser1['USA']

1

In [28]:
# Arithmetic aligns the two Series on their index labels: labels present in
# both are added, labels found in only one Series give NaN (so the result
# is float64).
ser1 + ser2

Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64

# 

#                                            Data Frames

In [29]:
from numpy.random import randn

In [33]:
# Seed NumPy's global random generator so the same random numbers are
# produced on every run, even on different machines.
np.random.seed(101)

In [34]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [35]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [40]:
# Column wise selection
df[['W', 'X']]

Unnamed: 0,W,X
A,2.70685,0.628133
B,0.651118,-0.319318
C,-2.018168,0.740122
D,0.188695,-0.758872
E,0.190794,1.978757


In [41]:
type(df)

pandas.core.frame.DataFrame

In [42]:
type(df['W'])

pandas.core.series.Series

In [44]:
df['WX'] = df['W'] + df['X']

In [45]:
df

Unnamed: 0,W,X,Y,Z,WX
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [49]:
# Returns a new frame without row 'A'; df itself is left unchanged.
df.drop(index='A')

Unnamed: 0,W,X,Y,Z,WX
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [51]:
# Returns a new frame without column 'W'; df itself is left unchanged.
df.drop(columns='W')

Unnamed: 0,X,Y,Z,WX
A,0.628133,0.907969,0.503826,3.334983
B,-0.319318,-0.848077,0.605965,0.3318
C,0.740122,0.528813,-0.589001,-1.278046
D,-0.758872,-0.933237,0.955057,-0.570177
E,1.978757,2.605967,0.683509,2.169552


In [53]:
df.shape

(5, 5)

In [54]:
# Selecting Row

In [55]:
df.loc['A']

W     2.706850
X     0.628133
Y     0.907969
Z     0.503826
WX    3.334983
Name: A, dtype: float64

In [57]:
df.iloc[[0,1,2]]

Unnamed: 0,W,X,Y,Z,WX
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046


In [58]:
df.loc[['A', 'B'], ['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077


# 

In [59]:
# Single cell lookup: one .loc call with (row label, column label) instead of
# chaining two indexers, which first builds an intermediate row Series.
df.loc['A', 'W']

2.706849839399938

In [60]:
df

Unnamed: 0,W,X,Y,Z,WX
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [63]:
bool_df = df > 0

In [64]:
bool_df

Unnamed: 0,W,X,Y,Z,WX
A,True,True,True,True,True
B,True,False,False,True,True
C,False,True,True,False,False
D,True,False,False,True,False
E,True,True,True,True,True


In [65]:
df[bool_df]

Unnamed: 0,W,X,Y,Z,WX
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,,,0.605965,0.3318
C,,0.740122,0.528813,,
D,0.188695,,,0.955057,
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [70]:
result_df = df[df['W'] > 0]

In [72]:
result_df

Unnamed: 0,W,X,Y,Z,WX
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [71]:
result_df['X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [74]:
# Rows where W > 0, column X only -- done in one .loc call (row mask, column
# label) rather than chained indexing on an intermediate frame.
df.loc[df['W'] > 0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [81]:
# The same filter built step by step: a boolean mask over W, the rows that
# pass it, and finally the columns of interest.
boolser = df['W'].gt(0)
result = df.loc[boolser]
my_cols = ['X', 'Y']
result.loc[:, my_cols]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
D,-0.758872,-0.933237
E,1.978757,2.605967


In [83]:
# Rows where W > 0, columns X and Y -- one .loc call (row mask, column list)
# rather than chained indexing on an intermediate frame.
df.loc[df['W'] > 0, ['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
D,-0.758872,-0.933237
E,1.978757,2.605967


In [93]:
# Python's `and` / `or` keywords cannot be used to combine boolean Series.
# Use `&` for element-wise AND and `|` for element-wise OR, wrapping each
# condition in parentheses.

df[(df['W'] > 0) & (df['X'] > 0)]

Unnamed: 0,W,X,Y,Z,WX
A,2.70685,0.628133,0.907969,0.503826,3.334983
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [94]:
df

Unnamed: 0,W,X,Y,Z,WX
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [95]:
df.reset_index()

Unnamed: 0,index,W,X,Y,Z,WX
0,A,2.70685,0.628133,0.907969,0.503826,3.334983
1,B,0.651118,-0.319318,-0.848077,0.605965,0.3318
2,C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
3,D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
4,E,0.190794,1.978757,2.605967,0.683509,2.169552


In [96]:
states = ['MH', 'KL', 'BANG', 'DEL', 'HY']
df['states'] = states

In [97]:
df

Unnamed: 0,W,X,Y,Z,WX,states
A,2.70685,0.628133,0.907969,0.503826,3.334983,MH
B,0.651118,-0.319318,-0.848077,0.605965,0.3318,KL
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046,BANG
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177,DEL
E,0.190794,1.978757,2.605967,0.683509,2.169552,HY


In [98]:
df.set_index('states')

Unnamed: 0_level_0,W,X,Y,Z,WX
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MH,2.70685,0.628133,0.907969,0.503826,3.334983
KL,0.651118,-0.319318,-0.848077,0.605965,0.3318
BANG,-2.018168,0.740122,0.528813,-0.589001,-1.278046
DEL,0.188695,-0.758872,-0.933237,0.955057,-0.570177
HY,0.190794,1.978757,2.605967,0.683509,2.169552
