In [None]:
![]()

In [2]:
import pandas as pd
import numpy as np

## Object Creation

Creating a Series by passing a list of values, letting pandas create a default integer index:

### Series

In [5]:
s = pd.Series([1,3,5,np.nan,6,8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [13]:
s.head()

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
dtype: float64

In [14]:
s.describe()

count    5.000000
mean     4.600000
std      2.701851
min      1.000000
25%      3.000000
50%      5.000000
75%      6.000000
max      8.000000
dtype: float64

In [23]:
print(s[0],s[1],s[5])

1.0 3.0 8.0


In [None]:
ser = pd.Series([3, -5, 7, 4], index=['a', 'b', 'c', 'd'])
print(ser)

In [15]:
# Get one element
s['b']

KeyError: 'b'

In [None]:
s[~(s > 1)]

In [None]:
# Series s where value is not >1 s where value is <-1 or >2
s[(s < -1) | (s > 2)]

In [None]:
# Set index a of Series s to 6
s['a'] = 6
print(s)

### Dropping

Drop values from rows (axis=0)

In [None]:
s.drop(['a', 'c'])

In [None]:
s3 = pd.Series([7, -2, 3], index=['a', 'c', 'd'])
print(s3)

#### Arithmetic Operations with Fill Methods

In [None]:
print(s,'\n',s3)
s + s3

In [None]:
Add =s.add(s3, fill_value=0)
sub = s.sub(s3, fill_value=2)
div = s.div(s3, fill_value=4)
mul = s.mul(s3, fill_value=3)
print(Add,sub,mul,div)

## DataFrame 

#### Creating a DataFrame by passing a numpy array, with a datetime index and labeled columns:

In [None]:
dates = pd.date_range('20180505', periods=6)
print(dates)

In [None]:
f = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
print(f)

In [None]:
df2 = pd.DataFrame({ 'A' : 1.,
                        'B' : pd.Timestamp('20180505'),
                        'C' : pd.Series(12,index=list(range(4)),dtype='float32'),
                        'D' : np.array([3] * 4,dtype='int32'),
                        'E' : pd.Categorical(["test","train","test","train"]),
                        'F' : 'foo' })
print(df2)

In [None]:
df2.dtypes
#np.dtype(df2)

Creating a DataFrame by passing a dict of objects that can be converted to series-like.

In [None]:
data = {'Country': ['Belgium', 'India', 'Brazil'],
'Capital': ['Brussels', 'New Delhi', 'Brasília'],
'Population': [11190846, 1303171035, 207847528]}
print(data)

In [None]:
df = pd.DataFrame(data,columns=['Country', 'Capital', 'Population'])
print(df)

In [None]:
# Selection

# Get subset of a DataFrame 
df[1:]

In [None]:
# By Position
d = df.iloc[[0],[0]] # Select single value by row &column
print(d)
df.iat[0,0]

Select single value by row & column labels

In [None]:
# By Label
df.loc[[0], ['Country']]

In [None]:
df.at[0,'Country']

Select single row of subset of rows

In [None]:
# By Label/Position
df.ix[2]

In [None]:
df.ix[:,'Capital'] #Select a single column ofsubset of columns

In [None]:
df.ix[1,'Capital'] #Select rows and columns

### Boolean Indexing

In [None]:
df[df['Population']>1200000000]

### Dropping Data Frame

In [None]:
df.drop('Country', axis=1) # Drop values from columns(axis=1)

In [None]:
df

In [None]:
df.sort_index() #Sort by labels along an axis

In [None]:
df.sort_values(by='Country') #Sort by the values along an axis

In [None]:
df.rank() #Assign ranks to entries

##### Retrieving Series/DataFrame Information

In [None]:
df.shape #(rows,columns)
df.index #Describe index
df.columns #Describe DataFrame columns
df.info() #Info on DataFrame
df.count() #Number of non-NA values

In [None]:
print(df.sum()) #Sum of values
print(df.cumsum()) #Cummulative sum of values
print(df.min()) #Minimum
print(df.max()) #maximum values
#df.idxmin()#Minimum
#df.idxmax() #Maximum index value
print(df.describe()) #Summary statistics
print(df.mean()) #Mean of values
print(df.median()) #Median of values

### Applying Functions

In [None]:
f = lambda x: x*2
df.apply(f) #Apply function

In [None]:
df.applymap(f) #Apply function element-wise