# Pandas

In [1]:
#import pandas & numpy

import pandas as pd
import numpy as np

In [6]:
# Build a 5x4 frame holding the integers 1..20, filled row by row,
# with rows labelled r1..r5 and columns labelled c1..c4.
df = pd.DataFrame(
    np.arange(1, 21).reshape(5, 4),
    index=['r1', 'r2', 'r3', 'r4', 'r5'],
    columns=['c1', 'c2', 'c3', 'c4'],
)

In [7]:
# Display the whole frame: rows r1-r5 by columns c1-c4, holding 1..20
df

Unnamed: 0,c1,c2,c3,c4
r1,1,2,3,4
r2,5,6,7,8
r3,9,10,11,12
r4,13,14,15,16
r5,17,18,19,20


# loc (label-based) & iloc (integer-position-based)
## Used to extract row and column values

In [8]:
# Get the data of one row by its row label
df.loc['r1']

c1    1
c2    2
c3    3
c4    4
Name: r1, dtype: int32

In [17]:
# Selecting a single row by label returns a Series

type(df.loc['r1'])

pandas.core.series.Series

In [13]:
# iloc

# Get data by integer position: rows at positions 2-3 (the stop, 4, is excluded)
# and every column from position 2 onward
df.iloc[2:4,2:]

Unnamed: 0,c3,c4
r3,11,12
r4,15,16


# Difference between DataFrame & Series

##### Selecting a single row or a single column with a scalar label/position returns a Series; selecting with slices (or lists) on both axes returns a DataFrame, even when the result contains only one row and one column.

In [16]:
# Slices on both axes keep both dimensions, so the result is a 1x1 DataFrame
df.iloc[0:1,0:1]

Unnamed: 0,c1
r1,1


In [18]:
# Confirm the type: slicing rows and columns yields a DataFrame
type(df.iloc[0:1,0:1])

pandas.core.frame.DataFrame

In [19]:
# A scalar column position (0) with a row slice drops the column axis, giving a Series named c1
df.iloc[0:1,0]

r1    1
Name: c1, dtype: int32

In [20]:
# Confirm the type: a scalar selector on one axis yields a Series
type(df.iloc[0:1,0])

pandas.core.series.Series

# Convert dataframe into arrays


In [27]:
# Convert the first 4 rows and first 3 columns of the frame to a NumPy array.
# to_numpy() is the accessor pandas recommends over the older .values attribute.
arr = df.iloc[:4, :3].to_numpy()

In [28]:
# Show the extracted 4x3 array
arr


array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11],
       [13, 14, 15]])

# Count null values

In [30]:
# Count missing values per column: isnull() flags each missing cell, sum() totals the flags column-wise
df.isnull().sum()

c1    0
c2    0
c3    0
c4    0
dtype: int64

# Count occurrences of each value in a column

In [34]:
# Frequency of each distinct value in column c1 (every value occurs exactly once here)
df['c1'].value_counts()

9     1
1     1
13    1
17    1
5     1
Name: c1, dtype: int64

# Check for unique values


In [36]:
# Distinct values of column c2, returned as a NumPy array
df['c2'].unique()

array([ 2,  6, 10, 14, 18])

# Handling CSV Files


In [40]:
from io import StringIO, BytesIO

In [41]:
# CSV text to parse: one header line followed by three data rows

data = '\n'.join([
    'col1,col2,col3',
    'x,y,1',
    'a,b,2',
    'c,d,3',
])

In [42]:
# StringIO wraps the string in a file-like object so read_csv can parse it like a file;
# the first line becomes the column header
pd.read_csv(StringIO(data))

Unnamed: 0,col1,col2,col3
0,x,y,1
1,a,b,2
2,c,d,3


#### `usecols`: read only particular columns

In [46]:

# Load only col2 and col3; col1 is skipped while parsing.
# NOTE: this rebinds df, so the 5x4 numeric frame built earlier is no longer reachable by that name.
df=pd.read_csv(StringIO(data),usecols=['col2','col3'])

In [47]:
# Only col2 and col3 are present; col1 was not loaded
df

Unnamed: 0,col2,col3
0,y,1
1,b,2
2,d,3


#### Specify datatypes of columns

In [56]:
# CSV text with purely numeric columns, used by the dtype examples that follow
data = '\n'.join([
    'a,b,c,d',
    '1,2,3,4',
    '5,6,7,8',
    '9,10,11,12',
])
print(data)  # print() renders the newlines, showing the raw CSV layout


a,b,c,d
1,2,3,4
5,6,7,8
9,10,11,12


In [57]:
# dtype=object turns off numeric conversion: values are kept as the raw strings from the CSV text
df=pd.read_csv(StringIO(data),dtype=object)

In [58]:
# The values look numeric when displayed, but they were read with dtype=object
df

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12


In [59]:
# Look up row label 1 of column b with a single .loc call instead of chained
# indexing (df['b'][1]); the value is the string '6' because dtype=object was used.
df.loc[1, 'b']

'6'

In [60]:
# Re-read the CSV with explicit per-column dtypes.
# Column d is not listed in the mapping, so pandas infers its dtype.
df = pd.read_csv(
    StringIO(data),
    dtype={
        'b': int,
        'c': float,
        'a': 'Int64',  # pandas nullable integer extension dtype
    },
)

In [61]:
# Column c now shows as floats (3.0, 7.0, 11.0) because it was read as float
df


Unnamed: 0,a,b,c,d
0,1,2,3.0,4
1,5,6,7.0,8
2,9,10,11.0,12


In [55]:
# Same lookup after the typed re-read, again with a single .loc call instead of
# chained indexing (df['b'][1]); column b is now integer, so this is 6 rather than '6'.
df.loc[1, 'b']

6

#### Check all the datatypes

In [64]:
# Per-column dtypes: a, b and c follow the dtype mapping passed to read_csv; d was left for pandas to infer
df.dtypes

a      Int64
b      int32
c    float64
d      int64
dtype: object

#### Handling index

In [67]:
# CSV text whose first column is literally named "index"
data = '\n'.join([
    'index,a,b,c',
    '4,apple,bat,5.7',
    '8,orange,cow,10',
])

In [66]:
# Without index_col, the "index" column is read as ordinary data and a default 0..n-1 row index is added
pd.read_csv(StringIO(data))

Unnamed: 0,index,a,b,c
0,4,apple,bat,5.7
1,8,orange,cow,10.0


In [68]:
# index_col=0 uses the first column ("index") as the row labels instead of a default index
pd.read_csv(StringIO(data),index_col=0)

Unnamed: 0_level_0,a,b,c
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,apple,bat,5.7
8,orange,cow,10.0


In [73]:
# CSV text where every data row ends with a trailing comma, giving it one
# more field than the three-column header
data = '\n'.join([
    'a,b,c',
    '4,apple,bat,',
    '4,orange,cow,',
])


In [74]:
# Each data row has one more field than the header (because of the trailing comma),
# so pandas treats the first field (4) as the row index and shifts the rest under a, b, c
pd.read_csv(StringIO(data))

Unnamed: 0,a,b,c
4,apple,bat,
4,orange,cow,


In [76]:
# index_col=False stops pandas from using the first column as the index:
# 4/apple/bat line up with a/b/c and the empty trailing field is discarded
pd.read_csv(StringIO(data),index_col=False)

Unnamed: 0,a,b,c
0,4,apple,bat
1,4,orange,cow
