# Pandas Basics

## Intro To Pandas

In [2]:
import pandas as pd

In [5]:
## Record the pandas version that produced the outputs shown in this notebook

print(pd.__version__)

2.1.4


In [11]:
## Build a Series from a list of values, attaching one string label per value

A = pd.Series(data=[2, 3, 4, 5], index=list('abcd'))
A

a    2
b    3
c    4
d    5
dtype: int64

In [13]:
## The underlying data of the Series, without its labels

A.values

array([2, 3, 4, 5], dtype=int64)

In [15]:
## Notice that the values of a Series are a NumPy array! That is because pandas is built on top of NumPy.

type(A.values)

numpy.ndarray

In [17]:
## The container itself is a pandas Series object

type(A)

pandas.core.series.Series

In [19]:
## The labels attached to the values are held in an Index object

A.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [23]:
## You can index into a Series by label to get a single value

A['a']

2

In [25]:
## We can also slice by label! Unlike positional slicing, the end label ('c') is included.

A['a':'c']

a    2
b    3
c    4
dtype: int64

## Pandas: Series

In [32]:
## We can also create a Series from a dictionary: the keys become the index labels
## and the values become the data.

grades_dict = dict(A=4, B=3, C=2, D=1)
grades = pd.Series(data=grades_dict)
grades

A    4
B    3
C    2
D    1
dtype: int64

In [34]:
## The dictionary values end up as the data of the Series

grades.values

array([4, 3, 2, 1], dtype=int64)

In [36]:
## The dictionary keys end up as the index of the Series

grades.index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [38]:
## A second Series that uses the same labels as `grades`, this time holding marks

marks_dict = dict(zip('ABCD', (85, 75, 65, 55)))
marks = pd.Series(data=marks_dict)
marks

A    85
B    75
C    65
D    55
dtype: int64

In [40]:
## The data of the marks Series, without its labels

marks.values

array([85, 75, 65, 55], dtype=int64)

In [42]:
## The labels of the marks Series

marks.index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [44]:
## Look up a single mark by its label

marks['A']

85

In [46]:
## Take the first two entries by position. `.iloc` states the positional intent
## explicitly instead of relying on `[]` falling back to positions when an
## integer slice is used on a Series whose index holds string labels.

marks.iloc[0:2]

A    85
B    75
dtype: int64

## Pandas: Dataframe

In [49]:
## Combine the two Series into a DataFrame: each Series becomes a named column,
## and the rows are matched up on the shared index labels.

rs = pd.DataFrame(dict(Marks=marks, Grades=grades))
rs

Unnamed: 0,Marks,Grades
A,85,4
B,75,3
C,65,2
D,55,1


In [53]:
## Transpose: the row labels become the columns and the columns become the rows

rs.T

Unnamed: 0,A,B,C,D
Marks,85,75,65,55
Grades,4,3,2,1


In [55]:
## The data of the DataFrame as a 2-D array: one row per index label, one column per DataFrame column

rs.values

array([[85,  4],
       [75,  3],
       [65,  2],
       [55,  1]], dtype=int64)

In [57]:
## Row labels of the DataFrame

rs.index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [59]:
## Column labels of the DataFrame

rs.columns

Index(['Marks', 'Grades'], dtype='object')

In [61]:
## Index the underlying array by position: row 2, column 0 (the Marks of 'C')

rs.values[2,0]

65

In [69]:
## Add a derived column: each mark expressed as a percentage of 90.
## Assigning to a new column name modifies `rs` itself.

rs['ScaledMarks'] = (rs['Marks'] / 90) * 100
rs

Unnamed: 0,Marks,Grades,ScaledMarks
A,85,4,94.444444
B,75,3,83.333333
C,65,2,72.222222
D,55,1,61.111111


In [71]:
## Remove the derived column again. `drop(..., errors='ignore')` builds a new
## frame and does nothing when the column is already gone, so this cell can be
## re-run safely (running `del rs['ScaledMarks']` a second time raises KeyError).

rs = rs.drop(columns='ScaledMarks', errors='ignore')
rs

Unnamed: 0,Marks,Grades
A,85,4
B,75,3
C,65,2
D,55,1


In [73]:
## Boolean filtering: keep only the rows whose Marks are above 70

G = rs.loc[rs['Marks'] > 70]
G

Unnamed: 0,Marks,Grades
A,85,4
B,75,3


## Pandas: Missing Values

In [79]:
## Build a frame from a list of dictionaries (one per row). The rows do not share
## all of their keys, so the gaps are filled with missing values (NaN).

data = pd.DataFrame(data=[dict(a=1, b=2), dict(b=3, d=4)])
data

Unnamed: 0,a,b,d
0,1.0,2,
1,,3,4.0


In [83]:
## Replace every missing value with 0. This returns a new frame; `data` itself keeps its NaNs.

data.fillna(0)

Unnamed: 0,a,b,d
0,1.0,2,0.0
1,0.0,3,4.0


In [90]:
## Drop all rows with missing values.
## Every row of `data` has at least one NaN, so nothing is left and the result is empty.

data.dropna()

Unnamed: 0,a,b,d


## Pandas: Indexing (loc & iloc)

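### loc: explicit (label-based) indexing, e.g. data.loc['a':'b'] (the end label is included)
### iloc: implicit (position-based) indexing, e.g. data.iloc[0:2] (the end position is excluded)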

In [99]:
## A fresh single-column frame with string row labels for the loc/iloc examples.
## Note that this rebinds `data`, replacing the missing-values frame from the previous section.

data = pd.DataFrame(data=[1, 3, 5], index=list('abc'))
data

Unnamed: 0,0
a,1
b,3
c,5


In [109]:
## Explicit (label) slice: both end labels, 'a' and 'b', are included

data.loc['a':'b']

Unnamed: 0,0
a,1
b,3


In [113]:
## Implicit (positional) slice: positions 0 and 1 are returned, the stop position 2 is excluded

data.iloc[0:2]

Unnamed: 0,0
a,1
b,3


In [116]:
## Show the Marks/Grades frame again before indexing into it

rs

Unnamed: 0,Marks,Grades
A,85,4
B,75,3
C,65,2
D,55,1


In [118]:
## Select the row at position 2 with all of its columns; a single row comes back as a Series named after its label ('C')

rs.iloc[2,:]

Marks     65
Grades     2
Name: C, dtype: int64

In [120]:
## Label-based row slice: rows 'B' through 'C', end label included

rs.loc['B':'C']

Unnamed: 0,Marks,Grades
B,75,3
C,65,2


In [122]:
## Reverse the dataframe! A step of -1 walks the rows from last to first; `:` keeps every column.

rs.iloc[::-1,:]

Unnamed: 0,Marks,Grades
D,55,1
C,65,2
B,75,3
A,85,4
