

<p align="center">
    <img src="http://raghudathesh.weebly.com/uploads/4/8/9/6/48968251/10_orig.png">
</p>

2D ndarray creation

In [None]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
data = np.array([[2,4], [5,6]])
df = pd.DataFrame(data, columns=['A', 'B'])
df

Unnamed: 0,A,B
0,2,4
1,5,6


Dictionary of arrays, lists and tuples


In [None]:
data = {
    'A': [1, 2, 3],
    'B': (4, 5, 6)
}
df = pd.DataFrame(data)
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


NumPy structured/record array

In [None]:
data = np.array([(1, 2.4), (4, 5.6)], dtype=[('x', 'i4'), ('y', 'f4')])
df = pd.DataFrame(data)
df

Unnamed: 0,x,y
0,1,2.4
1,4,5.6


Dictionary of Series

In [None]:
s1 = pd.Series([1,2,3], index=['a', 'b', 'c'])
s2 = pd.Series([4,5], index=['a','b'])

data = { 'ohia': s1, 'Nevada': s2}
df = pd.DataFrame(data)
df

Unnamed: 0,ohia,Nevada
a,1,4.0
b,2,5.0
c,3,


Dictionary of Dictionary

In [None]:
data = {
    'ohia':{'a': 1, 'b': 2},
    'Nevada':{'a': 4, 'b':5}
}
df = DataFrame(data)
df

Unnamed: 0,ohia,Nevada
a,1,4
b,2,5


List od dictionaries

In [None]:
data = [{'A': 3, 'B':4}, {'A': 2, 'B':1}]
df=pd.DataFrame(data)
df

Unnamed: 0,A,B
0,3,4
1,2,1


List of lists

In [None]:
data = [[1,2],[3,4]]
df = pd.DataFrame(data)
df

Unnamed: 0,0,1
0,1,2
1,3,4


List of tuples

In [None]:
data = [(1,2),(3,4)]
df = pd.DataFrame(data)
df

Unnamed: 0,0,1
0,1,2
1,3,4


Another Dataframe - Indexes and data are copied unless explicitly changed.

In [None]:
df = pd.DataFrame({'A': [1,2], 'B': [3,4]})
df_new = pd.DataFrame(df)
df_new

Unnamed: 0,A,B
0,1,3
1,2,4




<p align="center">
    <img src="http://raghudathesh.weebly.com/uploads/4/8/9/6/48968251/11_orig.png">
</p>



**Index Objects**


In [None]:
import pandas as pd

idx = pd.Index([0,1,2,3,4,5])
idx

Index([0, 1, 2, 3, 4, 5], dtype='int64')

append() method


In [None]:
idx2 = pd.Index([6,7])
appended = idx.append(idx2)
appended

Index([0, 1, 2, 3, 4, 5, 6, 7], dtype='int64')

difference() method

In [None]:
idx2 = pd.Index([8,7,1])
difference_idx = idx.difference(idx2)
difference_idx

Index([0, 2, 3, 4, 5], dtype='int64')

intersection() method

In [None]:
inter = idx.intersection(idx2)
inter

Index([1], dtype='int64')

union() method

In [None]:
union_idx = idx.union(idx2)
union_idx

Index([0, 1, 2, 3, 4, 5, 7, 8], dtype='int64')

isin() method

In [None]:
isin_mtd = idx.isin(idx2)
isin_mtd

array([False,  True, False, False, False, False])

delete() method

In [None]:
deleted = idx.delete(2)
print(deleted)

Index([0, 1, 3, 4, 5], dtype='int64')


drop(values)

In [None]:
dropped = idx.drop([3,4])
print(dropped)

Index([0, 1, 2, 5], dtype='int64')


insert(i, value)

In [None]:
Inserted = idx.insert(2, 30)
print(Inserted)

Index([0, 1, 30, 2, 3, 4, 5], dtype='int64')


is_monotonic - Check if index is sorted in increasing order

In [None]:
Monotonic = idx.is_monotonic_increasing
print(Monotonic)

True


is_unique - Return only unique elements

In [None]:
idx_duplicates = pd.Index([10, 20, 20, 30])
print("Unique values:", idx_duplicates.unique())


Unique values: Index([10, 20, 30], dtype='int64')





<p align="center">
    <img src="http://raghudathesh.weebly.com/uploads/4/8/9/6/48968251/12_orig.png">
</p>

In [None]:
import pandas as pd

idx_obj = pd.Series(['blue', 'yellow', 'red'], index=[0,2,4])
idx_obj

Unnamed: 0,0
0,blue
2,yellow
4,red


New Index - reindexing

In [None]:
new_obj = idx_obj.reindex([1,2,3,4,5])
new_obj

Unnamed: 0,0
1,
2,yellow
3,
4,red
5,


ffill method - This fill forwards


In [None]:
ffilled = new_obj.ffill()
ffilled

Unnamed: 0,0
1,
2,yellow
3,yellow
4,red
5,red


bfill method - This fills backward

In [None]:
bfilled = new_obj.bfill()
bfilled

Unnamed: 0,0
1,yellow
2,yellow
3,red
4,red
5,


fill_value


In [None]:
filled = idx_obj.reindex([0, 1, 2, 3, 4], fill_value='missing')
print(filled)

0       blue
1    missing
2     yellow
3    missing
4        red
dtype: object


limit - Maximum number of elements to fill when using method.

Only 1 consecutive missing value is filled.
Others remain NaN.

In [None]:
limited_fill = idx_obj.reindex([0, 1, 2, 3, 4], method='ffill', limit=1)
print(limited_fill)


0      blue
1      blue
2    yellow
3    yellow
4       red
dtype: object


tolerance

Maximum distance allowed to match labels

Values are filled only if within 0.1 distance of existing index values.
Otherwise, NaN.


In [None]:
obj2 = pd.Series([10, 20, 30], index=[1.0, 2.0, 3.0])

reindexed = obj2.reindex([1.0, 1.05, 2.1], method='nearest', tolerance=0.1)
print(reindexed)


1.00    10.0
1.05    10.0
2.10     NaN
dtype: float64


MultiIndex

In [None]:
new_obj1 = idx_obj.reindex([0, 2, 4], copy=False)
print(new_obj1)
print("Is same object:", new_obj1 is idx_obj)


0      blue
2    yellow
4       red
dtype: object
Is same object: False



<p align="center">
    <img src="https://miro.medium.com/v2/resize:fit:1100/format:webp/1*CgAWzayEQY8PQuMpRkSGfQ.png">
</p>



In [None]:
import pandas as pd

df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'charlie', 'David', 'eve'],
    'Score': [85, 30, 45, 56, 80]
}, index=['a', 'b', 'c', 'd', 'e'])

df

Unnamed: 0,Name,Score
a,Alice,85
b,Bob,30
c,charlie,45
d,David,56
e,eve,80


.loc[] - Select using label

In [None]:
df.loc['b']

Unnamed: 0,b
Name,Bob
Score,30


.iloc[] - using position

In [None]:
df.iloc[1]

Unnamed: 0,b
Name,Bob
Score,30


.loc - List of labels

In [None]:
df.loc[['a', 'e']]

Unnamed: 0,Name,Score
a,Alice,85
e,eve,80


.iloc - List of positions

In [None]:
df.iloc[[1,4]]

Unnamed: 0,Name,Score
b,Bob,30
e,eve,80


Slicing
.loc[]: Inclusive on both ends



In [None]:
df.loc['b':'d']

Unnamed: 0,Name,Score
b,Bob,30
c,charlie,45
d,David,56


.iloc[]: Exclusive on end

In [None]:
df.iloc[1:4]

Unnamed: 0,Name,Score
b,Bob,30
c,charlie,45
d,David,56


Conditions
.loc[]: Condition on DataFrame

In [None]:
df.loc[df['Score'] > 60]

Unnamed: 0,Name,Score
a,Alice,85
e,eve,80


.iloc[]: Boolean list

In [None]:
condition = [False, True, False, True, False]
df.iloc[condition]

Unnamed: 0,Name,Score
b,Bob,30
d,David,56


Callable Functions

.loc[]: With lambda

In [None]:
df.loc[lambda x:x['Score'] > 80]

Unnamed: 0,Name,Score
a,Alice,85


.iloc[]: With lambda

In [None]:
df.iloc[lambda x: [1,4]]

Unnamed: 0,Name,Score
b,Bob,30
e,eve,80




<p align="center">
    <img src="http://raghudathesh.weebly.com/uploads/4/8/9/6/48968251/13_orig.png">
</p>

df.iloc[:, where]

In [None]:
df.iloc[:, 1]

Unnamed: 0,Score
a,85
b,30
c,45
d,56
e,80


df.iloc[where_i, where_j]

In [None]:
df.iloc[1, 0]

'Bob'

df.at[label_i, label_j]

In [None]:
df.at['a', 'Score']

np.int64(85)

df.iat[i, j]
Select a single value using integer position

In [None]:
df.iat[1, 1]

np.int64(30)

reindex method
Reorder or insert/remove rows or columns by label


In [None]:
df.reindex(['c', 'b', 'a', 'e', 's'])

Unnamed: 0,Name,Score
c,charlie,45.0
b,Bob,30.0
a,Alice,85.0
e,eve,80.0
s,,


**Series and DataFrame methods for arithmetic.**



<p align="center">
    <img src="http://raghudathesh.weebly.com/uploads/4/8/9/6/48968251/14_orig.png">
</p>



add()

In [4]:
import pandas as pd

s1 = pd.Series([1,2,3])
s2 = pd.Series([4,5,6])

add = s1.add(s2)
add

Unnamed: 0,0
0,5
1,7
2,9


radd()

In [2]:
import pandas as pd

s3 = pd.Series([1,2,3])
radd = s3.radd(10)
radd

Unnamed: 0,0
0,11
1,12
2,13


sub()

In [6]:
sub = s1.sub(s2)
sub

Unnamed: 0,0
0,-3
1,-3
2,-3


rsub()

In [7]:
rsubtract = s1.rsub(10)
rsubtract

Unnamed: 0,0
0,9
1,8
2,7


div()

In [8]:
divide = s1.div(s2)
divide

Unnamed: 0,0
0,0.25
1,0.4
2,0.5


floordiv()

In [9]:
floordivision = s1.div(s2)
floordivision

Unnamed: 0,0
0,0.25
1,0.4
2,0.5


mul

In [10]:
multiply = s1.mul(s2)
multiply

Unnamed: 0,0
0,4
1,10
2,18


pow()

In [13]:
power = s1.pow(2)
power

Unnamed: 0,0
0,1
1,4
2,9
