In [1]:
#implementation for reindexing a Series:
import numpy as np
import pandas as pd

obj = pd.Series(['blue', 'blue', 'purple', 'yellow'], dtype=object)
obj3 = obj.reindex(np.arange(6), method='ffill')

print(obj3)


0      blue
1      blue
2    purple
3    yellow
4    yellow
5    yellow
dtype: object


In [4]:
# implementation for reindexing a DataFrame:
import numpy as np
import pandas as pd

frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
                     index=['a', 'c', 'd'],
                     columns=['Ohio', 'Texas', 'California'])

frame2 = frame.reindex(index=['a', 'b', 'c', 'd'])
states = ['Texas', 'Utah', 'California']
frame3 = frame.reindex(columns=states)

print(frame2)
print(frame3)
print(frame)


   Ohio  Texas  California
a   0.0    1.0         2.0
b   NaN    NaN         NaN
c   3.0    4.0         5.0
d   6.0    7.0         8.0
   Texas  Utah  California
a      1   NaN           2
c      4   NaN           5
d      7   NaN           8
   Ohio  Texas  California
a     0      1           2
c     3      4           5
d     6      7           8


In [5]:
#dropping entries from an axis
import numpy as np
import pandas as pd

obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
obj_drop = obj.drop(['d', 'c'])

data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])

data_drop_rows = data.drop(index=['Colorado', 'Ohio'])
data_drop_columns = data.drop(columns=['two'])

print(new_obj)
print(obj_drop)
print(data_drop_rows)
print(data_drop_columns)


a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64
a    0.0
b    1.0
e    4.0
dtype: float64
          one  two  three  four
Utah        8    9     10    11
New York   12   13     14    15
          one  three  four
Ohio        0      2     3
Colorado    4      6     7
Utah        8     10    11
New York   12     14    15


In [6]:
#dropping entries from an axis
import pandas as pd
import numpy as np

obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
obj_drop = obj.drop(['d', 'c'])

data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])

data_drop_rows = data.drop(index=['Colorado', 'Ohio'])
data_drop_columns = data.drop(columns=['two'])

print(new_obj)
print(obj_drop)
print(data_drop_rows)
print(data_drop_columns)


a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64
a    0.0
b    1.0
e    4.0
dtype: float64
          one  two  three  four
Utah        8    9     10    11
New York   12   13     14    15
          one  three  four
Ohio        0      2     3
Colorado    4      6     7
Utah        8     10    11
New York   12     14    15


In [7]:
#indexing and selection
import numpy as np
import pandas as pd

obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])

obj_indexing_label = obj['b']
obj_indexing_integer = obj[1]
obj_indexing_slice = obj[2:4]
obj_indexing_sequence = obj[['b', 'a', 'd']]
data_column_single = data['two']
data_column_multiple = data[['three', 'one']]
data_boolean_array = data[data['three'] > 5]
data_boolean_dataframe = data < 5
data_boolean_dataframe[data_boolean_dataframe] = 0

print(obj_indexing_label)
print(obj_indexing_integer)
print(obj_indexing_slice)
print(obj_indexing_sequence)
print(data_column_single)
print(data_column_multiple)
print(data_boolean_array)
print(data_boolean_dataframe)


1.0
1.0
c    2.0
d    3.0
dtype: float64
b    1.0
a    0.0
d    3.0
dtype: float64
Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32
          three  one
Ohio          2    0
Colorado      6    4
Utah         10    8
New York     14   12
          one  two  three  four
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
            one    two  three   four
Ohio          0      0      0      0
Colorado      0  False  False  False
Utah      False  False  False  False
New York  False  False  False  False


In [8]:
# basic functionality of selecting data from a Pandas DataFrame 
#using both label-based and integer-based indexing with the loc and iloc attributes.
import pandas as pd

data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])

row_label_selection = data.loc[['Ohio', 'Colorado']]
row_integer_selection = data.iloc[[0, 1]]
column_label_selection = data.loc[:, 'two']
column_integer_selection = data.iloc[:, 1]
subset_selection = data.loc[['Ohio', 'Colorado'], ['three', 'one']]

print(row_label_selection)
print(row_integer_selection)
print(column_label_selection)
print(column_integer_selection)
print(subset_selection)


          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32
Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32
          three  one
Ohio          2    0
Colorado      6    4


In [9]:
#Arithmetic with Series
import pandas as pd
import numpy as np

s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=["a", "c", "d", "e"])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=["a", "c", "e", "f", "g"])

result = s1 + s2
print(result)


a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64


In [12]:
#arithemetic with data frames
import pandas as pd
import numpy as np

df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list("bcd"), index=["Ohio", "Texas", "Colorado"])
df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list("bde"), index=["Utah", "Ohio", "Texas", "Oregon"])

result = df1 + df2
print(df1)
print('hey')
print(df2)
print(result)


            b    c    d
Ohio      0.0  1.0  2.0
Texas     3.0  4.0  5.0
Colorado  6.0  7.0  8.0
hey
          b     d     e
Utah    0.0   1.0   2.0
Ohio    3.0   4.0   5.0
Texas   6.0   7.0   8.0
Oregon  9.0  10.0  11.0
            b   c     d   e
Colorado  NaN NaN   NaN NaN
Ohio      3.0 NaN   6.0 NaN
Oregon    NaN NaN   NaN NaN
Texas     9.0 NaN  12.0 NaN
Utah      NaN NaN   NaN NaN


In [13]:
#handling missing values with fill_value
import pandas as pd
import numpy as np

df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=list("abcd"))
df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)), columns=list("abcde"))

#**************************************
print(df2)
df2.loc[1, "b"] = np.nan

result = df1.add(df2, fill_value=0)
# print(result)

      a     b     c     d     e
0   0.0   1.0   2.0   3.0   4.0
1   5.0   6.0   7.0   8.0   9.0
2  10.0  11.0  12.0  13.0  14.0
3  15.0  16.0  17.0  18.0  19.0


In [14]:
#arithmetic btn dataframe and series
import pandas as pd
import numpy as np

frame = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list("bde"), index=["Utah", "Ohio", "Texas", "Oregon"])
series = frame.iloc[0]

result = frame - series
print(result)


          b    d    e
Utah    0.0  0.0  0.0
Ohio    3.0  3.0  3.0
Texas   6.0  6.0  6.0
Oregon  9.0  9.0  9.0


In [16]:
#applying a function on a dataframe using 
import pandas as pd
import numpy as np

frame = pd.DataFrame(np.random.standard_normal((4, 3)), columns=list("bde"), index=["Utah", "Ohio", "Texas", "Oregon"])

def f1(x):
    return x.max() - x.min()

print(frame)

result = frame.apply(f1)
print(result)


               b         d         e
Utah   -0.179476 -0.920289  0.515165
Ohio   -1.331061 -1.295767  0.808368
Texas  -0.665147 -0.984559  0.420335
Oregon -0.240074 -0.440705 -0.362206
b    1.151585
d    0.855062
e    1.170574
dtype: float64
