In [None]:
# Pandas Complete Concepts

In [1]:
#  Data Structure - Series 

import pandas as pd
import numpy as np

In [2]:
# Creating an empty Series.
# The dtype is given explicitly: a bare pd.Series() raises a DeprecationWarning
# on pandas 1.x and silently switches to object dtype on pandas 2.x, so naming
# the dtype keeps the result (float64) the same on every version.

a = pd.Series(dtype='float64')
print(a)

Series([], dtype: float64)


  a = pd.Series()


In [5]:
# Building a Series from a NumPy array.
# No index is passed, so pandas assigns the default positions 0..n-1.

# numeric values 1..10
a = np.arange(1, 11)
b = pd.Series(a)

# string values
a1 = np.array(list('abcde'))
b1 = pd.Series(a1)

# show both, separated by one blank line
print(b, end='\n\n')
print(b1)

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

0    a
1    b
2    c
3    d
4    e
dtype: object


In [13]:
# Giving the Series our own index labels instead of the default 0..n-1

a = np.array(list('abcde'))
b = pd.Series(data=a, index=list(range(101, 106)))   # labels 101..105
print(b)

101    a
102    b
103    c
104    d
105    e
dtype: object


In [14]:
# Creating a Series from a dict.
# If no index is specified, the dictionary keys become the index labels.
# The keys are taken in the dict's insertion order (pandas >= 0.23 on
# Python >= 3.6) - they are NOT sorted. The keys below simply happen to be
# written in ascending order already.

val = {'101':'a', '102': 'b','103':'c', '104':'d', '105':'e'}
x = pd.Series(val)
print(x)

101    a
102    b
103    c
104    d
105    e
dtype: object


In [15]:
# When an index is passed along with a dict, each index label is looked up in
# the dict and the values come out in the order the index lists them.

val = dict(zip(['101', '102', '103', '104', '105'], 'abcde'))
x = pd.Series(
    val,
    index=['104', '101', '105', '103', '102'],   # any ordering we like
)
print(x)

104    d
101    a
105    e
103    c
102    b
dtype: object


In [16]:
# Same lookup as before, but the dict has no entry for '103':
# the label is still created and its value is filled with NaN.

val = dict([('101', 'a'), ('102', 'b'), ('104', 'd'), ('105', 'e')])
x = pd.Series(
    val,
    index=['104', '101', '105', '103', '102'],
)
print(x)

104      d
101      a
105      e
103    NaN
102      b
dtype: object


In [19]:
# Series from a scalar.
# A scalar has no length of its own, so an index is required; the single value
# is repeated once for every index label.

a = pd.Series(data=11, index=list(range(101, 106)))
print(a)

101    11
102    11
103    11
104    11
105    11
dtype: int64


In [23]:
# Accessing data from a Series by position.
# The index of this Series holds integers, so a plain b[3] is a *label* lookup
# (the row labelled 3), not a positional one. .iloc is always positional and
# counts from 0, so the third element is .iloc[2].

a = np.array(['aaa','baa','cba','dbb','ecb'])
b = pd.Series(a, index=[1,2,3,4,5])
print(b.iloc[2])

cba


In [24]:
# Retrieving the first 3 elements using a ':' slice.
# .iloc states the positional intent explicitly: the index labels here are
# integers, so a bare a[:3] is easy to misread as a slice over the labels.

x=np.array(['a','b','c','d','e'])
a=pd.Series(x, index=[101,102,103,104,105])
print(a.iloc[:3])

101    a
102    b
103    c
dtype: object


In [25]:
# Retrieving the last 3 elements using a ':' slice.
# .iloc states the positional intent explicitly: the index labels here are
# integers, so a bare a[-3:] is easy to misread as a slice over the labels.

x=np.array(['a','b','c','d','e'])
a=pd.Series(x, index=[101,102,103,104,105])
print(a.iloc[-3:])

103    c
104    d
105    e
dtype: object


In [2]:
# Retrieve a single value by its index label

x = np.array(list('abcde'))
a = pd.Series(data=x, index=list(range(101, 106)))
print(a.loc[102])   # label 102, not position 102

b


In [6]:
# Retrieve several values at once by passing a list of index labels

# integer labels -> string values
x = np.array(list('abcde'))
a = pd.Series(data=x, index=list(range(101, 106)))
print(a.loc[[102, 103, 104]])

# another example: string labels -> integer values

y = np.array([101, 102, 103, 104, 105])
a1 = pd.Series(data=y, index=list('abcde'))
print(a1.loc[['a', 'c', 'e']])

102    b
103    c
104    d
dtype: object
a    101
c    103
e    105
dtype: int64


In [7]:
# Data Structure - DataFrame

# An empty DataFrame: no rows and no columns

a = pd.DataFrame(data=None)
print(a)

Empty DataFrame
Columns: []
Index: []


In [8]:
# Create a DataFrame from a flat sequence of values.
# A 1-D NumPy array is used here; the result is a single column named 0
# with one row per element.

x = np.arange(3, 7)
a = pd.DataFrame(data=x)

# show the raw array, a blank line, then the frame
print(x, end='\n\n')
print(a)

[3 4 5 6]

   0
0  3
1  4
2  5
3  6


In [9]:
# Another example: a list of lists, one inner list per row,
# with the column names supplied separately

x = [
    ['Tom', 32],
    ['Jerry', 45],
    ['Bob', 44],
]
a = pd.DataFrame(data=x, columns=['Name', 'Age'])
print(a)

    Name  Age
0    Tom   32
1  Jerry   45
2    Bob   44


In [10]:
# Create a DataFrame from a dict of lists:
# every key becomes a column, every list supplies that column's values

x = dict(
    Name=['Tom', 'Jerry', 'Bob', 'Rhodes'],
    Age=[24, 36, 39, 22],
)
a = pd.DataFrame(data=x)
print(a)

     Name  Age
0     Tom   24
1   Jerry   36
2     Bob   39
3  Rhodes   22


In [11]:
# Same dict-of-lists frame, now with our own row labels in place of 0..n-1

x = dict(
    Name=['Tom', 'Jerry', 'Bob', 'Rhodes'],
    Age=[24, 36, 39, 22],
)
a = pd.DataFrame(data=x, index=['Rank %d' % n for n in range(1, 5)])
print(a)

          Name  Age
Rank 1     Tom   24
Rank 2   Jerry   36
Rank 3     Bob   39
Rank 4  Rhodes   22


In [14]:
# Create a DataFrame from a list of dicts: one dict per row.
# The columns are the union of all keys; a key that a row lacks becomes NaN.

x = [
    {'Name': 'Bob', 'Age': 17},
    {'Name': 'John', 'Age': 16, 'Average Mark': 88},
]
a = pd.DataFrame(data=x)
print(a, end='\n\n')

# the same records again, this time with named rows:

a = pd.DataFrame(data=x, index=['Row_1', 'Row_2'])
print(a)

   Name  Age  Average Mark
0   Bob   17           NaN
1  John   16          88.0

       Name  Age  Average Mark
Row_1   Bob   17           NaN
Row_2  John   16          88.0


In [2]:
# Create a DataFrame from a dict of Series.
# The row index is the union of the Series indexes; a Series that has no value
# for a label contributes NaN there.

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
}
x = pd.DataFrame(data=a)
print(x)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  4.0    4
e  NaN    5
f  NaN    6


In [3]:
#  Data Structure - Column Selection, Addition and Deletion

# Column selection: indexing the frame with a column name returns
# that one column as a Series

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
}
x = pd.DataFrame(data=a)
print(x['one'])

a    1.0
b    2.0
c    3.0
d    4.0
e    NaN
f    NaN
Name: one, dtype: float64


In [8]:
# Adding a column: assign a Series to a new column name.
# Values are aligned on the row labels, so row 'f' (absent from the new
# Series) gets NaN.

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
}
x = pd.DataFrame(data=a)
x['three'] = pd.Series([12, 34, 56, 78, 90], index=list('abcde'))
print(x)

   one  two  three
a  1.0    1   12.0
b  2.0    2   34.0
c  3.0    3   56.0
d  4.0    4   78.0
e  NaN    5   90.0
f  NaN    6    NaN


In [7]:
# Creating a new column from existing columns.
# The sum is element-wise; wherever either operand is NaN the result is NaN.

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
}
x = pd.DataFrame(data=a)
x['three'] = x['one'].add(x['two'])
print(x)

   one  two  three
a  1.0    1    2.0
b  2.0    2    4.0
c  3.0    3    6.0
d  4.0    4    8.0
e  NaN    5    NaN
f  NaN    6    NaN


In [9]:
# Column deletion with the `del` statement (removes the column in place)

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
    'three': pd.Series([12, 34, 56, 78, 90], index=list('abcde')),
}
x = pd.DataFrame(data=a)

# before
print(x)
print("\n After Deleting \n")

# after
del x['two']
print(x)

   one  two  three
a  1.0    1   12.0
b  2.0    2   34.0
c  3.0    3   56.0
d  4.0    4   78.0
e  NaN    5   90.0
f  NaN    6    NaN

 After Deleting 

   one  three
a  1.0   12.0
b  2.0   34.0
c  3.0   56.0
d  4.0   78.0
e  NaN   90.0
f  NaN    NaN


In [10]:
# Column deletion with DataFrame.pop: removes the column in place and
# returns it (the returned Series is not needed here)

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
    'three': pd.Series([12, 34, 56, 78, 90], index=list('abcde')),
}
x = pd.DataFrame(data=a)

# before
print(x)
print("\n After Deleting \n")

# after
x.pop('three')
print(x)

   one  two  three
a  1.0    1   12.0
b  2.0    2   34.0
c  3.0    3   56.0
d  4.0    4   78.0
e  NaN    5   90.0
f  NaN    6    NaN

 After Deleting 

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  4.0    4
e  NaN    5
f  NaN    6


In [11]:
# Data Structure - Row Selection, Addition and Deletion

# Row selection by label: .loc[label] returns that row as a Series
# whose index is the frame's column names

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
}
x = pd.DataFrame(data=a)
print(x.loc['c'])

one    3.0
two    3.0
Name: c, dtype: float64


In [14]:
# Row selection by integer location

a = {
    'one': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'two': pd.Series([1, 2, 3, 4, 5, 6], index=list('abcdef')),
}
x = pd.DataFrame(data=a)
print(x.iloc[3])   # .iloc takes the row's position, counted from 0 (here row 'd')

one    4.0
two    4.0
Name: d, dtype: float64


In [15]:
# Slice rows - selecting several consecutive rows by position

a = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}

x = pd.DataFrame(data=a)
print(x.iloc[:2])   # positions 0 and 1; the stop position (2) is excluded

   one  two
a  1.0    1
b  2.0    2


In [16]:
# Adding new rows - stack the rows of one DataFrame under another with pd.concat.
# (DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so pd.concat is the supported way to do this.)
# The original row labels are kept, which is why 0 and 1 each appear twice;
# pass ignore_index=True to pd.concat to renumber the rows instead.

a1 = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
a2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])

x = pd.concat([a1, a2])
print(x)

   a  b
0  1  2
1  3  4
0  5  6
1  7  8


In [17]:
# Deleting a row with drop.
# drop returns a new frame without the given row label; the frame it is
# called on is left unchanged.

x = dict(
    Name=['Tom', 'Jerry', 'Bob', 'Rhodes'],
    Age=[24, 36, 39, 22],
)
a = pd.DataFrame(data=x, index=list(range(1, 5)))
print(a)

print("\n Deleting ")
x = a.drop(index=2)   # remove the row labelled 2
print(x)

     Name  Age
1     Tom   24
2   Jerry   36
3     Bob   39
4  Rhodes   22

 Deleting 
     Name  Age
1     Tom   24
3     Bob   39
4  Rhodes   22
