# Python Pandas

In [1]:
import pandas as pd

# Creating a Series using List

In [2]:
import pandas as pd
ser1 = pd.Series([2.5, 4, 4.7, 6.0, 6.9, 9])
print(ser1)

0    2.5
1    4.0
2    4.7
3    6.0
4    6.9
5    9.0
dtype: float64


# Creating Series of string values with name

In [3]:
import pandas as pd
ser2 = pd.Series(["India", "Australia", "England"], name="Countries")
print(ser2)

0        India
1    Australia
2      England
Name: Countries, dtype: object


# Python shorthand for list creation used to create Series

In [4]:
import pandas as pd
ser3 = pd.Series(["K"]*6)
print(ser3)

0    K
1    K
2    K
3    K
4    K
5    K
dtype: object


# Creating Series using dictionary

In [5]:
import pandas as pd
ser4 = pd.Series({"India": "New Delhi",
"Australia": "Canberra",
"New zealand": "Willington"})
print(ser4)

India           New Delhi
Australia        Canberra
New zealand    Willington
dtype: object


# Create series using NumPy functions

In [8]:
import pandas as pd
import numpy as np
ser1 = pd.Series(np.linspace(1, 100, 10))
print(ser1)

0      1.0
1     12.0
2     23.0
3     34.0
4     45.0
5     56.0
6     67.0
7     78.0
8     89.0
9    100.0
dtype: float64


# Get index and values of a series

In [9]:
import pandas as pd
import numpy as np
ser1 = pd.Series({"India": "New Delhi",
"Australia": "Canberra",
"New zealand": "Willington"})
print(ser1.values)
print(ser1.index)

['New Delhi' 'Canberra' 'Willington']
Index(['India', 'Australia', 'New zealand'], dtype='object')


# Specify an index at Series creation

In [10]:
import pandas as pd
values = ["India", "Australia", "England",
"New zealand", "South Africa", "Sri lanka"]
code = ["IND", "AUS", "ENG", "NZ", "SA", "SL"]
ser1 = pd.Series(values, index=code)
print(ser1)

IND           India
AUS       Australia
ENG         England
NZ      New zealand
SA     South Africa
SL        Sri lanka
dtype: object


# Get Length Size and Shape of a Series

In [11]:
import pandas as pd
values = ["India", "Australia", "England",
"New zealand", "South Africa", "Sri lanka"]
code = ["IND", "AUS", "ENG", "NZ", "SA", "SL"]
ser1 = pd.Series(values, index=code)
print(len(ser1))
print(ser1.shape)
print(ser1.size)

6
(6,)
6


# Get the first or last few rows from a Series

# Example of Head()

In [12]:
import pandas as pd
values = ["India", "Australia", "England",
"New zealand", "South Africa", "Sri lanka"]
code = ["IND", "AUS", "ENG", "NZ", "SA", "SL"]
ser1 = pd.Series(values, index=code)
print(ser1)

IND           India
AUS       Australia
ENG         England
NZ      New zealand
SA     South Africa
SL        Sri lanka
dtype: object


In [13]:
ser1.head()

IND           India
AUS       Australia
ENG         England
NZ      New zealand
SA     South Africa
dtype: object

In [14]:
ser1.head(3)

IND        India
AUS    Australia
ENG      England
dtype: object

# Example of Tail()

# import pandas as pd
values = ["India", "Australia", "England",
"New zealand", "South Africa", "Sri lanka"]
code = ["IND", "AUS", "ENG", "NZ", "SA", "SL"]
ser1 = pd.Series(values, index=code)
print(ser1)

In [16]:
ser1.tail()

AUS       Australia
ENG         England
NZ      New zealand
SA     South Africa
SL        Sri lanka
dtype: object

In [17]:
ser1.tail(3)

NZ     New zealand
SA    South Africa
SL       Sri lanka
dtype: object

# Example of Take()

In [18]:
import pandas as pd
values = ["India", "Australia", "England",
"New zealand", "South Africa", "Sri lanka"]
code = ["IND", "AUS", "ENG", "NZ", "SA", "SL"]
ser1 = pd.Series(values, index=code)
print(ser1)

IND           India
AUS       Australia
ENG         England
NZ      New zealand
SA     South Africa
SL        Sri lanka
dtype: object


In [19]:
ser1.take([1, 5, 4])

AUS       Australia
SL        Sri lanka
SA     South Africa
dtype: object

In [20]:
ser1.take([2, 5])

ENG      England
SL     Sri lanka
dtype: object

# Slicing a Series into subsets

In [21]:
import pandas as pd
num = [000, 254, 460, 501, 524, 632, 689, 745, 879, 989]
idx = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
series = pd.Series(num, index=idx)
series

A      0
B    254
C    460
D    501
E    524
F    632
G    689
H    745
I    879
J    989
dtype: int64

In [22]:
series[0:5] 

A      0
B    254
C    460
D    501
E    524
dtype: int64

In [23]:
series[3:7]

D    501
E    524
F    632
G    689
dtype: int64

In [29]:
series[2:7:3]

C    460
F    632
dtype: int64

In [30]:
series[:8]

A      0
B    254
C    460
D    501
E    524
F    632
G    689
H    745
dtype: int64

In [38]:
series[6::3]

G    689
J    989
dtype: int64

In [40]:
series[::-1]

J    989
I    879
H    745
G    689
F    632
E    524
D    501
C    460
B    254
A      0
dtype: int64

# DATAFRAME

# Create and Print DataFrame

In [42]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


# Set Index and Columns in Pandas

In [43]:
import pandas as pd
employees1 = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]},
index=['Emp001', 'Emp002','Emp003', 'Emp004', 'Emp005'],
columns=['Name', 'Occupation', 'Date Of Join', 'Age'])
print(employees1)

           Name  Occupation Date Of Join  Age
Emp001    Kiran  Programmer   2020-08-19   22
Emp002   Tanuja  Journalist   2020-10-26   21
Emp003    Mohan    Mechanic   2019-06-07   23
Emp004   Barsha    Engineer   2017-02-30   26
Emp005  Hemanta     Postman   2018-12-24   34


# Rename DataFrame Columns

In [44]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


In [45]:
employees.columns = ['EmpCode', 'EmpName', 'EmpOccupation', 'EmpDOJ', 'EmpAge']
print(employees)

  EmpCode  EmpName EmpOccupation      EmpDOJ  EmpAge
0  Emp110    Kiran    Programmer  2020-08-19      22
1  Emp111   Tanuja    Journalist  2020-10-26      21
2  Emp112    Mohan      Mechanic  2019-06-07      23
3  Emp113   Barsha      Engineer  2017-02-30      26
4  Emp114  Hemanta       Postman  2018-12-24      34


# Drop DataFrame Column(s) by Name and Index

In [46]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


In [47]:
employees.drop('Date Of Join', axis=1, inplace=True)
print(employees)

  EmpCode     Name  Occupation  Age
0  Emp110    Kiran  Programmer   22
1  Emp111   Tanuja  Journalist   21
2  Emp112    Mohan    Mechanic   23
3  Emp113   Barsha    Engineer   26
4  Emp114  Hemanta     Postman   34


# Add Column to DataFrame

In [53]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


In [54]:
employees['City'] = ['Bhubaneswer', 'Cuttack', 'Bhadrak', 'Balasore', 'Rourkela']
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age         City
0  Emp110    Kiran  Programmer   2020-08-19   22  Bhubaneswer
1  Emp111   Tanuja  Journalist   2020-10-26   21      Cuttack
2  Emp112    Mohan    Mechanic   2019-06-07   23      Bhadrak
3  Emp113   Barsha    Engineer   2017-02-30   26     Balasore
4  Emp114  Hemanta     Postman   2018-12-24   34     Rourkela


# Get list of the column headers

In [55]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


In [64]:
print(list(employees))

['EmpCode', 'Name', 'Occupation', 'Date Of Join', 'Age']


In [62]:
print(employees.columns.tolist())

['EmpCode', 'Name', 'Occupation', 'Date Of Join', 'Age']


In [63]:
print(list(employees.columns.values))

['EmpCode', 'Name', 'Occupation', 'Date Of Join', 'Age']


# Generate DataFrame with random values

In [71]:
import pandas as pd
import numpy as np
df_random = pd.DataFrame(np.random.randint(1000, size=(10, 6)),
columns=list('ABCDEF'),
index=['Row-{}'.format(i) for i in range(10)])
print(df_random)

         A    B    C    D    E    F
Row-0  359  399  253  641  136  145
Row-1  347  657  984  650   63  535
Row-2  620  105  106  679  872  970
Row-3  776  935  852  678  834  913
Row-4  937  627  108  624  333  556
Row-5  927  378  884  581  498  232
Row-6  786  495  471  939  570  802
Row-7  260  432  296  436  638  425
Row-8  441  583  189  646  546  361
Row-9   96  330  392  566  334  349


# Select multiple columns from DataFrame

In [72]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


In [73]:
df = employees[['Name', 'Date Of Join', 'Age']]
print(df)

      Name Date Of Join  Age
0    Kiran   2020-08-19   22
1   Tanuja   2020-10-26   21
2    Mohan   2019-06-07   23
3   Barsha   2017-02-30   26
4  Hemanta   2018-12-24   34


# Convert Dictionary into DataFrame

In [74]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


In [75]:
dataframe = pd.DataFrame(employees)
print(dataframe)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


# Check whether a pandas DataFrame is empty

In [88]:
# check if there is any element 
# in the given dataframe or not 
result = df.empty 
  
# Print the result 
print(result) 

False


# Create an empty DataFrame with Date Index

In [91]:
import datetime
import pandas as pd
todays_date = datetime.datetime.now().date()
index = pd.date_range(todays_date, periods=10, freq='D')
columns = ['A', 'B', 'C',]
df = pd.DataFrame(index=index, columns=columns)
df = df.fillna(0)
print(df)

            A  B  C
2021-01-04  0  0  0
2021-01-05  0  0  0
2021-01-06  0  0  0
2021-01-07  0  0  0
2021-01-08  0  0  0
2021-01-09  0  0  0
2021-01-10  0  0  0
2021-01-11  0  0  0
2021-01-12  0  0  0
2021-01-13  0  0  0


# Check the datatype of DataFrame Columns

In [92]:
import pandas as pd
df = pd.DataFrame({'Age': [22, 21, 28, 32, 35, 42, 29],
'Color': ['Red', 'Blue', 'White', 'Yellow', 'Green', 'Black',
'Magenta'],
'Food': ['Apple', 'Carrot', 'Mango', 'Guava', 'Butter',
'Chicken', 'Fish'],
'Height': [60, 50, 200, 180, 125, 135, 214],
'Score': [9.8, 9.4, 8.0, 7.4, 2.4, 5.9, 6.2],
'State': ['OD', 'TN', 'KL', 'BH', 'GL', 'NL', 'MH']
},
index=['Kiran', 'Mohan', 'Akash', 'Devil', 'Kirti',
'Gopi', 'Deepak'])
print(df)


        Age    Color     Food  Height  Score State
Kiran    22      Red    Apple      60    9.8    OD
Mohan    21     Blue   Carrot      50    9.4    TN
Akash    28    White    Mango     200    8.0    KL
Devil    32   Yellow    Guava     180    7.4    BH
Kirti    35    Green   Butter     125    2.4    GL
Gopi     42    Black  Chicken     135    5.9    NL
Deepak   29  Magenta     Fish     214    6.2    MH


In [93]:
df.dtypes

Age         int64
Color      object
Food       object
Height      int64
Score     float64
State      object
dtype: object

# Change data type of a specific column of a pandas DataFrame

In [94]:
print(df)
df.dtypes

        Age    Color     Food  Height  Score State
Kiran    22      Red    Apple      60    9.8    OD
Mohan    21     Blue   Carrot      50    9.4    TN
Akash    28    White    Mango     200    8.0    KL
Devil    32   Yellow    Guava     180    7.4    BH
Kirti    35    Green   Butter     125    2.4    GL
Gopi     42    Black  Chicken     135    5.9    NL
Deepak   29  Magenta     Fish     214    6.2    MH


Age         int64
Color      object
Food       object
Height      int64
Score     float64
State      object
dtype: object

In [95]:
df['Age'] = df['Age'].astype(str)
print(df.dtypes)

Age        object
Color      object
Food       object
Height      int64
Score     float64
State      object
dtype: object


# Appending two DataFrame objects

In [96]:
import pandas as pd
df1 = pd.DataFrame({'Age': [22, 21, 28, 32, 35], 'Height': [60, 50, 200, 180, 125],
'Score': [9.8, 9.4, 8.0, 7.4, 2.4], 'State': ['OD', 'TN', 'KL', 'BH', 'GL']},
index=['Kiran', 'Mohan', 'Akash', 'Devil', 'Kirti'])
print(df1)

       Age  Height  Score State
Kiran   22      60    9.8    OD
Mohan   21      50    9.4    TN
Akash   28     200    8.0    KL
Devil   32     180    7.4    BH
Kirti   35     125    2.4    GL


In [97]:
df2 = pd.DataFrame({'Age': [32, 35, 42, 29], 'Color': ['Yellow', 'Green', 'Black',
'Magenta'],
'Food': ['Guava', 'Butter',
'Chicken', 'Fish'],
'Score': [7.4, 2.4, 5.9, 6.2], 'State': ['BH', 'GL', 'NL', 'MH']},
index=['Devil', 'Kirti',
'Gopi', 'Deepak'])
print(df2)

        Age    Color     Food  Score State
Devil    32   Yellow    Guava    7.4    BH
Kirti    35    Green   Butter    2.4    GL
Gopi     42    Black  Chicken    5.9    NL
Deepak   29  Magenta     Fish    6.2    MH


In [98]:
df3 = df1.append(df2, sort=True)
print(df3)

        Age    Color     Food  Height  Score State
Kiran    22      NaN      NaN    60.0    9.8    OD
Mohan    21      NaN      NaN    50.0    9.4    TN
Akash    28      NaN      NaN   200.0    8.0    KL
Devil    32      NaN      NaN   180.0    7.4    BH
Kirti    35      NaN      NaN   125.0    2.4    GL
Devil    32   Yellow    Guava     NaN    7.4    BH
Kirti    35    Green   Butter     NaN    2.4    GL
Gopi     42    Black  Chicken     NaN    5.9    NL
Deepak   29  Magenta     Fish     NaN    6.2    MH


# How to add an extra row at end in a pandas DataFrame

In [99]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]})
print(employees)

  EmpCode     Name  Occupation Date Of Join  Age
0  Emp110    Kiran  Programmer   2020-08-19   22
1  Emp111   Tanuja  Journalist   2020-10-26   21
2  Emp112    Mohan    Mechanic   2019-06-07   23
3  Emp113   Barsha    Engineer   2017-02-30   26
4  Emp114  Hemanta     Postman   2018-12-24   34


In [103]:
employees.loc[len(employees)] = ['Emp115', 'Biswo', 'Doctor', '2019-07-20','31']
print(employees)

  EmpCode     Name  Occupation Date Of Join Age
0  Emp110    Kiran  Programmer   2020-08-19  22
1  Emp111   Tanuja  Journalist   2020-10-26  21
2  Emp112    Mohan    Mechanic   2019-06-07  23
3  Emp113   Barsha    Engineer   2017-02-30  26
4  Emp114  Hemanta     Postman   2018-12-24  34
5  Emp114  Hemanta     Postman   2018-12-24  34
6  Emp115    Biswo      Doctor   2019-07-20  31


# Add row with specific index name

In [111]:
import pandas as pd
employees = pd.DataFrame({
'EmpCode': ['Emp110', 'Emp111', 'Emp112', 'Emp113', 'Emp114'],
'Name': ['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta'],
'Occupation': ['Programmer', 'Journalist', 'Mechanic',
'Engineer', 'Postman'],
'Date Of Join': ['2020-08-19', '2020-10-26', '2019-06-07', '2017-02-30',
'2018-12-24'],
'Age': [22, 21, 23, 26, 34]},
index=['Emp001', 'Emp002','Emp003', 'Emp004', 'Emp005'],
columns=['Name', 'Occupation', 'Date Of Join', 'Age'])
print(employees)

           Name  Occupation Date Of Join  Age
Emp001    Kiran  Programmer   2020-08-19   22
Emp002   Tanuja  Journalist   2020-10-26   21
Emp003    Mohan    Mechanic   2019-06-07   23
Emp004   Barsha    Engineer   2017-02-30   26
Emp005  Hemanta     Postman   2018-12-24   34


In [112]:
employees.loc['Emp003'] = ['Amiya', 'Teacher', '2020-01-31', 26]
print(employees)

           Name  Occupation Date Of Join  Age
Emp001    Kiran  Programmer   2020-08-19   22
Emp002   Tanuja  Journalist   2020-10-26   21
Emp003    Amiya     Teacher   2020-01-31   26
Emp004   Barsha    Engineer   2017-02-30   26
Emp005  Hemanta     Postman   2018-12-24   34


# Example of append, concat and combine_first

In [113]:
import pandas as pd
a = {'A': 20, 'B': 30}
b = {'B': 50, 'C': 60, 'D': 70}
df1 = pd.DataFrame(a, index=[0])
df2 = pd.DataFrame(b, index=[1])

# Append

In [114]:
d1 = pd.DataFrame()
d1 = d1.append(df1)
d1 = d1.append(df2).fillna(0)
print(d1)

      A   B     C     D
0  20.0  30   0.0   0.0
1   0.0  50  60.0  70.0


# Concat

In [115]:
d2 = pd.concat([df1, df2]).fillna(0)
print(d2)

      A   B     C     D
0  20.0  30   0.0   0.0
1   0.0  50  60.0  70.0


# Combine First

In [117]:
d3 = pd.DataFrame()
d3 = d3.combine_first(df1).combine_first(df2).fillna(0)
print(d3)

      A     B     C     D
0  20.0  30.0   0.0   0.0
1   0.0  50.0  60.0  70.0


# Filter DataFrame rows containing specific string values with an AND operator

In [118]:
import pandas as pd
df = pd.DataFrame({'DateOfBirth': ['1970-10-21', '1984-05-26', '1999-04-15',
'1974-02-24', '1982-06-20', '1989-07-17',
'1990-08-19'],
'State': ['OD', 'TN', 'WB', 'GL', 'MH', 'NL', 'ND']
},
index=['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta',
'Deepak', 'Sneha'])
print(df)

        DateOfBirth State
Kiran    1970-10-21    OD
Tanuja   1984-05-26    TN
Mohan    1999-04-15    WB
Barsha   1974-02-24    GL
Hemanta  1982-06-20    MH
Deepak   1989-07-17    NL
Sneha    1990-08-19    ND


In [126]:
df1 = df[df.index.str.contains('ran') & df['State'].str.contains("OD")]
print(df1)

      DateOfBirth State
Kiran  1970-10-21    OD


# Filter DataFrame rows using OR operator

In [129]:
df2 = df[df.index.str.contains('ran') | df['State'].str.contains("TN")]
print(df2)

       DateOfBirth State
Kiran   1970-10-21    OD
Tanuja  1984-05-26    TN


# Get Unique row values

In [130]:
import pandas as pd
df = pd.DataFrame({'State': ['OD', 'TN', 'WB', 'GL', 'MH', 'NL', 'ND']
},
index=['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta',
'Deepak', 'Sneha'])
print(df)

        State
Kiran      OD
Tanuja     TN
Mohan      WB
Barsha     GL
Hemanta    MH
Deepak     NL
Sneha      ND


In [134]:
print(df["State"].unique())

['OD' 'TN' 'WB' 'GL' 'MH' 'NL' 'ND']


# Count Distinct Values

In [136]:
import pandas as pd
df = pd.DataFrame({'Age': [31, 30, 22, 33, 42, 40, 27, 39],
'Height': [73, 70, 160, 79, 150, 75, 135, 91],
'Score': [9.8, 9.5, 9.0, 8.4, 6, 7, 3, 1],
'State': ['OD', 'TN', 'WB', 'MH', 'NL', 'PN', 'RJ', 'BH']},
index=['Kiran', 'Mohan', 'Devil', 'Deepak', 'Hemanta', 'Amiya', 'Biswo', 'Abhijit'])
print(df)

         Age  Height  Score State
Kiran     31      73    9.8    OD
Mohan     30      70    9.5    TN
Devil     22     160    9.0    WB
Deepak    33      79    8.4    MH
Hemanta   42     150    6.0    NL
Amiya     40      75    7.0    PN
Biswo     27     135    3.0    RJ
Abhijit   39      91    1.0    BH


In [137]:
print(df.Height.value_counts())

70     1
91     1
75     1
73     1
135    1
150    1
79     1
160    1
Name: Height, dtype: int64


# Remove duplicate rows

In [138]:
import pandas as pd
df = pd.DataFrame({'Age': [31, 30, 22, 33, 42, 40, 27, 39],
'Height': [73, 70, 160, 79, 150, 75, 135, 91],
'Score': [9.8, 9.5, 9.0, 8.4, 6, 7, 3, 1],
'State': ['OD', 'TN', 'WB', 'MH', 'NL', 'PN', 'RJ', 'BH']},
index=['Kiran', 'Mohan', 'Devil', 'Deepak', 'Hemanta', 'Amiya', 'Biswo', 'Abhijit'])
print(df)

         Age  Height  Score State
Kiran     31      73    9.8    OD
Mohan     30      70    9.5    TN
Devil     22     160    9.0    WB
Deepak    33      79    8.4    MH
Hemanta   42     150    6.0    NL
Amiya     40      75    7.0    PN
Biswo     27     135    3.0    RJ
Abhijit   39      91    1.0    BH


In [139]:
df1 = df.reset_index().drop_duplicates(subset='index',
keep='first').set_index('index')
print(df1)

         Age  Height  Score State
index                            
Kiran     31      73    9.8    OD
Mohan     30      70    9.5    TN
Devil     22     160    9.0    WB
Deepak    33      79    8.4    MH
Hemanta   42     150    6.0    NL
Amiya     40      75    7.0    PN
Biswo     27     135    3.0    RJ
Abhijit   39      91    1.0    BH


# Example of isnull() and notnull()

In [140]:
import pandas as pd
df = pd.DataFrame([[40, 50, 60, 70], [87, 42, 63, 30], [4,]],
columns=['Apple', 'Papaya', 'Guava', 'Carrot'],
index=['BasketA', 'BasketB', 'BasketC'])
print(df)

         Apple  Papaya  Guava  Carrot
BasketA     40    50.0   60.0    70.0
BasketB     87    42.0   63.0    30.0
BasketC      4     NaN    NaN     NaN


In [141]:
df.isnull()

Unnamed: 0,Apple,Papaya,Guava,Carrot
BasketA,False,False,False,False
BasketB,False,False,False,False
BasketC,False,True,True,True


In [142]:
df.notnull()

Unnamed: 0,Apple,Papaya,Guava,Carrot
BasketA,True,True,True,True
BasketB,True,True,True,True
BasketC,True,False,False,False


# Droping rows with missing data- dropna()

In [143]:
import pandas as pd
df = pd.DataFrame([[40, 50, 60, 70], [87, 42, 63, 30], [4,]],
columns=['Apple', 'Papaya', 'Guava', 'Carrot'],
index=['BasketA', 'BasketB', 'BasketC'])
print(df)

         Apple  Papaya  Guava  Carrot
BasketA     40    50.0   60.0    70.0
BasketB     87    42.0   63.0    30.0
BasketC      4     NaN    NaN     NaN


In [144]:
df.dropna()

Unnamed: 0,Apple,Papaya,Guava,Carrot
BasketA,40,50.0,60.0,70.0
BasketB,87,42.0,63.0,30.0


# Drop columns with missing data

In [145]:
import pandas as pd
df = pd.DataFrame([[40, 50, 60, 70], [87, 42, 63, 30], [4,]],
columns=['Apple', 'Papaya', 'Guava', 'Carrot'],
index=['BasketA', 'BasketB', 'BasketC'])
print(df)

         Apple  Papaya  Guava  Carrot
BasketA     40    50.0   60.0    70.0
BasketB     87    42.0   63.0    30.0
BasketC      4     NaN    NaN     NaN


In [148]:
df.dropna(1)

Unnamed: 0,Apple
BasketA,40
BasketB,87
BasketC,4


# Sort Index Values

In [149]:
import pandas as pd
df = pd.DataFrame({'DateOfBirth': ['1970-10-21', '1984-05-26', '1999-04-15',
'1974-02-24', '1982-06-20', '1989-07-17',
'1990-08-19'],
'State': ['OD', 'TN', 'WB', 'GL', 'MH', 'NL', 'ND']
},
index=['Kiran', 'Tanuja', 'Mohan', 'Barsha', 'Hemanta',
'Deepak', 'Sneha'])
df


Unnamed: 0,DateOfBirth,State
Kiran,1970-10-21,OD
Tanuja,1984-05-26,TN
Mohan,1999-04-15,WB
Barsha,1974-02-24,GL
Hemanta,1982-06-20,MH
Deepak,1989-07-17,NL
Sneha,1990-08-19,ND


# ASCENDING

In [151]:
df.sort_index(ascending=True)

Unnamed: 0,DateOfBirth,State
Barsha,1974-02-24,GL
Deepak,1989-07-17,NL
Hemanta,1982-06-20,MH
Kiran,1970-10-21,OD
Mohan,1999-04-15,WB
Sneha,1990-08-19,ND
Tanuja,1984-05-26,TN


# DESCENDING

In [152]:
df.sort_index(ascending=False)

Unnamed: 0,DateOfBirth,State
Tanuja,1984-05-26,TN
Sneha,1990-08-19,ND
Mohan,1999-04-15,WB
Kiran,1970-10-21,OD
Hemanta,1982-06-20,MH
Deepak,1989-07-17,NL
Barsha,1974-02-24,GL


# Example of using rank()

In [153]:
import pandas as pd
df = pd.DataFrame([[40, 50, 60, 70], [87, 42, 63, 30], [4,]],
columns=['Apple', 'Papaya', 'Guava', 'Carrot'],
index=['BasketA', 'BasketB', 'BasketC'])
print(df)

         Apple  Papaya  Guava  Carrot
BasketA     40    50.0   60.0    70.0
BasketB     87    42.0   63.0    30.0
BasketC      4     NaN    NaN     NaN


In [154]:
df.rank()

Unnamed: 0,Apple,Papaya,Guava,Carrot
BasketA,2.0,2.0,1.0,2.0
BasketB,3.0,1.0,2.0,1.0
BasketC,1.0,,,


# Importing Dataset

# Importing EXCEL File

In [43]:
import pandas as pd
df1 = pd.read_excel("Tourism in india dataset.xlsx")
df1.head()

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
0,2000,2.65,6.7,220.11,15.4,3460,15.0,15626,20.7,3.53
1,2001,2.54,-4.2,236.47,7.4,3198,-7.6,15083,-3.5,4.56
2,2002,2.38,-6.0,269.6,14.0,3103,-3.0,15064,-0.1,4.94
3,2003,2.73,14.3,309.04,14.6,4463,43.8,20729,37.6,5.35
4,2004,3.46,26.8,366.27,18.5,6170,38.2,27944,34.8,6.21


# Importing Excel File With Particular Sheet

In [42]:
df2 = pd.read_excel('Tourism in india dataset.xlsx',
sheet_name='20 years data')
df2.head()

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
0,2000,2.65,6.7,220.11,15.4,3460,15.0,15626,20.7,3.53
1,2001,2.54,-4.2,236.47,7.4,3198,-7.6,15083,-3.5,4.56
2,2002,2.38,-6.0,269.6,14.0,3103,-3.0,15064,-0.1,4.94
3,2003,2.73,14.3,309.04,14.6,4463,43.8,20729,37.6,5.35
4,2004,3.46,26.8,366.27,18.5,6170,38.2,27944,34.8,6.21


# Operations on Dataset

In [16]:
df1 = pd.read_excel("Tourism in india dataset.xlsx")
df1.head()

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
0,2000,2.65,6.7,220.11,15.4,3460,15.0,15626,20.7,3.53
1,2001,2.54,-4.2,236.47,7.4,3198,-7.6,15083,-3.5,4.56
2,2002,2.38,-6.0,269.6,14.0,3103,-3.0,15064,-0.1,4.94
3,2003,2.73,14.3,309.04,14.6,4463,43.8,20729,37.6,5.35
4,2004,3.46,26.8,366.27,18.5,6170,38.2,27944,34.8,6.21


In [17]:
df1.head()

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
0,2000,2.65,6.7,220.11,15.4,3460,15.0,15626,20.7,3.53
1,2001,2.54,-4.2,236.47,7.4,3198,-7.6,15083,-3.5,4.56
2,2002,2.38,-6.0,269.6,14.0,3103,-3.0,15064,-0.1,4.94
3,2003,2.73,14.3,309.04,14.6,4463,43.8,20729,37.6,5.35
4,2004,3.46,26.8,366.27,18.5,6170,38.2,27944,34.8,6.21


In [18]:
df1.head(3)

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
0,2000,2.65,6.7,220.11,15.4,3460,15.0,15626,20.7,3.53
1,2001,2.54,-4.2,236.47,7.4,3198,-7.6,15083,-3.5,4.56
2,2002,2.38,-6.0,269.6,14.0,3103,-3.0,15064,-0.1,4.94


# Select Last Few Rows from Dataset

In [19]:
df1.tail()

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
15,2015,8.03,4.5,1431.97,11.6,21071,4.1,134844,12.0,20.38
16,2016,8.8,9.7,1615.39,12.8,22923,9.1,154146,14.3,21.87
17,2017,10.04,14.0,1657.55,2.6,27310,19.1,177874,15.4,23.94
18,2018,10.56,5.2,1853.79,11.8,28525,4.7,194882,9.6,26.3
19,2019,10.93,3.5,2321.98,25.3,30058,5.1,203658,13.4,26.92


In [20]:
df1.tail(5)

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
15,2015,8.03,4.5,1431.97,11.6,21071,4.1,134844,12.0,20.38
16,2016,8.8,9.7,1615.39,12.8,22923,9.1,154146,14.3,21.87
17,2017,10.04,14.0,1657.55,2.6,27310,19.1,177874,15.4,23.94
18,2018,10.56,5.2,1853.79,11.8,28525,4.7,194882,9.6,26.3
19,2019,10.93,3.5,2321.98,25.3,30058,5.1,203658,13.4,26.92


# Information about Dataset

In [21]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 10 columns):
 #   Column                                                        Non-Null Count  Dtype  
---  ------                                                        --------------  -----  
 0   Year                                                          20 non-null     int64  
 1   Foreign tourist arrivals in india(in million)                 20 non-null     float64
 2   % change over previous year                                   20 non-null     float64
 3   Number of domestic tourists visits(in million)to all states   20 non-null     float64
 4   % change over previous year                                   20 non-null     float64
 5   Foreign exchange earnings(US $million)                        20 non-null     int64  
 6   % change over previous year.1                                 20 non-null     float64
 7   Foreign exchange earnings(in crores)                          20 non-null

# Knowing the Variable Type

In [41]:
df1.dtypes

Year                                                              int64
Foreign tourist arrivals in india(in million)                   float64
% change over previous year                                     float64
Number of domestic tourists visits(in million)to all states     float64
% change over previous year                                     float64
Foreign exchange earnings(US $million)                            int64
% change over previous year.1                                   float64
Foreign exchange earnings(in crores)                              int64
% change over previous year.2                                   float64
No.of Indian Nationals Departures (in Millions)                 float64
dtype: object

# Summary Statistics Of Dataset

In [40]:
df1.describe()

Unnamed: 0,Year,Foreign tourist arrivals in india(in million),% change over previous year,Number of domestic tourists visits(in million)to all states,% change over previous year.3,Foreign exchange earnings(US $million),% change over previous year.1,Foreign exchange earnings(in crores),% change over previous year.2,No.of Indian Nationals Departures (in Millions)
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,2009.5,5.967,7.94,898.8895,13.425,14364.0,12.94,82707.65,15.635,13.405
std,5.91608,2.712825,7.524374,617.341056,5.368806,8722.934749,13.47342,62709.74625,9.88557,7.508508
min,2000.0,2.38,-6.0,220.11,2.6,3103.0,-7.6,15064.0,-3.5,3.53
25%,2004.75,3.805,4.225,385.5975,11.025,7162.25,4.55,31828.25,11.975,6.9375
50%,2009.5,5.53,7.95,708.25,13.35,13012.5,10.0,59963.0,14.7,12.03
75%,2014.25,7.7675,13.35,1320.0925,16.2,20444.75,19.675,123986.25,19.05,18.8425
max,2019.0,10.93,26.8,2321.98,25.3,30058.0,43.8,203658.0,37.6,26.92


# View only Some of the Rows at some specific location

In [7]:
df2 = pd.read_excel("Heritage visit.xlsx")
df2.head()

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals,Domestic Tourists arrivals
0,Ajanta Caves,41134,3352,37782
1,Ellora Caves,18000,3482,14518
2,Agra Fort,2500000,526000,1974000
3,Taj Mahal,6900000,883000,6017000
4,"Sun Temple, Konarak",2526359,65000,2461359


In [54]:
df2.iloc[3:7]

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals,Domestic Tourists arrivals
3,Taj Mahal,6900000,883000,6017000
4,"Sun Temple, Konarak",2526359,65000,2461359
5,"Golden temple,Amritsar",100000,26548,73452
6,Group of Monuments at Khajuraho,734831,80000,654831


# View only Some of the Rows at some specific location with Specific Columns

In [8]:
df2.iloc[3:7, 0:5]

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals,Domestic Tourists arrivals
3,Taj Mahal,6900000,883000,6017000
4,"Sun Temple, Konarak",2526359,65000,2461359
5,"Golden temple,Amritsar",100000,26548,73452
6,Group of Monuments at Khajuraho,734831,80000,654831


# Use of loc() for extracting Rows and Columns

In [62]:
df2.loc[:,["Heritage sites","Total Number of Tourists arrivals in 2019","Foreign Tourists arrivals"]]

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals
0,Ajanta Caves,41134,3352
1,Ellora Caves,18000,3482
2,Agra Fort,2500000,526000
3,Taj Mahal,6900000,883000
4,"Sun Temple, Konarak",2526359,65000
5,"Golden temple,Amritsar",100000,26548
6,Group of Monuments at Khajuraho,734831,80000


# Replace Function

In [68]:
df2.columns = df2.columns.str.replace('Heritage sites' , 'Heritage ')
df2.columns

Index(['Heritage ', 'Total Number of Tourists arrivals in 2019',
       'Foreign Tourists arrivals', 'Domestic Tourists arrivals'],
      dtype='object')

# Mean

In [67]:
df2.mean()

Total Number of Tourists arrivals in 2019    1.831475e+06
Foreign Tourists arrivals                    2.267689e+05
Domestic Tourists arrivals                   1.604706e+06
dtype: float64

# Mode

In [66]:
df2.mode()

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals,Domestic Tourists arrivals
0,Agra Fort,18000,3352,14518
1,Ajanta Caves,41134,3482,37782
2,Ellora Caves,100000,26548,73452
3,"Golden temple,Amritsar",734831,65000,654831
4,Group of Monuments at Khajuraho,2500000,80000,1974000
5,"Sun Temple, Konarak",2526359,526000,2461359
6,Taj Mahal,6900000,883000,6017000


# Median

In [69]:
df2.median()

Total Number of Tourists arrivals in 2019    734831.0
Foreign Tourists arrivals                     65000.0
Domestic Tourists arrivals                   654831.0
dtype: float64

# Max

In [70]:
df2.max()

Heritage                                     Taj Mahal
Total Number of Tourists arrivals in 2019      6900000
Foreign Tourists arrivals                       883000
Domestic Tourists arrivals                     6017000
dtype: object

# Min

In [71]:
df2.min()

Heritage                                     Agra Fort
Total Number of Tourists arrivals in 2019        18000
Foreign Tourists arrivals                         3352
Domestic Tourists arrivals                       14518
dtype: object

In [77]:
df2 = pd.read_excel("Heritage visit.xlsx")
df2.head()

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals,Domestic Tourists arrivals
0,Ajanta Caves,41134,3352,37782
1,Ellora Caves,18000,3482,14518
2,Agra Fort,2500000,526000,1974000
3,Taj Mahal,6900000,883000,6017000
4,"Sun Temple, Konarak",2526359,65000,2461359


# GroupBy Function

In [99]:
df2.groupby("Heritage sites")["Total Number of Tourists arrivals in 2019","Foreign Tourists arrivals"].min()


  df2.groupby("Heritage sites")["Total Number of Tourists arrivals in 2019","Foreign Tourists arrivals"].min()


Unnamed: 0_level_0,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals
Heritage sites,Unnamed: 1_level_1,Unnamed: 2_level_1
Agra Fort,2500000,526000
Ajanta Caves,41134,3352
Ellora Caves,18000,3482
"Golden temple,Amritsar",100000,26548
Group of Monuments at Khajuraho,734831,80000
"Sun Temple, Konarak",2526359,65000
Taj Mahal,6900000,883000


In [79]:
df2.groupby("Heritage sites").agg({"Foreign Tourists arrivals": ["min","max"],"Domestic Tourists arrivals" : "mean"})

Unnamed: 0_level_0,Foreign Tourists arrivals,Foreign Tourists arrivals,Domestic Tourists arrivals
Unnamed: 0_level_1,min,max,mean
Heritage sites,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Agra Fort,526000,526000,1974000
Ajanta Caves,3352,3352,37782
Ellora Caves,3482,3482,14518
"Golden temple,Amritsar",26548,26548,73452
Group of Monuments at Khajuraho,80000,80000,654831
"Sun Temple, Konarak",65000,65000,2461359
Taj Mahal,883000,883000,6017000


# isnull()

In [80]:
df2.isnull()

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals,Domestic Tourists arrivals
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False
5,False,False,False,False
6,False,False,False,False


# notnull()

In [82]:
df2.notnull()

Unnamed: 0,Heritage sites,Total Number of Tourists arrivals in 2019,Foreign Tourists arrivals,Domestic Tourists arrivals
0,True,True,True,True
1,True,True,True,True
2,True,True,True,True
3,True,True,True,True
4,True,True,True,True
5,True,True,True,True
6,True,True,True,True


# Number of Missing Values

In [83]:
df2.isnull().sum()

Heritage sites                               0
Total Number of Tourists arrivals in 2019    0
Foreign Tourists arrivals                    0
Domestic Tourists arrivals                   0
dtype: int64

# Filling of Missing Values

In [89]:
import pandas as pd
df3 = pd.DataFrame([[40, 50, 60, 70, 90], [87, 42, 63, 30], [4, 65, 34], [65, 34, 21], [57,89]],
columns=['Apple', 'Papaya', 'Guava', 'Carrot', 'Watermillion'],
index=['Basket A', 'Basket B', 'Basket C', 'Basket D', 'Basket E'])
print(df)

          Apple  Papaya  Guava  Carrot  Watermillion
Basket A     40      50   60.0    70.0          90.0
Basket B     87      42   63.0    30.0           NaN
Basket C      4      65   34.0     NaN           NaN
Basket D     65      34   21.0     NaN           NaN
Basket E     57      89    NaN     NaN           NaN


# Forward Filling

In [90]:
df3.ffill()

Unnamed: 0,Apple,Papaya,Guava,Carrot,Watermillion
Basket A,40,50,60.0,70.0,90.0
Basket B,87,42,63.0,30.0,90.0
Basket C,4,65,34.0,30.0,90.0
Basket D,65,34,21.0,30.0,90.0
Basket E,57,89,21.0,30.0,90.0


# Backward Filling

In [91]:
df3.bfill()

Unnamed: 0,Apple,Papaya,Guava,Carrot,Watermillion
Basket A,40,50,60.0,70.0,90.0
Basket B,87,42,63.0,30.0,
Basket C,4,65,34.0,,
Basket D,65,34,21.0,,
Basket E,57,89,,,


# Filling the Missing Values using Mean and fillna(

In [5]:
df3.fillna(df3.mean(),inplace=True)
df3

Unnamed: 0,Apple,Papaya,Guava,Carrot,Watermillion
Basket A,40,50,60.0,70.0,90.0
Basket B,87,42,63.0,30.0,90.0
Basket C,4,65,34.0,50.0,90.0
Basket D,65,34,21.0,50.0,90.0
Basket E,57,89,44.5,50.0,90.0


# Stacking using non-hierarchical indexes

In [4]:
import pandas as pd
df3 = pd.DataFrame([[40, 50, 60, 70, 90], [87, 42, 63, 30], [4, 65, 34], [65, 34, 21], [57,89]],
columns=['Apple', 'Papaya', 'Guava', 'Carrot', 'Watermillion'],
index=['Basket A', 'Basket B', 'Basket C', 'Basket D', 'Basket E'])
print(df3)

          Apple  Papaya  Guava  Carrot  Watermillion
Basket A     40      50   60.0    70.0          90.0
Basket B     87      42   63.0    30.0           NaN
Basket C      4      65   34.0     NaN           NaN
Basket D     65      34   21.0     NaN           NaN
Basket E     57      89    NaN     NaN           NaN


In [101]:
print(df.stack(level=-1))

Basket A  Apple           40.0
          Papaya          50.0
          Guava           60.0
          Carrot          70.0
          Watermillion    90.0
Basket B  Apple           87.0
          Papaya          42.0
          Guava           63.0
          Carrot          30.0
Basket C  Apple            4.0
          Papaya          65.0
          Guava           34.0
Basket D  Apple           65.0
          Papaya          34.0
          Guava           21.0
Basket E  Apple           57.0
          Papaya          89.0
dtype: float64
