In [13]:
import pandas as pd
import numpy as np

In [50]:
df = pd.DataFrame({
    'Name': ["Abhay","anil","akash","Asim"],
    'Age': [25,np.nan,30,np.nan],
    'City': ['Malda','Kolkata','Hooghly',np.nan]
})
df

Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,,Kolkata
2,akash,30.0,Hooghly
3,Asim,,


In [51]:
## subsetting the data

df['Age']

0    25.0
1     NaN
2    30.0
3     NaN
Name: Age, dtype: float64

In [52]:
df[['Name','City']]

Unnamed: 0,Name,City
0,Abhay,Malda
1,anil,Kolkata
2,akash,Hooghly
3,Asim,


In [53]:
## THIS IS HOW WE FILL THE NULL VALUE with some random number

df['Age'].fillna(23)

0    25.0
1    23.0
2    30.0
3    23.0
Name: Age, dtype: float64

In [54]:
## THIS IS HOW WE FILL THE NULL VALUE WITH THE MEAN VALUE

df['Age'].fillna(df['Age'].mean())

0    25.0
1    27.5
2    30.0
3    27.5
Name: Age, dtype: float64

In [55]:
## if we call the entire table we see that the replacement of the null values are not effective
## so it means it is a temporary operation
df

Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,,Kolkata
2,akash,30.0,Hooghly
3,Asim,,


In [56]:
## so to make a permanent change to the table we need too perform the below code "inplace=True"

df['Age'].fillna(df['Age'].mean(),inplace=True)
df

Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,27.5,Kolkata
2,akash,30.0,Hooghly
3,Asim,27.5,


In [46]:
## also we can change the city

df['City'].fillna("Burdwan",inplace=True)
df

Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,27.5,Kolkata
2,akash,30.0,Hooghly
3,Asim,27.5,Burdwan


In [57]:
df = pd.DataFrame({
    'Name': ["Abhay","anil","akash","Asim"],
    'Age': [25,np.nan,30,np.nan],
    'City': ['Malda','Kolkata','Hooghly',np.nan]
})
df

Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,,Kolkata
2,akash,30.0,Hooghly
3,Asim,,


In [61]:
## this "bfill" method stands for backward fill which means it fill the NaN using the backword direction

df.fillna(method='bfill')

  df.fillna(method='bfill')


Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,30.0,Kolkata
2,akash,30.0,Hooghly
3,Asim,,


In [62]:
## this "ffill" method stands for forward fill which means it fill the NaN using the forward direction

df.fillna(method='ffill')

  df.fillna(method='ffill')


Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,25.0,Kolkata
2,akash,30.0,Hooghly
3,Asim,30.0,Hooghly


In [65]:
## this functuin is to check if there is null value present in the table

df.isna()

Unnamed: 0,Name,Age,City
0,False,False,False
1,False,True,False
2,False,False,False
3,False,True,True


In [67]:
## this function is used to get the sum of all the null values present in the table coloumn wise

df.isna().sum()

Name    0
Age     2
City    1
dtype: int64

In [68]:
## this function is used to get the number of rows and the coloumns

df.shape

(4, 3)

In [76]:
# it will throw the output of the numbers of rows
df.shape[0]

4

In [78]:
# in this way also we can get the individual outputs for the number of rows and coloumns
rows,coloumn = df.shape

In [79]:
rows

4

In [81]:
coloumn

3

In [82]:
## in this way we can get the output for the percentage of null values for each row

(df.isna().sum()/rows)*100

Name     0.0
Age     50.0
City    25.0
dtype: float64

In [88]:
## in this way we can "drop" the null values as we have specified "how=any"

df.dropna(how='any',axis=0)

Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
2,akash,30.0,Hooghly


In [87]:
# here we will get the full table because not all the values are null as we have specified "how=all"

df.dropna(how="all",axis=0)

Unnamed: 0,Name,Age,City
0,Abhay,25.0,Malda
1,anil,,Kolkata
2,akash,30.0,Hooghly
3,Asim,,


In [89]:
# "dropna" is at coloumn level

df.dropna(how='any',axis=1)

Unnamed: 0,Name
0,Abhay
1,anil
2,akash
3,Asim


In [96]:
df = pd.DataFrame({
    'Name': ["Abhay","anil","akash","Asim","Atul",np.nan],
    'Age': [25,np.nan,30,np.nan,np.nan,np.nan],
    'City': ['Malda','Kolkata','Hooghly',np.nan,np.nan,np.nan],
    'Remarks': [np.nan,np.nan,np.nan,np.nan,np.nan,np.nan]
})
df

Unnamed: 0,Name,Age,City,Remarks
0,Abhay,25.0,Malda,
1,anil,,Kolkata,
2,akash,30.0,Hooghly,
3,Asim,,,
4,Atul,,,
5,,,,


In [95]:
## as every rows contains null value we will get the empty output
df.dropna(how='any',axis=0)

Unnamed: 0,Name,Age,City,Remarks


In [98]:
## as every coloumns contains null value we will get the empty output
df.dropna(how='any',axis=1)

0
1
2
3
4
5


In [99]:
df.dropna(how='all', axis=0)

Unnamed: 0,Name,Age,City,Remarks
0,Abhay,25.0,Malda,
1,anil,,Kolkata,
2,akash,30.0,Hooghly,
3,Asim,,,
4,Atul,,,


In [104]:
## here we will get 5 rows because one row contains all the null values
df.dropna(how='all',axis=0).shape

(5, 4)

In [105]:
## hee we will get  coloums because one coloumn contains all the null values
df.dropna(how='all',axis=1).shape

(6, 3)

# MEMORY OPTIMIZATION 

In [107]:
import pandas as pd

# Create the DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace', 'Hannah', 'Ivy', 'Jack'],
    'Age': [25, 27, 30, 32, 40, 28, 31, 33, 26, 29],
    'City': ['New York', 'Los Angeles', 'San Francisco', 'Chicago', 'Houston',
             'Miami', 'Boston', 'Dallas', 'Austin', 'Seattle'],
    'Score': [85, 90, 95, 88, 91, 76, 87, 92, 80, 89],
    'Remarks': ['Good', 'Excellent', 'Good', 'Average', 'Very Good',
                'Fair', 'Good', 'Excellent', 'Average', 'Good']
}

df = pd.DataFrame(data)

# Display the DataFrame
df


Unnamed: 0,Name,Age,City,Score,Remarks
0,Alice,25,New York,85,Good
1,Bob,27,Los Angeles,90,Excellent
2,Charlie,30,San Francisco,95,Good
3,David,32,Chicago,88,Average
4,Eve,40,Houston,91,Very Good
5,Frank,28,Miami,76,Fair
6,Grace,31,Boston,87,Good
7,Hannah,33,Dallas,92,Excellent
8,Ivy,26,Austin,80,Average
9,Jack,29,Seattle,89,Good


In [110]:
df.memory_usage()

Index      132
Name        80
Age         80
City        80
Score       80
Remarks     80
dtype: int64

In [112]:
df.dtypes

Name       object
Age         int64
City       object
Score       int64
Remarks    object
dtype: object

In [114]:
df['Age']

0    25
1    27
2    30
3    32
4    40
5    28
6    31
7    33
8    26
9    29
Name: Age, dtype: int64

In [115]:
df['Age'].min()

np.int64(25)

In [116]:
df['Age'].max()

np.int64(40)

In [118]:
## in this way we can chane the datatype from "int64" to "int8"
df['Age']=df['Age'].astype(np.int8)
df.dtypes

Name       object
Age          int8
City       object
Score       int64
Remarks    object
dtype: object

In [121]:
## now we can see that the memory usage for 'Age' was 80 initially but after changing the datatype it has reduced to 10
df.memory_usage()

Index      132
Name        80
Age         10
City        80
Score       80
Remarks     80
dtype: int64

In [11]:
import pandas as pd
df = pd.read_excel("C:/Users/dell/Downloads/Sample - Superstore.xls")
df


Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.9600,2,0.00,41.9136
1,2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.9400,3,0.00,219.5820
2,3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.6200,2,0.00,6.8714
3,4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.0310
4,5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.3680,2,0.20,2.5164
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9989,9990,CA-2014-110422,2014-01-21,2014-01-23,Second Class,TB-21400,Tom Boeckenhauer,Consumer,United States,Miami,...,33180,South,FUR-FU-10001889,Furniture,Furnishings,Ultra Door Pull Handle,25.2480,3,0.20,4.1028
9990,9991,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.9600,2,0.00,15.6332
9991,9992,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.5760,2,0.20,19.3932
9992,9993,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.6000,4,0.00,13.3200


In [12]:
df.shape

(9994, 21)

In [13]:
df.memory_usage()

Index              132
Row ID           79952
Order ID         79952
Order Date       79952
Ship Date        79952
Ship Mode        79952
Customer ID      79952
Customer Name    79952
Segment          79952
Country          79952
City             79952
State            79952
Postal Code      79952
Region           79952
Product ID       79952
Category         79952
Sub-Category     79952
Product Name     79952
Sales            79952
Quantity         79952
Discount         79952
Profit           79952
dtype: int64

In [14]:
df.memory_usage().sum()

np.int64(1679124)

In [15]:
df.dtypes

Row ID                    int64
Order ID                 object
Order Date       datetime64[ns]
Ship Date        datetime64[ns]
Ship Mode                object
Customer ID              object
Customer Name            object
Segment                  object
Country                  object
City                     object
State                    object
Postal Code               int64
Region                   object
Product ID               object
Category                 object
Sub-Category             object
Product Name             object
Sales                   float64
Quantity                  int64
Discount                float64
Profit                  float64
dtype: object

In [17]:
import numpy as np
df['Row ID']= df['Row ID'].astype(np.int8)

In [18]:
df.dtypes

Row ID                     int8
Order ID                 object
Order Date       datetime64[ns]
Ship Date        datetime64[ns]
Ship Mode                object
Customer ID              object
Customer Name            object
Segment                  object
Country                  object
City                     object
State                    object
Postal Code               int64
Region                   object
Product ID               object
Category                 object
Sub-Category             object
Product Name             object
Sales                   float64
Quantity                  int64
Discount                float64
Profit                  float64
dtype: object

In [19]:
df.memory_usage().sum()

np.int64(1609166)

In [20]:
# as we get the output as byte and if we need to convert it to kilobyte
df.memory_usage().sum()/1024

np.float64(1571.451171875)

In [21]:
# as we get the output as byte and if we need to convert it to megabyte
df.memory_usage().sum()/(1024*1024)

np.float64(1.5346202850341797)

In [1]:
import pandas as pd

# Create the DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace', 'Hannah', 'Ivy', 'Jack'],
    'Age': [25, 27, 30, 32, 40, 28, 31, 33, 26, 29],
    'City': ['New York', 'Los Angeles', 'San Francisco', 'Chicago', 'Houston',
             'Miami', 'Boston', 'Dallas', 'Austin', 'Seattle'],
    'Score': [85, 90, 95, 88, 91, 76, 87, 92, 80, 89],
    'Remarks': ['Good', 'Excellent', 'Good', 'Average', 'Very Good',
                'Fair', 'Good', 'Excellent', 'Average', 'Good']
}

df = pd.DataFrame(data)

# Display the DataFrame
df


Unnamed: 0,Name,Age,City,Score,Remarks
0,Alice,25,New York,85,Good
1,Bob,27,Los Angeles,90,Excellent
2,Charlie,30,San Francisco,95,Good
3,David,32,Chicago,88,Average
4,Eve,40,Houston,91,Very Good
5,Frank,28,Miami,76,Fair
6,Grace,31,Boston,87,Good
7,Hannah,33,Dallas,92,Excellent
8,Ivy,26,Austin,80,Average
9,Jack,29,Seattle,89,Good


In [6]:
df['Remarks']

0         Good
1    Excellent
2         Good
3      Average
4    Very Good
5         Fair
6         Good
7    Excellent
8      Average
9         Good
Name: Remarks, dtype: object

# LABEL ENCODING

ONE_HOT label Encoding

In [14]:
# this is "ONE_HOT label Encoding" (which means the conversion of text format to numerical format) | True=1,False=0

pd.get_dummies(df['Remarks'])

Unnamed: 0,Average,Excellent,Fair,Good,Very Good
0,False,False,False,True,False
1,False,True,False,False,False
2,False,False,False,True,False
3,True,False,False,False,False
4,False,False,False,False,True
5,False,False,True,False,False
6,False,False,False,True,False
7,False,True,False,False,False
8,True,False,False,False,False
9,False,False,False,True,False


In [10]:
# converting True or False to 1 and 0

pd.get_dummies(df['Remarks'])+0

Unnamed: 0,Average,Excellent,Fair,Good,Very Good
0,0,0,0,1,0
1,0,1,0,0,0
2,0,0,0,1,0
3,1,0,0,0,0
4,0,0,0,0,1
5,0,0,1,0,0
6,0,0,0,1,0
7,0,1,0,0,0
8,1,0,0,0,0
9,0,0,0,1,0


In [12]:
df['City']

0         New York
1      Los Angeles
2    San Francisco
3          Chicago
4          Houston
5            Miami
6           Boston
7           Dallas
8           Austin
9          Seattle
Name: City, dtype: object

In [13]:
pd.get_dummies(df['City'])

Unnamed: 0,Austin,Boston,Chicago,Dallas,Houston,Los Angeles,Miami,New York,San Francisco,Seattle
0,False,False,False,False,False,False,False,True,False,False
1,False,False,False,False,False,True,False,False,False,False
2,False,False,False,False,False,False,False,False,True,False
3,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,True,False,False,False,False,False
5,False,False,False,False,False,False,True,False,False,False
6,False,True,False,False,False,False,False,False,False,False
7,False,False,False,True,False,False,False,False,False,False
8,True,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,True


In [16]:
import pandas as pd
df = pd.DataFrame({
    'City':['New York','Los Angeles','Chicago','New York','Chicago'],
    'Gender': ['male','female','female','male','male']
})
df

Unnamed: 0,City,Gender
0,New York,male
1,Los Angeles,female
2,Chicago,female
3,New York,male
4,Chicago,male


In [17]:
## this function is used to create label
pd.factorize(df['City'])

(array([0, 1, 2, 0, 2]),
 Index(['New York', 'Los Angeles', 'Chicago'], dtype='object'))

In [19]:
## this is how we can create a new coloumn of label

df['City Label']=pd.factorize(df['City'])[0]
df

Unnamed: 0,City,Gender,City Label
0,New York,male,0
1,Los Angeles,female,1
2,Chicago,female,2
3,New York,male,0
4,Chicago,male,2


In [2]:
import pandas as pd
df = pd.read_excel("C:/Users/dell/Downloads/Sample - Superstore.xls")
df


Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.9600,2,0.00,41.9136
1,2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.9400,3,0.00,219.5820
2,3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.6200,2,0.00,6.8714
3,4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.0310
4,5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.3680,2,0.20,2.5164
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9989,9990,CA-2014-110422,2014-01-21,2014-01-23,Second Class,TB-21400,Tom Boeckenhauer,Consumer,United States,Miami,...,33180,South,FUR-FU-10001889,Furniture,Furnishings,Ultra Door Pull Handle,25.2480,3,0.20,4.1028
9990,9991,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.9600,2,0.00,15.6332
9991,9992,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.5760,2,0.20,19.3932
9992,9993,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.6000,4,0.00,13.3200


In [4]:
df.columns

Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
       'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State',
       'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category',
       'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit'],
      dtype='object')

In [5]:
df['Sales'].min()

np.float64(0.44399999999999995)

In [6]:
df['Sales'].max()

np.float64(22638.48)

# OUTLIERS

In [8]:
import numpy as np
np.quantile(df['Sales'],0.25)

np.float64(17.28)

In [9]:
np.quantile(df['Sales'],0.50),np.median(df['Sales'])

(np.float64(54.489999999999995), np.float64(54.489999999999995))

In [10]:
np.quantile(df['Sales'],0.75)

np.float64(209.94)

In [12]:
iqr = np.quantile(df['Sales'],0.75)-np.quantile(df['Sales'],0.25)
iqr

np.float64(192.66)

In [18]:
import numpy as np
np.quantile(df['Sales'],0.75)+1.5*(iqr)

np.float64(498.93)

In [19]:
np.quantile(df['Sales'],0.25)-1.5*(iqr)

np.float64(-271.71000000000004)

In [24]:
(df['Sales'][df['Sales']>np.quantile(df['Sales'],0.75)+1.5*(iqr)].shape[0]/df.shape[0])*100

11.677006203722232

In [25]:
## to get the unique values

df['Region'].unique()

array(['South', 'West', 'Central', 'East'], dtype=object)

In [27]:
df['Region'].value_counts()

Region
West       3203
East       2848
Central    2323
South      1620
Name: count, dtype: int64

In [28]:
(df['Region'].value_counts(normalize=True))*100

Region
West       32.049230
East       28.497098
Central    23.243946
South      16.209726
Name: proportion, dtype: float64

In [30]:
# to get the first 5 rows
df.head()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [31]:
# to get the last 5 rows
df.tail()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
9989,9990,CA-2014-110422,2014-01-21,2014-01-23,Second Class,TB-21400,Tom Boeckenhauer,Consumer,United States,Miami,...,33180,South,FUR-FU-10001889,Furniture,Furnishings,Ultra Door Pull Handle,25.248,3,0.2,4.1028
9990,9991,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.96,2,0.0,15.6332
9991,9992,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.576,2,0.2,19.3932
9992,9993,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.6,4,0.0,13.32
9993,9994,CA-2017-119914,2017-05-04,2017-05-09,Second Class,CC-12220,Chris Cortes,Consumer,United States,Westminster,...,92683,West,OFF-AP-10002684,Office Supplies,Appliances,"Acco 7-Outlet Masterpiece Power Center, Wihtou...",243.16,2,0.0,72.948


In [32]:
# to get the last 2 rows
df.tail(2)

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
9992,9993,CA-2017-121258,2017-02-26,2017-03-03,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.6,4,0.0,13.32
9993,9994,CA-2017-119914,2017-05-04,2017-05-09,Second Class,CC-12220,Chris Cortes,Consumer,United States,Westminster,...,92683,West,OFF-AP-10002684,Office Supplies,Appliances,"Acco 7-Outlet Masterpiece Power Center, Wihtou...",243.16,2,0.0,72.948
