In [1]:
import pandas as pd
import numpy as np

In [2]:
df4 = pd.DataFrame({'Product Name':['Shirt','Boot','Bag'], 
              'Order Number':[45,56,64], 
              'Total Quantity':[10,5,9]}, 
              columns = ['Product Name', 'Order Number', 'Total Quantity'])

In [3]:
#Retrieving basic info about the Dataframe
# Return a summary about the dataframe

df4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Product Name    3 non-null      object
 1   Order Number    3 non-null      int64 
 2   Total Quantity  3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 200.0+ bytes


In [4]:
# Return dataframe columns

df4.columns

Index(['Product Name', 'Order Number', 'Total Quantity'], dtype='object')

In [5]:
# Return dataframe data

df4.keys

<bound method NDFrame.keys of   Product Name  Order Number  Total Quantity
0        Shirt            45              10
1         Boot            56               5
2          Bag            64               9>

In [6]:
# Return the head of the dataframe ....could make sense if you have long frame
# Choose how many rows you want in head()

df4.head(1)

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10


In [7]:
# Return the tail of the dataframe

df4.tail(1)

Unnamed: 0,Product Name,Order Number,Total Quantity
2,Bag,64,9


In [8]:
# Return NumPy array of the dataframe

df4.values

array([['Shirt', 45, 10],
       ['Boot', 56, 5],
       ['Bag', 64, 9]], dtype=object)

In [9]:
# Return the size or number of elements in a dataframe

df4.size

9

In [10]:
# Return the shape

df4.shape

(3, 3)

In [11]:
# Return the length of the dataframe/the number of rows in a dataframe

df4.shape[0]

3

In [14]:
# Return the length of the dataframe/the number of columns in a dataframe

df4.shape[1]

3

In [15]:
#Unique Values
# Return unique values in a given column 

df4['Product Name'].unique()

array(['Shirt', 'Boot', 'Bag'], dtype=object)

In [16]:
# Return a number of unique values
df4['Product Name'].nunique()

3

In [17]:
# Counting the occurence of each value in a column 

df4['Product Name'].value_counts()

Shirt    1
Boot     1
Bag      1
Name: Product Name, dtype: int64

In [18]:
#Applying a Function to Dataframe
# Double the quantity product

def double_quantity(x):
  return x * x

In [19]:
df4['Total Quantity'].apply(double_quantity)

0    100
1     25
2     81
Name: Total Quantity, dtype: int64

In [20]:
# You can also apply an anonymous function to a dataframe
# Squaring each value in dataframe

df5 = pd.DataFrame([[1,2], [4,5]], columns=['col1', 'col2'])

df5.applymap(lambda x: x**2)

Unnamed: 0,col1,col2
0,1,4
1,16,25


In [21]:
#Sorting values in dataframe
# Sort the df4 by the order number

df4.sort_values(['Order Number'])

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9


In [22]:
df4.sort_values(['Order Number'], ascending = False)

Unnamed: 0,Product Name,Order Number,Total Quantity
2,Bag,64,9
1,Boot,56,5
0,Shirt,45,10


In [23]:
#Aggregation Methods
df4

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9


In [24]:
# summary statistics

df4.describe()

Unnamed: 0,Order Number,Total Quantity
count,3.0,3.0
mean,55.0,8.0
std,9.539392,2.645751
min,45.0,5.0
25%,50.5,7.0
50%,56.0,9.0
75%,60.0,9.5
max,64.0,10.0


In [25]:
df4.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Order Number,3.0,55.0,9.539392,45.0,50.5,56.0,60.0,64.0
Total Quantity,3.0,8.0,2.645751,5.0,7.0,9.0,9.5,10.0


In [26]:
# Mode of the dataframe
# Mode is the most recurring values

df4['Total Quantity'].mode()

0     5
1     9
2    10
Name: Total Quantity, dtype: int64

In [27]:
# The maximum value

df4['Total Quantity'].max()

10

In [28]:
# The minimum value

df4['Total Quantity'].min()

5

In [29]:
# The mean

df4['Total Quantity'].mean()

8.0

In [30]:
# The median value in a dataframe

df4['Total Quantity'].median()

9.0

In [31]:
# Standard deviation

df4['Total Quantity'].std()

2.6457513110645907

In [32]:
# Variance 

df4['Total Quantity'].var()

7.0

In [33]:
# Sum of all values in a column

df4['Total Quantity'].sum()

24

In [34]:
# Product of all values in dataframe

df4['Total Quantity'].prod()

450

In [35]:
#Groupby
df4 = pd.DataFrame({'Product Name':['Shirt','Boot','Bag', 'Ankle', 'Pullover', 'Boot', 'Ankle', 'Tshirt', 'Shirt'], 
              'Order Number':[45,56,64, 34, 67, 56, 34, 89, 45], 
              'Total Quantity':[10,5,9, 11, 11, 8, 14, 23, 10]}, 
              columns = ['Product Name', 'Order Number', 'Total Quantity'])

In [36]:
df4

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9
3,Ankle,34,11
4,Pullover,67,11
5,Boot,56,8
6,Ankle,34,14
7,Tshirt,89,23
8,Shirt,45,10


In [37]:
#Let group the df by product name
df4.groupby('Product Name').mean()

Unnamed: 0_level_0,Order Number,Total Quantity
Product Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ankle,34.0,12.5
Bag,64.0,9.0
Boot,56.0,6.5
Pullover,67.0,11.0
Shirt,45.0,10.0
Tshirt,89.0,23.0


In [38]:
df4.groupby('Product Name').sum()

Unnamed: 0_level_0,Order Number,Total Quantity
Product Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ankle,68,25
Bag,64,9
Boot,112,13
Pullover,67,11
Shirt,90,20
Tshirt,89,23


In [39]:
df4.groupby('Product Name').min()

Unnamed: 0_level_0,Order Number,Total Quantity
Product Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ankle,34,11
Bag,64,9
Boot,56,5
Pullover,67,11
Shirt,45,10
Tshirt,89,23


In [40]:
df4.groupby('Product Name').max()

Unnamed: 0_level_0,Order Number,Total Quantity
Product Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ankle,34,14
Bag,64,9
Boot,56,8
Pullover,67,11
Shirt,45,10
Tshirt,89,23


In [41]:
df4.groupby(['Product Name', 'Order Number']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Quantity
Product Name,Order Number,Unnamed: 2_level_1
Ankle,34,14
Bag,64,9
Boot,56,8
Pullover,67,11
Shirt,45,10
Tshirt,89,23


In [42]:
df4.groupby(['Product Name', 'Order Number']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Quantity
Product Name,Order Number,Unnamed: 2_level_1
Ankle,34,25
Bag,64,9
Boot,56,13
Pullover,67,11
Shirt,45,20
Tshirt,89,23


In [43]:
df4.groupby('Product Name').aggregate(['min', 'max', 'sum'])

Unnamed: 0_level_0,Order Number,Order Number,Order Number,Total Quantity,Total Quantity,Total Quantity
Unnamed: 0_level_1,min,max,sum,min,max,sum
Product Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Ankle,34,34,68,11,14,25
Bag,64,64,64,9,9,9
Boot,56,56,112,5,8,13
Pullover,67,67,67,11,11,11
Shirt,45,45,90,10,10,20
Tshirt,89,89,89,23,23,23
