# Comparing DataFrames

In [4]:
import pandas as pd

# Source data for the demo. Named `fruit_data` rather than `dict` so the
# built-in `dict` type is not shadowed for the rest of the kernel session.
fruit_data = {
    "Fruits": ["Mango", "Banana", "Litchi", "Grapes"],
    "Price": [120, 70, 80, 70],
    "Quantity": [5, 12, 3, 2],
}

# Baseline DataFrame that the later comparison uses as "self".
df1 = pd.DataFrame(fruit_data)
print(df1)

print()

# Independent copy: the edits below must not leak back into df1.
df2 = df1.copy()
print(df2)

print()

# Modify specific values in the copy so the two frames differ.
# Price changes for rows 0-2 only; row 3 keeps its original price (70).
df2.loc[[0, 1, 2], "Price"] = [100, 60, 90]
# Quantity changes for every row.
df2.loc[[0, 1, 2, 3], "Quantity"] = [6, 8, 2, 1]

# Print the modified DataFrame
print("Modified DataFrame:")
print(df2)

   Fruits  Price  Quantity
0   Mango    120         5
1  Banana     70        12
2  Litchi     80         3
3  Grapes     70         2

   Fruits  Price  Quantity
0   Mango    120         5
1  Banana     70        12
2  Litchi     80         3
3  Grapes     70         2

Modified DataFrame:
   Fruits  Price  Quantity
0   Mango    100         6
1  Banana     60         8
2  Litchi     90         2
3  Grapes     70         1


# Compare DataFrames with Different `keep_equal`, `keep_shape`, and `align_axis` Arguments

In [7]:
# DataFrame.compare(other) reports where df1 ("self") and df2 ("other") differ.
#   keep_shape: False drops rows/columns that are entirely equal; True keeps them all.
#   keep_equal: False shows equal cells as NaN; True shows their actual values.

# Default behavior: keep_equal=False, keep_shape=False (shows only differences)
print("Default comparison (differences only):")
print(df1.compare(df2))
print()

# keep_equal=False, keep_shape=True (shows all elements, NaNs for unchanged)
print("Show all elements, NaNs for unchanged (keep_equal=False, keep_shape=True):")
print(df1.compare(df2, keep_shape=True))
print()

# keep_equal=True, keep_shape=False (only rows/columns containing a difference,
# but equal cells inside them keep their values instead of becoming NaN)
print("Show only differing rows/columns, equal values kept (keep_equal=True, keep_shape=False):")
print(df1.compare(df2, keep_equal=True))
print()

# keep_equal=True, keep_shape=True (every row and column, with actual values
# everywhere; no NaN is introduced for equal cells)
print("Show all elements, equal values kept instead of NaN (keep_equal=True, keep_shape=True):")
print(df1.compare(df2, keep_equal=True, keep_shape=True))
print()

# Align comparison by rows (align_axis=0): self/other are stacked as index
# levels instead of appearing as side-by-side columns
print("Comparison aligned by rows (align_axis=0):")
print(df1.compare(df2, align_axis=0))
print()

Default comparison (differences only):
   Price        Quantity      
    self  other     self other
0  120.0  100.0        5     6
1   70.0   60.0       12     8
2   80.0   90.0        3     2
3    NaN    NaN        2     1

Show all elements, NaNs for unchanged (keep_equal=False, keep_shape=True):
  Fruits        Price        Quantity      
    self other   self  other     self other
0    NaN   NaN  120.0  100.0        5     6
1    NaN   NaN   70.0   60.0       12     8
2    NaN   NaN   80.0   90.0        3     2
3    NaN   NaN    NaN    NaN        2     1

Show all elements, including unchanged (keep_equal=True, keep_shape=False):
  Price       Quantity      
   self other     self other
0   120   100        5     6
1    70    60       12     8
2    80    90        3     2
3    70    70        2     1

Show all elements, NaNs for same, values for different (keep_equal=True, keep_shape=True):
   Fruits         Price       Quantity      
     self   other  self other     self other
0 