In [1]:
import pandas as pd
import numpy as np

In [2]:
# Step 1: build one small sales table per region.
# Region A carries a missing sales figure and region B a missing product
# code, so the cleaning cells further down have something to work on.

sales_region_A = pd.DataFrame(data=dict(
    product_id=[101, 102, 103, 104],
    product_name=['Laptop', 'Mouse', 'Keyboard', 'Monitor'],
    sales=[1200, 150, np.nan, 300],
))

# Region B names its identifier column 'product_code' rather than 'product_id'.
sales_region_B = pd.DataFrame(data=dict(
    product_code=[105, 106, 107, np.nan],
    product_name=['Printer', 'Tablet', 'Speaker', 'Camera'],
    sales=[400, 800, 350, 500],
))

# Show each table under its heading, followed by a blank separator line.
for heading, frame in (
    ("Sales Region A:", sales_region_A),
    ("Sales Region B:", sales_region_B),
):
    print(heading)
    print(frame, "\n")

Sales Region A:
   product_id product_name   sales
0         101       Laptop  1200.0
1         102        Mouse   150.0
2         103     Keyboard     NaN
3         104      Monitor   300.0 

Sales Region B:
   product_code product_name  sales
0         105.0      Printer    400
1         106.0       Tablet    800
2         107.0      Speaker    350
3           NaN       Camera    500 



In [3]:
# Region B labels its identifier column 'product_code'; give it region A's
# name so the two tables line up column-for-column when stacked.
sales_region_B = sales_region_B.rename({'product_code': 'product_id'}, axis='columns')

# Stack the two regions vertically and renumber the rows from 0.
region_frames = [sales_region_A, sales_region_B]
combined_sales = pd.concat(region_frames, axis=0, ignore_index=True)

print("Combined DataFrame:")
print(combined_sales, "\n")

Combined DataFrame:
   product_id product_name   sales
0       101.0       Laptop  1200.0
1       102.0        Mouse   150.0
2       103.0     Keyboard     NaN
3       104.0      Monitor   300.0
4       105.0      Printer   400.0
5       106.0       Tablet   800.0
6       107.0      Speaker   350.0
7         NaN       Camera   500.0 



In [4]:
# Count the NaN entries in each column of the combined table.
missing_per_column = combined_sales.isna().sum()

print("Missing Values Summary:")
print(missing_per_column, "\n")

Missing Values Summary:
product_id      1
product_name    0
sales           1
dtype: int64 



In [5]:
# Impute missing sales with the mean of the recorded sales figures
# (Series.mean skips NaN entries by default).
sales_column = combined_sales['sales']
avg_sales = sales_column.mean()
combined_sales['sales'] = sales_column.fillna(value=avg_sales)

In [7]:
# A row without a product_id cannot be attributed to any product, and an
# identifier cannot be imputed, so keep only the rows that have one.
has_product_id = combined_sales['product_id'].notna()
combined_sales = combined_sales[has_product_id]

print("Cleaned DataFrame:")
print(combined_sales, "\n")

Cleaned DataFrame:
   product_id product_name        sales
0       101.0       Laptop  1200.000000
1       102.0        Mouse   150.000000
2       103.0     Keyboard   528.571429
3       104.0      Monitor   300.000000
4       105.0      Printer   400.000000
5       106.0       Tablet   800.000000
6       107.0      Speaker   350.000000 



In [9]:
# Sum sales for each (product_id, product_name) pair; as_index=False keeps
# the grouping keys as ordinary columns instead of moving them into the index.
group_keys = ['product_id', 'product_name']
total_sales_per_product = combined_sales.groupby(group_keys, as_index=False)['sales'].sum()

print("Total Sales per Product:")
print(total_sales_per_product)

Total Sales per Product:
   product_id product_name        sales
0       101.0       Laptop  1200.000000
1       102.0        Mouse   150.000000
2       103.0     Keyboard   528.571429
3       104.0      Monitor   300.000000
4       105.0      Printer   400.000000
5       106.0       Tablet   800.000000
6       107.0      Speaker   350.000000
