In [1]:
import pandas as pd
import numpy as np

# concat() — stack DataFrames row-wise or column-wise
    - concat() stacks DataFrames along an axis.
    - Use axis=0 to append rows, axis=1 to join columns side-by-side.
    - It preserves indexes by default; use ignore_index=True to reset.ta
    - syntax : pd.concat([df1, df2, ...], axis=0 or 1, ignore_index=True or False)
    - how to use :
        (1) Row-wise concatenation (axis=0):
            - Stacks DataFrames vertically, adding rows.
            - Useful for combining datasets with the same columns.
        (2) Column-wise concatenation (axis=1):
            - Joins DataFrames side-by-side, adding columns.
            - Useful for merging datasets with the same index.
        (3) Ignore  index:
            - Use ignore_index=True to reset the index in the result.
            - This is helpful when the original indexes are not meaningful.

In [2]:
jan = pd.DataFrame({
    'OrderID': [101, 102],
    'CustomerID': [1, 2],
    'Product': ['Laptop', 'Phone'],
    'Total': [1200, 800]
})

feb = pd.DataFrame({
    'OrderID': [103, 104],
    'CustomerID': [2, 3],
    'Product': ['Tablet', 'Mouse'],
    'Total': [450, 50]
})

In [3]:
# Row-wise append (axis=0)
pd.concat([jan, feb])

Unnamed: 0,OrderID,CustomerID,Product,Total
0,101,1,Laptop,1200
1,102,2,Phone,800
0,103,2,Tablet,450
1,104,3,Mouse,50


In [4]:
# Often you want a clean index:
pd.concat([jan, feb], ignore_index=True)

Unnamed: 0,OrderID,CustomerID,Product,Total
0,101,1,Laptop,1200
1,102,2,Phone,800
2,103,2,Tablet,450
3,104,3,Mouse,50


In [5]:
# Column-wise combine (axis=1)
# Create two frames with same index (say customer info and metrics):

info = pd.DataFrame({
    'CustomerID': [1,2,3],
    'Name': ['Alice','Bob','Charlie']
}).set_index('CustomerID')

metrics = pd.DataFrame({
    'CustomerID': [1,2,4],
    'TotalSpent': [1200, 750, 300]
}).set_index('CustomerID')

pd.concat([info, metrics], axis=1)
# Explanation: pandas aligns on the index. Missing values appear where no match exists.

Unnamed: 0_level_0,Name,TotalSpent
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Alice,1200.0
2,Bob,750.0
3,Charlie,
4,,300.0


In [6]:


# 1. Sales_Q1 DataFrame (Jan-Mar)
sales_q1_data = {
    'OrderID': range(1001, 1011),
    'Product': np.random.choice(['Laptop', 'Mouse', 'Keyboard', 'Monitor', 'Webcam'], 10),
    'Quantity': np.random.randint(1, 10, 10),
    'Revenue': np.random.randint(5000, 100000, 10)
}
sales_q1_df = pd.DataFrame(sales_q1_data)

# 2. Sales_Q2 DataFrame (Apr-Jun)
sales_q2_data = {
    'OrderID': range(1011, 1021),
    'Product': np.random.choice(['Laptop', 'Mouse', 'Keyboard', 'Monitor', 'Webcam'], 10),
    'Quantity': np.random.randint(1, 10, 10),
    'Revenue': np.random.randint(5000, 100000, 10)
}
sales_q2_df = pd.DataFrame(sales_q2_data)

# 3. Customer_Info DataFrame
customer_info_data = {
    'CustomerID': [f'C{i}' for i in range(201, 211)],
    'CustomerName': ['Aarav Patel', 'Priya Sharma', 'Rohan Mehta', 'Sneha Desai', 'Vikram Singh', 'Anjali Gupta', 'Mohit Kumar', 'Isha Shah', 'Karan Joshi', 'Diya Verma'],
    'Region': np.random.choice(['West', 'North', 'South', 'East'], 10)
}
customer_info_df = pd.DataFrame(customer_info_data)

##  Practice Question
    Easy: Combine the Q1 and Q2 sales data to create a single 'total_sales' DataFrame.

    Medium: Join the customer information next to the Q1 sales data. Combine both DataFrames horizontally (column-wise).

    Medium: Combine the Q1 and Q2 sales data, but after joining, reset the index to start from 0, making it continuous.

    Hard: Combine the Q1 and Q2 sales data and create a new index level that identifies whether the data is from 'Q1' or 'Q2'.

    Hard: Combine the Sales_Q1 data with Sales_Returns data, noting that both DataFrames have different columns (schemas).

    Hard: First, combine the Q1 and Q2 sales data. Then, from this combined data, find only those sales where the 'Revenue' is greater than 75,000.

In [22]:
# 1. Combine the Q1 and Q2 sales data to create a single 'total_sales' DataFrame.
pd.concat([sales_q1_df, sales_q2_df])

Unnamed: 0,OrderID,Product,Quantity,Revenue
0,1001,Laptop,1,24699
1,1002,Webcam,9,43474
2,1003,Mouse,8,93412
3,1004,Webcam,5,43713
4,1005,Mouse,3,41721
5,1006,Webcam,9,66233
6,1007,Keyboard,5,45341
7,1008,Mouse,5,63960
8,1009,Webcam,2,68145
9,1010,Mouse,9,47710


In [None]:
# 2. Join the customer information next to the Q1 sales data. Combine both DataFrames horizontally (column-wise).
pd.concat([sales_q1_df,customer_info_df],axis=1)

In [20]:
# 3. Combine the Q1 and Q2 sales data, but after joining, reset the index to start from 0, making it continuous.
pd.concat([sales_q1_df,sales_q2_df],axis=0,ignore_index=True)

Unnamed: 0,OrderID,Product,Quantity,Revenue
0,1001,Laptop,1,24699
1,1002,Webcam,9,43474
2,1003,Mouse,8,93412
3,1004,Webcam,5,43713
4,1005,Mouse,3,41721
5,1006,Webcam,9,66233
6,1007,Keyboard,5,45341
7,1008,Mouse,5,63960
8,1009,Webcam,2,68145
9,1010,Mouse,9,47710


In [21]:
# 4. Combine the Q1 and Q2 sales data and create a new index level that identifies whether the data is from 'Q1' or 'Q2'.
pd.concat([sales_q1_df, sales_q2_df], keys=['Q1', 'Q2'])

Unnamed: 0,Unnamed: 1,OrderID,Product,Quantity,Revenue
Q1,0,1001,Laptop,1,24699
Q1,1,1002,Webcam,9,43474
Q1,2,1003,Mouse,8,93412
Q1,3,1004,Webcam,5,43713
Q1,4,1005,Mouse,3,41721
Q1,5,1006,Webcam,9,66233
Q1,6,1007,Keyboard,5,45341
Q1,7,1008,Mouse,5,63960
Q1,8,1009,Webcam,2,68145
Q1,9,1010,Mouse,9,47710


In [17]:
# 5. Combine the Sales_Q1 data with Sales_Returns data, noting that both DataFrames have different columns (schemas).
returns_data = {
    'OrderID': [1001, 1004],
    'Product': ['Laptop', 'Laptop'],
    'Return_Reason': ['Defective', 'Wrong Model']
}
returns_df = pd.DataFrame(returns_data)

pd.concat([sales_q1_df,returns_df] , axis=1)

Unnamed: 0,OrderID,Product,Quantity,Revenue,OrderID.1,Product.1,Return_Reason
0,1001,Laptop,1,24699,1001.0,Laptop,Defective
1,1002,Webcam,9,43474,1004.0,Laptop,Wrong Model
2,1003,Mouse,8,93412,,,
3,1004,Webcam,5,43713,,,
4,1005,Mouse,3,41721,,,
5,1006,Webcam,9,66233,,,
6,1007,Keyboard,5,45341,,,
7,1008,Mouse,5,63960,,,
8,1009,Webcam,2,68145,,,
9,1010,Mouse,9,47710,,,


In [13]:
# 6. First, combine the Q1 and Q2 sales data. Then, from this combined data, find only those sales where the 'Revenue' is greater than 75,000.
final_result=pd.concat([sales_q1_df,sales_q2_df],axis=0)
print(final_result[final_result['Revenue']>75000])

   OrderID   Product  Quantity  Revenue
2     1003     Mouse         8    93412
1     1012     Mouse         4    94788
2     1013    Laptop         5    75370
5     1016  Keyboard         2    83269
8     1019  Keyboard         1    84889
