In [3]:
## 1 - Dataset Preparation
import os
# List all files in the current directory
os.listdir()

['.config',
 'inventory_data.csv',
 'customer_data.csv',
 'api_data1.json',
 '[Google Colab] Class Demo 9.ipynb',
 'api_data2.json',
 'sales_data1.csv',
 'sales_data2.csv',
 'sample_data']

In [8]:
# 2 - Concatenation (Stacking)
import pandas as pd
df_sales1 = pd.read_csv('sales_data1.csv')
df_sales2 = pd.read_csv('sales_data2.csv')
combined_sales = pd.concat([df_sales1,df_sales2])
print("\nThe first 5 rows of the combined sales are:\n", combined_sales.head().to_string())


# 3 - Data Merging/Joining
## Load customer_data.csv
df_customer = pd.read_csv('customer_data.csv')
## Use the pd.merge() function to merge the combined sales data with customer data on CustomerID.
customer_combined_sales = pd.merge(df_customer,combined_sales, on='CustomerID', how='inner')

##Print the first few rows of the merged dataset to verify the join.
print("\nThe first 5 rows of the merged dataset (customers and sales) are:\n", customer_combined_sales.head().to_string())

# 4- Database Joins using SQLite
# Create an SQLite database and load sales and inventory data. Perform SQL joins to integrate these datasets.
import sqlite3

df_invetory = pd.read_csv('inventory_data.csv')
# Create a connection to an SQLite database using sqlite3.
conn = sqlite3.connect('retail_data.db')

# Load the combined_sales_data and inventory_data.csv into the database as tables.
# Load combined sales data into SQLite
combined_sales.to_sql('sales', conn, index=False, if_exists='replace')
df_invetory.to_sql('inventory', conn, index=False, if_exists='replace')

# Perform an SQL join on ProductID to combine the sales and inventory data.
query = """
    SELECT sales.*, inventory.ProductName, inventory.StockLevel
    FROM sales
    JOIN inventory ON sales.ProductID = inventory.ProductID
"""

merged_db_data = pd.read_sql_query(query, conn)
# Print the first few rows of the joined dataset to verify the join.
print("\nThe first 5 rows of the joined dataset are:\n", merged_db_data.head().to_string())



# 5 - API Integration

import requests
##  Use a simulated API provided and combine the results with existing datasets.
#  Load the API response data from api_data1.json and api_data2.json (assuming these are simulated responses).
api_data1 = pd.read_json('api_data1.json')
api_data2 = pd.read_json('api_data2.json')

#  Use pd.concat() to concatenate the API data.
combined_api_data = pd.concat([api_data1, api_data2])
#  Print the first few rows of the combined API dataset to verify.
print("\nThe first 5 rows of the combined API dataset are:\n", combined_api_data.head().to_string())




The first 5 rows of the combined sales are:
    OrderID  CustomerID  ProductID  Quantity   OrderDate
0     1001           1        101         5  01/08/2024
1     1002           2        103         2  02/08/2024
2     1003           1        102         1  03/08/2024
0     1004           3        104         4  04/08/2024
1     1005           4        105         3  05/08/2024

The first 5 rows of the merged dataset (customers and sales) are:
    CustomerID   CustomerName ContactName Country  OrderID  ProductID  Quantity   OrderDate
0           1       John Doe        John     USA     1001        101         5  01/08/2024
1           1       John Doe        John     USA     1003        102         1  03/08/2024
2           2     Jane Smith        Jane  Canada     1002        103         2  02/08/2024
3           3    Emily Davis       Emily     USA     1004        104         4  04/08/2024
4           4  Michael Brown     Michael      UK     1005        105         3  05/08/2024

The