In [1]:
## 1 - Dataset Preparation
import os

# Show every entry of the working directory so the expected input files
# (sales, customer, inventory and API data) can be confirmed before loading.
os.listdir(os.curdir)

['.config',
 'sales_data2_homework.csv',
 'api_data1_homework.json',
 'inventory_data_homework.csv',
 'api_data2_homework.json',
 'sales_data1_homework.csv',
 'customer_data_homework.csv',
 'sample_data']

In [2]:
# 1 - Concatenation (stacking): combine the sales data from two CSV files.
import pandas as pd

df_sales1 = pd.read_csv('sales_data1_homework.csv')
df_sales2 = pd.read_csv('sales_data2_homework.csv')

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# each file keeps its own 0-based labels, so labels repeat in the result and
# label-based lookups such as .loc[0] would return one row per source file.
combined_sales = pd.concat([df_sales1, df_sales2], ignore_index=True)
print("\nThe first 5 rows of the combined sales are:\n", combined_sales.head().to_string())


# 2 - Data merging/joining: attach customer details to the sales rows through
# the shared CustomerID key.
## Read the customer table.
df_customer = pd.read_csv('customer_data_homework.csv')

## Inner join with customers on the left: only CustomerID values present in
## both tables survive, and the customer columns come first in the result.
customer_combined_sales = df_customer.merge(combined_sales, how='inner', on='CustomerID')

## Preview the merged frame to verify the join.
merged_preview = customer_combined_sales.head().to_string()
print("\nThe first 5 rows of the merged dataset (customers and sales) are:\n", merged_preview)

# 3 - Database joins using SQLite:
# load the sales and inventory data into an SQLite database and join them in SQL.
import sqlite3

## Load inventory_data_homework.csv.
## (The variable name keeps its original spelling in case other cells use it.)
df_invetory = pd.read_csv('inventory_data_homework.csv')

# Inner join on ProductID: every sales column plus the product name and stock
# level from the inventory table.
query = """
    SELECT sales.*, inventory.ProductName, inventory.StockLevel
    FROM sales
    JOIN inventory ON sales.ProductID = inventory.ProductID
"""

# Open the database file, and always close the connection again -- even when a
# load or the query fails -- so the file handle is not leaked for the lifetime
# of the kernel.
conn = sqlite3.connect('Retail_Homework_data.db')
try:
    # Write both frames as tables; if_exists='replace' makes the cell re-runnable.
    combined_sales.to_sql('sales', conn, index=False, if_exists='replace')
    df_invetory.to_sql('inventory', conn, index=False, if_exists='replace')

    merged_db_data = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Print the first few rows of the joined dataset to verify the join.
print("\nThe first 5 rows of the joined dataset are:\n", merged_db_data.head().to_string())



# 4 - API Integration

# NOTE: requests is not used in this cell (the API responses are read from
# local JSON files); the import is kept in case other cells rely on it.
import requests

## Simulate the API calls by loading the saved responses from the two local
## JSON files instead of calling a live endpoint.
api_data1 = pd.read_json('api_data1_homework.json')
api_data2 = pd.read_json('api_data2_homework.json')

# Stack both responses. ignore_index=True renumbers the rows 0..n-1 so the
# combined frame does not carry the same index labels once per source file.
combined_api_data = pd.concat([api_data1, api_data2], ignore_index=True)

# Print the first few rows of the combined API dataset to verify.
print("\nThe first 5 rows of the combined API dataset are:\n", combined_api_data.head().to_string())




The first 5 rows of the combined sales are:
    OrderID  CustomerID  ProductID  Quantity   OrderDate
0     1011          11        111         4  11/08/2024
1     1012          12        112         3  12/08/2024
2     1013          13        113         2  13/08/2024
3     1014          14        114         6  14/08/2024
4     1015          15        115         1  15/08/2024

The first 5 rows of the merged dataset (customers and sales) are:
    CustomerID   CustomerName ContactName    Country  OrderID  ProductID  Quantity   OrderDate
0          11     Alice Blue       Alice        USA     1011        111         4  11/08/2024
1          12      Bob Green         Bob     Canada     1012        112         3  12/08/2024
2          13      Clara Red       Clara        USA     1013        113         2  13/08/2024
3          14  Daniel Yellow      Daniel         UK     1014        114         6  14/08/2024
4          15      Eve Brown         Eve  Australia     1015        115         1  15/08/2024

In [3]:
# Combine API data with existing datasets
print("\nCombining API data with existing datasets")


def pick_merge_key(left, right):
    """Return the first column of ``left`` that also exists in ``right``.

    Columns are scanned in ``left``'s own column order, so the chosen key is
    the same on every run. Taking an arbitrary element of a set intersection
    is not, because the iteration order of a set of strings can differ between
    interpreter sessions.

    Parameters:
        left: DataFrame whose column order decides which shared column wins.
        right: DataFrame that must also contain the column.

    Returns:
        The shared column label, or None when the frames share no column.
    """
    right_columns = set(right.columns)
    return next((col for col in left.columns if col in right_columns), None)


# Look the sales frame up by name so the cell still runs -- and reports why it
# skipped -- when the earlier cells that build it have not been executed.
sales_frame = globals().get('combined_sales')
if sales_frame is None or sales_frame.empty:
    print("Sales data is missing or empty; skipping the API merge")
else:
    merge_key = pick_merge_key(sales_frame, combined_api_data)
    if merge_key is not None:
        # Left join: every sales row is kept, enriched with API columns where
        # the key matches.
        api_sales_merged = pd.merge(sales_frame, combined_api_data, on=merge_key, how='left')
        print(f"\nAPI data merged with sales data on '{merge_key}':")
        print(f"Merged dataset shape: {api_sales_merged.shape}")
        print(api_sales_merged.head().to_string())
    else:
        print("No common columns found between API data and sales data for merging")


Combining API data with existing datasets...

API data merged with sales data on 'ProductID':
Merged dataset shape: (10, 7)
   OrderID  CustomerID  ProductID  Quantity   OrderDate ProductName  Price
0     1011          11        111         4  11/08/2024    Widget F     27
1     1012          12        112         3  12/08/2024    Widget G     37
2     1013          13        113         2  13/08/2024    Widget H     47
3     1014          14        114         6  14/08/2024    Widget I     57
4     1015          15        115         1  15/08/2024    Widget J     67
