In [None]:
## Market Basket Analysis of Instacart Data
## Convert from R to Python

# Loading Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Loading Data
# Read the six CSV files from the relative "Data" folder. The names on the
# left are bound, in order, to the frames read from the paths listed below.
products, orders, prior, aisles, departments, Ordertrain = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Data/products.csv",
        "Data/orders.csv",
        "Data/order_products__prior.csv",
        "Data/aisles.csv",
        "Data/departments.csv",
        "Data/order_products__train.csv",
    )
)

# Viewing Data
# Print the first five rows of each table, in the order listed, as a quick
# look at what was loaded.
for preview_frame in (orders, products, aisles, departments, prior):
    print(preview_frame.head(5))

# Data types of the orders, aisles, departments, prior and products tables.
# NOTE(review): this step was headed "converting ... to factors and numeric
# values", but no conversion is performed here; the code below only reports
# the dtypes each frame currently has.

# Each table's dtypes are printed as a one-column frame whose column header
# is the table's name.
for table_name, table in (
    ("orders", orders),
    ("aisles", aisles),
    ("departments", departments),
    ("prior", prior),
    ("products", products),
):
    print(pd.DataFrame({table_name: table.dtypes}))

# Build the product catalogue view: attach aisle information to every product
# via "aisle_id", then department information via "department_id" (default
# inner joins in both steps).
ProductsNAisles = products.merge(aisles, on="aisle_id")
ProductsNAislesNDepartments = ProductsNAisles.merge(departments, on="department_id")
print(ProductsNAislesNDepartments.head(5))

# Report the size of the merged table.
merged_rows, merged_cols = ProductsNAislesNDepartments.shape
print("After merging the data below is the results. The Merged products, Aisles and Departments data has",merged_rows,"Rows and",merged_cols,"Columns")

# Top 15 and Bottom 15 Aisle by Variety of Product Offering
def plot_aisle_variety(aisle_counts, title):
    """Draw a horizontal bar chart of the number of products per aisle.

    Parameters
    ----------
    aisle_counts : pandas.DataFrame
        Rows to plot. Must contain an 'aisle' column (bar labels) and a
        'Number_of_Products' column (bar lengths). Rows are drawn from top
        to bottom in the order they appear in the frame.
    title : str
        Title shown above the chart.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(aisle_counts['aisle'], aisle_counts['Number_of_Products'])
    # barh places the first row at the bottom; flip so the first row is on top.
    ax.invert_yaxis()
    ax.set_title(title)
    ax.set_xlabel("Number of Products")
    ax.set_ylabel("Aisle")
    # Annotate every bar with its value, just right of the bar's end.
    for position, count in enumerate(aisle_counts['Number_of_Products']):
        ax.text(count, position, " " + str(round(count, 0)), color='black', va="center")
    plt.show()


# Count the products listed under each aisle, largest assortment first.
Number_of_Product_each_Aisle = (
    ProductsNAislesNDepartments
    .groupby("aisle")["product_id"]
    .count()
    .reset_index(name="Number_of_Products")
    .sort_values(by="Number_of_Products", ascending=False)
)

Top_15 = Number_of_Product_each_Aisle.head(15)
plot_aisle_variety(Top_15, "Top 15 Aisle by Variety of Product Offering")

Bottom_15 = Number_of_Product_each_Aisle.tail(15)
plot_aisle_variety(Bottom_15, "Bottom 15 Aisle by Variety of Product Offering")

#


In [None]:
# Import the necessary libraries
## Pytorch Version Template
import torch
import numpy as np
import pandas as pd

# Load the dataset
data = pd.read_csv('instacart_data.csv')

# Preprocess the data
# ...

# Prepare the data for training
# Features are every column except 'target'; features and labels are both
# converted to float32 tensors.
X = torch.tensor(data.drop('target', axis=1).values, dtype=torch.float32)
y = torch.tensor(data['target'].values, dtype=torch.float32)
dataset = torch.utils.data.TensorDataset(X, y)

# 80/10/10 train/validation/test split. The sizes are derived from the actual
# number of rows because random_split raises ValueError when the lengths do
# not sum to len(dataset); the previous hard-coded [80000, 10000, 10000] only
# worked for a file with exactly 100000 rows (for which these sizes are the
# same). Integer arithmetic avoids float rounding, and the test split absorbs
# any remainder.
n_samples = len(dataset)
n_train = n_samples * 8 // 10
n_val = n_samples // 10
n_test = n_samples - n_train - n_val

# A dedicated seeded generator makes the split reproducible across runs.
SPLIT_SEED = 42
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [n_train, n_val, n_test],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32)

# Define the model
class MyModel(torch.nn.Module):
    """Three-layer fully connected network: input_dim -> 64 -> 32 -> 1.

    The two hidden layers use ReLU and the single output unit uses a
    sigmoid, so every output value lies between 0 and 1.

    Parameters
    ----------
    input_dim : int
        Number of input features per row.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Layers are created in fc1, fc2, fc3 order; the attribute names are
        # also the parameter keys that appear in state_dict().
        self.fc1 = torch.nn.Linear(in_features=input_dim, out_features=64)
        self.fc2 = torch.nn.Linear(in_features=64, out_features=32)
        self.fc3 = torch.nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Return the sigmoid output for ``x``; the last dimension becomes 1."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden).sigmoid()

# One input unit per feature column of X.
model = MyModel(input_dim=X.size(1))

# Define the loss function and optimizer
# Binary cross-entropy is applied to the model's sigmoid outputs; Adam
# updates all model parameters with a learning rate of 0.001.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Train the model
# Each epoch runs one optimisation pass over train_loader, then measures the
# loss on val_loader without tracking gradients.
#
# Validation iterates the loader batch by batch: val_loader.dataset is a
# Subset produced by random_split and has no `.tensors` attribute, so reading
# the tensors directly raised AttributeError.
#
# Both reported losses are sample-weighted means over the whole epoch/split.
# criterion returns a per-batch mean, so each batch loss is multiplied by its
# batch size before summing. Previously "Train loss" was only the loss of the
# last training batch.
for epoch in range(100):
    model.train()
    train_loss_sum, train_seen = 0.0, 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        # The model outputs shape (batch, 1); give the labels the same shape.
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        loss.backward()
        optimizer.step()
        train_loss_sum += loss.item() * X_batch.size(0)
        train_seen += X_batch.size(0)

    model.eval()
    val_loss_sum, val_seen = 0.0, 0
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model(X_val_batch)
            val_batch_loss = criterion(y_val_pred, y_val_batch.unsqueeze(1))
            val_loss_sum += val_batch_loss.item() * X_val_batch.size(0)
            val_seen += X_val_batch.size(0)

    # An empty loader yields NaN instead of a ZeroDivisionError/NameError.
    train_loss = train_loss_sum / train_seen if train_seen else float('nan')
    val_loss = val_loss_sum / val_seen if val_seen else float('nan')
    print(f'Epoch {epoch+1}: Train loss = {train_loss:.4f}, Val loss = {val_loss:.4f}')

# Evaluate the model
# The test split is scored batch by batch through test_loader:
# test_loader.dataset is a Subset produced by random_split and has no
# `.tensors` attribute, so reading the tensors directly raised AttributeError.
model.eval()
test_loss_sum, test_seen = 0.0, 0
pred_chunks, true_chunks = [], []
with torch.no_grad():
    for X_test_batch, y_test_batch in test_loader:
        batch_pred = model(X_test_batch)
        # criterion returns the batch mean; weight by batch size so the final
        # figure is the mean over every test sample.
        batch_loss = criterion(batch_pred, y_test_batch.unsqueeze(1))
        test_loss_sum += batch_loss.item() * X_test_batch.size(0)
        test_seen += X_test_batch.size(0)
        pred_chunks.append(batch_pred.flatten())
        true_chunks.append(y_test_batch)

if test_seen:
    test_loss = test_loss_sum / test_seen
    y_test_pred = torch.cat(pred_chunks).numpy()
    y_test_true = torch.cat(true_chunks).numpy()
    # A prediction counts as correct when thresholding at 0.5 matches the label.
    accuracy = ((y_test_pred > 0.5) == y_test_true).mean()
else:
    # Empty test split: report NaN rather than failing on torch.cat([]).
    test_loss = float('nan')
    y_test_pred = torch.empty(0).numpy()
    y_test_true = torch.empty(0).numpy()
    accuracy = float('nan')
print(f'Test loss = {test_loss:.4f}, Accuracy = {accuracy:.4f}')

# Save the model
# Only the parameter dictionary (state_dict) is written to 'my_model.pth',
# not the whole module object.
model_weights = model.state_dict()
torch.save(model_weights, 'my_model.pth')
