In [1]:
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [2]:
pd.set_option('display.max_rows', 10)  # For pandas DataFrames
pd.set_option('display.max_columns', 10)  # For pandas DataFrames


In [3]:
# df = pd.read_csv('customer_purchases.csv')
df = pd.read_csv('customer_purchases_10000.csv')
df

Unnamed: 0,customer_id,age,gender,country,city,item_purchase_date,item_price,item_category,item_rating,item_qty
0,1,54,Female,Australia,Sydney,2024-06-12,2352.60,Home & Kitchen,4.3,3
1,1,54,Female,Australia,Sydney,2024-08-17,75.10,Home & Kitchen,4.8,3
2,1,54,Female,Australia,Sydney,2024-03-20,1373.37,Home & Kitchen,4.7,3
3,1,54,Female,Australia,Sydney,2024-10-05,1462.92,Home & Kitchen,4.1,3
4,1,54,Female,Australia,Sydney,2024-02-16,4983.45,Home & Kitchen,2.0,2
...,...,...,...,...,...,...,...,...,...,...
9995,100,29,Male,Australia,Toronto,2024-10-11,2804.46,Sporting Goods,2.8,2
9996,100,29,Male,Australia,Toronto,2024-06-17,4940.21,Sporting Goods,4.4,3
9997,100,29,Male,Australia,Toronto,2024-03-07,3218.39,Sporting Goods,4.3,1
9998,100,29,Male,Australia,Toronto,2023-12-25,2812.24,Sporting Goods,4.8,4


In [4]:
item_categories = {
0:'Beauty' ,
1:'Electronics' ,
2:'Clothing' ,
3:'Home & Kitchen' ,
4:'Sporting Goods' ,
5:'Toys & Games' ,
6:'Books' ,
7:'Automotive' ,
8:'Jewelry' ,
9:'Furniture' , 
}


In [5]:
df['item_category'] = df['item_category'].map({v:k for k,v in item_categories.items()})
df['gender'] = df['gender'].map({'Male':0, "Female":1})
df = df.drop(['country', 'city', 'item_purchase_date'], axis=1)

In [6]:
df

Unnamed: 0,customer_id,age,gender,item_price,item_category,item_rating,item_qty
0,1,54,1,2352.60,3,4.3,3
1,1,54,1,75.10,3,4.8,3
2,1,54,1,1373.37,3,4.7,3
3,1,54,1,1462.92,3,4.1,3
4,1,54,1,4983.45,3,2.0,2
...,...,...,...,...,...,...,...
9995,100,29,0,2804.46,4,2.8,2
9996,100,29,0,4940.21,4,4.4,3
9997,100,29,0,3218.39,4,4.3,1
9998,100,29,0,2812.24,4,4.8,4


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   customer_id    10000 non-null  int64  
 1   age            10000 non-null  int64  
 2   gender         10000 non-null  int64  
 3   item_price     10000 non-null  float64
 4   item_category  10000 non-null  int64  
 5   item_rating    10000 non-null  float64
 6   item_qty       10000 non-null  int64  
dtypes: float64(2), int64(5)
memory usage: 547.0 KB


In [14]:
class MLP(nn.Module):
    def __init__(self,inp_size, h1, out_size):
        super(MLP,self).__init__()
        self.lay1 = nn.Linear(inp_size, h1)
        self.lay2 = nn.Linear(h1, out_size)
        
    def forward(self, X):
        out = self.lay1(X)
        out = self.lay2(out)
        return out
    
    def train(self, X_train, y_train, optimiser, loss_category, epochs=10):
        for i in range(epochs):
            optimiser.zero_grad()
            loss_category = nn.MSELoss()
            
            y_pred = self.forward(X_train)
            loss = loss_category(y_pred, y_train)
            loss.backward()
            optimiser.step()
            print(loss)

In [15]:
model = MLP(10,10,1)

In [16]:
optimiser = torch.optim.SGD(model.parameters(), lr=0.1)
loss_category = nn.MSELoss()

In [19]:
model.train(torch.randn(10), torch.randn(1), optimiser, loss_category)

tensor(1.1920, grad_fn=<MseLossBackward0>)
tensor(0.0115, grad_fn=<MseLossBackward0>)
tensor(0.0002, grad_fn=<MseLossBackward0>)
tensor(5.9820e-06, grad_fn=<MseLossBackward0>)
tensor(1.6897e-07, grad_fn=<MseLossBackward0>)
tensor(4.7929e-09, grad_fn=<MseLossBackward0>)
tensor(1.3718e-10, grad_fn=<MseLossBackward0>)
tensor(3.9870e-12, grad_fn=<MseLossBackward0>)
tensor(1.0747e-13, grad_fn=<MseLossBackward0>)
tensor(7.9936e-15, grad_fn=<MseLossBackward0>)
