<a href="https://colab.research.google.com/github/Rezarsa82/online_retail_matrix_monitoring/blob/main/OnlineRetail.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import zipfile
import requests
from io import BytesIO

# Download URL for the zipped Online Retail workbook.
url = "https://archive.ics.uci.edu/static/public/352/online+retail.zip"

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# right here on an HTTP error instead of later with an opaque BadZipFile.
r = requests.get(url, timeout=60)
r.raise_for_status()

# The archive is held entirely in memory; the context managers release the
# zip and member handles as soon as the workbook has been parsed.
with zipfile.ZipFile(BytesIO(r.content)) as zip_file:
    print(zip_file.namelist())  # ['Online Retail.xlsx']

    with zip_file.open("Online Retail.xlsx") as excel_file:
        df = pd.read_excel(excel_file)

print(df.columns)
print(df.head())

['Online Retail.xlsx']
Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

          InvoiceDate  UnitPrice  CustomerID         Country  
0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  
1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  
3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  


In [None]:
# Show how many values are missing in each column before cleaning.
missing_before = df.isna().sum()
print(missing_before)

# Discard every row that has at least one missing value.
df = df.dropna()

# Confirm that no missing values are left.
missing_after = df.isna().sum()
print(missing_after)

InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64
InvoiceNo      0
StockCode      0
Description    0
Quantity       0
InvoiceDate    0
UnitPrice      0
CustomerID     0
Country        0
dtype: int64


In [None]:
# Keep only line items with a strictly positive quantity.
df = df[df["Quantity"] > 0]

# Sanity check: count of non-positive quantities still present after the filter.
print((df["Quantity"] <= 0).sum())

# Rows whose invoice number starts with "C" are counted as cancelled.
# InvoiceNo is cast to str first so the prefix test works on non-string values.
num_cancelled = df[df["InvoiceNo"].astype(str).str.startswith("C")].shape[0]
print("Cancelled:", num_cancelled)

# Chronological view of the cleaned line items, used by the following cells.
df_sorted = df.sort_values('InvoiceDate')

0
Cannelled: 0


In [None]:
# Total quantity for every (invoice, product) pair that occurs in the data.
invoice_item_quantity = df_sorted.groupby(['InvoiceNo', 'StockCode'])['Quantity'].sum()

# Pivot to an invoices x products table; pairs that never occur become 0.
item_transaction_matrix = invoice_item_quantity.unstack().fillna(0)

# Binarise in place: any positive total becomes 1, everything else is left untouched.
item_transaction_matrix[item_transaction_matrix.gt(0)] = 1

print(item_transaction_matrix.head())

StockCode  10002  10080  10120  10125  10133  10135  11001  15030  15034  \
InvoiceNo                                                                  
536365       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536366       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536367       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536368       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536369       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   

StockCode  15036  ...  90214V  90214W  90214Y  90214Z  BANK CHARGES   C2  DOT  \
InvoiceNo         ...                                                           
536365       0.0  ...     0.0     0.0     0.0     0.0           0.0  0.0  0.0   
536366       0.0  ...     0.0     0.0     0.0     0.0           0.0  0.0  0.0   
536367       0.0  ...     0.0     0.0     0.0     0.0           0.0  0.0  0.0   
536368       0.0  ...     0.0     0.0     0.0     0.0         

In [None]:
import random

# Spot-check a few invoices of the binary matrix.
# A dedicated, seeded generator makes the same invoices come up on every run
# without touching the global `random` state.
sample_seed = 42
invoice_ids = list(item_transaction_matrix.index)
# Never ask for more invoices than exist (random.sample would raise ValueError).
random_invoices = random.Random(sample_seed).sample(invoice_ids, min(5, len(invoice_ids)))

for invoice in random_invoices:
    # Look the invoice row up once and reuse it for both the mask and the selection.
    invoice_row = item_transaction_matrix.loc[invoice]
    purchased_items = invoice_row[invoice_row == 1]
    print(f"Invoice {invoice}: {len(purchased_items)} Products")
    if len(purchased_items) > 0:
        # Only the first five product codes are listed to keep the output short.
        print(f"   Products: {list(purchased_items.index[:5])}")

ّInvoice 574534: 16 Products
   Products: [16237, 20685, 21498, 21499, 21500]
ّInvoice 551234: 3 Products
   Products: [21897, 21898, 22459]
ّInvoice 561909: 8 Products
   Products: [15036, 20725, 21876, 21877, 21914]
ّInvoice 556520: 19 Products
   Products: [21381, 21468, 22197, 22222, 22551]
ّInvoice 539041: 25 Products
   Products: [17003, 20725, 21165, 21166, 21175]


In [None]:
# Number of invoices (matrix rows) per batch.
batch_size = 500

# Row offsets at which each batch starts: 0, batch_size, 2 * batch_size, ...
batch_starts = range(0, len(item_transaction_matrix), batch_size)

# Consecutive row slices of the matrix; the final slice may be shorter.
batches = [
    item_transaction_matrix.iloc[start : start + batch_size]
    for start in batch_starts
]

print(f"Total batches: {len(batches)}")
print(f"First batch shape: {batches[0].shape}")
print(f"Last batch shape: {batches[-1].shape}")

Total batches: 38
First batch shape: (500, 3665)
Last batch shape: (36, 3665)


In [1]:
import numpy as np

class FrequentDirections:
    """Streaming low-rank sketch of a stream of rows (Frequent Directions).

    Rows are written into an ``ell x d`` buffer. When the buffer is full it is
    compressed with an SVD: every squared singular value is reduced by the
    squared singular value at 0-based index ``ell // 2 - 1`` (clipped at zero).
    That zeroes the rows from that index onward, so the rows starting at
    ``ell // 2`` can be overwritten by new input.

    Attributes:
        ell: number of rows kept in the sketch.
        d: row length, learned from the first row seen (``None`` until then).
        sketch: the ``ell x d`` buffer (``None`` until the first row is seen).
        next_row: index of the next buffer row to be written.
    """

    def __init__(self, ell):
        """Create an empty sketch with ``ell`` rows.

        Args:
            ell: sketch size; must be a positive integer.

        Raises:
            ValueError: if ``ell`` is not a positive integer.
        """
        if not isinstance(ell, (int, np.integer)) or ell < 1:
            raise ValueError(f"ell must be a positive integer, got {ell!r}")
        self.ell = int(ell)
        self.d = None
        self.sketch = None
        self.next_row = 0

    def fit(self, data_stream):
        """Feed every row of ``data_stream`` into the sketch.

        Args:
            data_stream: iterable of 1-D rows (e.g. a 2-D array or a list of
                lists). Objects exposing ``to_numpy()`` (such as a pandas
                DataFrame) are converted first, because iterating a DataFrame
                directly yields its column labels rather than its rows.
        """
        if hasattr(data_stream, "to_numpy"):
            data_stream = data_stream.to_numpy()
        for row in data_stream:
            self._update(row)

    def _is_full(self):
        """Return True when every buffer row has been written since the last compression."""
        return self.next_row >= self.ell

    def _update(self, row):
        """Insert a single row, compressing the buffer first if it is full.

        Args:
            row: 1-D sequence of numbers. The first row fixes the dimension
                ``d``; every later row must have the same length.

        Raises:
            ValueError: if ``row`` is not 1-D or its length differs from ``d``.
        """
        row = np.asarray(row, dtype=float)
        if row.ndim != 1:
            raise ValueError(f"each row must be 1-D, got an array with ndim={row.ndim}")

        if self.d is None:
            # Lazily allocate the buffer once the row length is known.
            self.d = row.shape[0]
            self.sketch = np.zeros((self.ell, self.d))
            self.next_row = 0
        elif row.shape[0] != self.d:
            # Without this check a length-1 row would silently broadcast
            # across the whole buffer row.
            raise ValueError(f"expected a row of length {self.d}, got {row.shape[0]}")

        if self._is_full():
            self._compress()

        self.sketch[self.next_row] = row
        self.next_row += 1

    def _compress(self):
        """Shrink the buffer so that the rows from ``ell // 2`` onward are zero."""
        _, sigma, Vt = np.linalg.svd(self.sketch, full_matrices=False)

        # With full_matrices=False the SVD yields only min(ell, d) singular
        # values and rows of Vt. Pad the squared spectrum with zeros up to
        # ell so the indexing below is valid even when d < ell.
        rank = sigma.shape[0]
        sigma_sq = np.zeros(self.ell)
        sigma_sq[:rank] = sigma ** 2

        # Key step: shrink by the squared singular value at index ell // 2 - 1
        # (index 0 when ell == 1).
        delta_sq = sigma_sq[max(self.ell // 2 - 1, 0)]

        new_sigma = np.sqrt(np.maximum(sigma_sq[:rank] - delta_sq, 0))

        # Keep only Vt and the shrunken singular values. The result is written
        # into a fresh ell x d buffer so the sketch never changes shape.
        compressed = np.zeros((self.ell, self.d))
        compressed[:rank] = new_sigma[:, np.newaxis] * Vt
        self.sketch = compressed

        self.next_row = self.ell // 2

    def get_sketch(self):
        """Return the current ``ell x d`` buffer (``None`` if no row has been seen).

        The internal array itself is returned, not a copy.
        """
        return self.sketch
