<a href="https://colab.research.google.com/github/Rezarsa82/online_retai-_matrix_monitoring/blob/main/matrix_monitoring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
import pandas as pd
import zipfile
import requests
from io import BytesIO

url = "https://archive.ics.uci.edu/static/public/352/online+retail.zip"

r = requests.get(url)
zip_file = zipfile.ZipFile(BytesIO(r.content))

print(zip_file.namelist())  # ['Online Retail.xlsx']

excel_file = zip_file.open("Online Retail.xlsx")
df = pd.read_excel(excel_file)

print(df.columns)
print(df.head())


['Online Retail.xlsx']
Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

          InvoiceDate  UnitPrice  CustomerID         Country  
0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  
1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  
3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  


In [41]:
df = df.dropna()
print(df.isna().sum())

InvoiceNo      0
StockCode      0
Description    0
Quantity       0
InvoiceDate    0
UnitPrice      0
CustomerID     0
Country        0
dtype: int64
InvoiceNo      0
StockCode      0
Description    0
Quantity       0
InvoiceDate    0
UnitPrice      0
CustomerID     0
Country        0
dtype: int64


In [55]:
df = df[df["Quantity"] > 0]
print((df["Quantity"] <= 0).sum())
num_cancelled = df[df["InvoiceNo"].astype(str).str.startswith("C")].shape[0]
print("تعداد فاکتورهای بازگشتی:", num_cancelled)
df_sorted = df.sort_values('InvoiceDate')

0
تعداد فاکتورهای بازگشتی: 0


In [56]:
item_transaction_matrix = df_sorted.groupby(['InvoiceNo', 'StockCode'])['Quantity'].sum().unstack().fillna(0)

item_transaction_matrix[item_transaction_matrix > 0] = 1

print(item_transaction_matrix.head())

StockCode  10002  10080  10120  10125  10133  10135  11001  15030  15034  \
InvoiceNo                                                                  
536365       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536366       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536367       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536368       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536369       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   

StockCode  15036  ...  90214V  90214W  90214Y  90214Z  BANK CHARGES   C2  DOT  \
InvoiceNo         ...                                                           
536365       0.0  ...     0.0     0.0     0.0     0.0           0.0  0.0  0.0   
536366       0.0  ...     0.0     0.0     0.0     0.0           0.0  0.0  0.0   
536367       0.0  ...     0.0     0.0     0.0     0.0           0.0  0.0  0.0   
536368       0.0  ...     0.0     0.0     0.0     0.0         

In [57]:
import random

random_invoices = random.sample(list(item_transaction_matrix.index), 5)

for invoice in random_invoices:
    purchased_items = item_transaction_matrix.loc[invoice][item_transaction_matrix.loc[invoice] == 1]
    print(f"فاکتور {invoice}: {len(purchased_items)} کالا خریداری شده")
    if len(purchased_items) > 0:
        print(f"   نمونه کالاها: {list(purchased_items.index[:3])}")

فاکتور 568919: 32 کالا خریداری شده
   نمونه کالاها: [20725, 20726, 20728]
فاکتور 561800: 8 کالا خریداری شده
   نمونه کالاها: [21928, 21929, 22411]
فاکتور 578015: 10 کالا خریداری شده
   نمونه کالاها: [22086, 22573, 22574]
فاکتور 547516: 2 کالا خریداری شده
   نمونه کالاها: [22423, 22457]
فاکتور 544096: 7 کالا خریداری شده
   نمونه کالاها: [21586, 21621, 21878]


In [60]:
batch_size = 1000
batches = []

for i in range(0, len(item_transaction_matrix), batch_size):
    batch_matrix = item_transaction_matrix.iloc[i:i+batch_size]
    batches.append(batch_matrix)

print(f"\nTotal Batches: {len(batches)}")
print(f"First Batch Shape: {batches[0]}")


Total Batches: 19
First Batch Shape: StockCode  10002  10080  10120  10125  10133  10135  11001  15030  15034  \
InvoiceNo                                                                  
536365       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536366       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536367       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536368       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
536369       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
...          ...    ...    ...    ...    ...    ...    ...    ...    ...   
538830       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
538831       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
538832       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
538836       0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   
538839       0.0    0.0    0.0    0.0    0.0    0.

In [61]:
first_batch = batches[0]
first_customer = first_batch.iloc[0]
items_bought = first_customer[first_customer == 1].index.tolist()

print("دسته اول، نفر اول خریداری کرده کالاها:")
print(items_bought)

دسته اول، نفر اول خریداری کرده کالاها:
[21730, 22752, 71053, '84029E', '84029G', '84406B', '85123A']


In [64]:
import numpy as np
import pandas as pd
from scipy import linalg

class RandomGaussianProjection:
    def __init__(self, n_components='auto', eps=0.1, random_state=None):
        self.n_components = n_components
        self.eps = eps
        self.random_state = random_state
        self.components_ = None
        self.n_components_ = None
        self.n_features_ = None

    def _calculate_n_components(self, n_features):
        if self.n_components == 'auto':
            if n_features <= 1:
                return 1
            n_components = int(4 * np.log(n_features) / (self.eps**2 / 2 - self.eps**3 / 3))
            self.n_components_ = max(1, min(n_components, n_features))
        else:
            self.n_components_ = min(self.n_components, n_features)
        return self.n_components_

    def _generate_random_matrix(self, n_features):
        rng = np.random.RandomState(self.random_state)

        n_components = self._calculate_n_components(n_features)
        self.components_ = rng.normal(
            loc=0,
            scale=1/np.sqrt(n_components),
            size=(n_components, n_features)
        )
        return self.components_

    def fit(self, X):
        if X is None or len(X) == 0:
            raise ValueError("X cannot be None or empty")

        if isinstance(X, pd.DataFrame):
            X = X.values
        elif not isinstance(X, np.ndarray):
            X = np.array(X)

        n_samples, self.n_features_ = X.shape

        if self.n_features_ <= 1:
            raise ValueError("X must have at least 2 features")

        self._generate_random_matrix(self.n_features_)
        return self

    def transform(self, X):
        if self.components_ is None:
            raise ValueError("Model must be fitted before transformation")

        if isinstance(X, pd.DataFrame):
            X = X.values
        elif not isinstance(X, np.ndarray):
            X = np.array(X)

        if X.shape[1] != self.components_.shape[1]:
            raise ValueError(f"Input has {X.shape[1]} features, but model was fitted with {self.components_.shape[1]} features")

        return X @ self.components_.T

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)

    def inverse_transform(self, X_transformed):
        if self.components_ is None:
            raise ValueError("Model must be fitted before inverse transformation")

        components_pinv = linalg.pinv(self.components_)
        return X_transformed @ components_pinv.T

    def reconstruction_error(self, X):
        X_transformed = self.transform(X)
        X_reconstructed = self.inverse_transform(X_transformed)
        return np.mean((X - X_reconstructed) ** 2)

    def __repr__(self):
        return f"RandomGaussianProjection(n_components={self.n_components_})"

In [66]:
rgp = RandomGaussianProjection(n_components=3, random_state=42)
transformed = rgp.fit_transform(first_batch)
print(f"Success! Reduced from {first_batch.shape} to {transformed.shape}")
print(f"Components shape: {rgp.components_.shape}")
reconstructed = rgp.inverse_transform(transformed)
print(f"Reconstruction works: {reconstructed.shape == first_batch.shape}")

Success! Reduced from (1000, 3665) to (1000, 3)
Components shape: (3, 3665)
Reconstruction works: True
