In [11]:
import torch
import torch.nn as nn
import torch.optim as optim

# 定义简单的模型
class SimpleModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 定义模型参数
input_dim = 10
hidden_dim = 5
output_dim = 2  # 二分类任务的输出维度（两个类别）
batch_size = 8  # 假设我们有8个样本

# 创建模型、损失函数和优化器
model = SimpleModel(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 生成示例数据
inputs = torch.randn(batch_size, input_dim)
labels = torch.randint(0, 2, (batch_size,))  # 标签为 0 或 1

# 前向传播
outputs = model(inputs)

# 打印 logits
print(f'Logits: {outputs}')

print(f'labels: {labels}')
# 计算损失
loss = criterion(outputs, labels)

# 后向传播和优化
loss.backward()
optimizer.step()

# 打印结果
print(f'Loss: {loss.item()}')

# 将 logits 转换为概率
probabilities = torch.softmax(outputs, dim=1)
print(f'Probabilities: {probabilities}')

# 预测类别
predicted_classes = torch.argmax(probabilities, dim=1)
print(f'Predicted Classes: {predicted_classes}')
print(f'True Labels: {labels}')

preds = outputs.argmax(1).cpu().numpy()
print("preds: ",preds)


Logits: tensor([[ 0.1460,  0.1643],
        [ 0.1175,  0.1653],
        [ 0.0888,  0.1147],
        [-0.5661, -0.3051],
        [ 0.0424,  0.1226],
        [ 0.1516,  0.0336],
        [ 0.0943,  0.1444],
        [ 0.1020,  0.1615]], grad_fn=<AddmmBackward0>)
labels: tensor([0, 1, 0, 0, 0, 0, 0, 0])
Loss: 0.7152491807937622
Probabilities: tensor([[0.4954, 0.5046],
        [0.4881, 0.5119],
        [0.4935, 0.5065],
        [0.4351, 0.5649],
        [0.4800, 0.5200],
        [0.5295, 0.4705],
        [0.4875, 0.5125],
        [0.4851, 0.5149]], grad_fn=<SoftmaxBackward0>)
Predicted Classes: tensor([1, 1, 1, 1, 1, 0, 1, 1])
True Labels: tensor([0, 1, 0, 0, 0, 0, 0, 0])
preds:  [1 1 1 1 1 0 1 1]


In [13]:
import numpy as np
import sys
from anndata import AnnData
import scanpy as sc
sc.settings.verbosity = 'info'
sc.settings.logfile = sys.stdout  # for doctests
np.set_printoptions(precision=2)
adata = AnnData(np.array([
         [3, 3, 3, 6, 6],
    [1, 1, 1, 2, 2],
     [1, 22, 1, 2, 2],
     ], dtype='float32'))


In [14]:
adata.X

array([[ 3.,  3.,  3.,  6.,  6.],
       [ 1.,  1.,  1.,  2.,  2.],
       [ 1., 22.,  1.,  2.,  2.]], dtype=float32)

In [17]:
X_norm = sc.pp.normalize_total(adata, target_sum=3, inplace=False)['X']
X_norm

normalizing counts per cell
    finished (0:00:00)


array([[0.43, 0.43, 0.43, 0.86, 0.86],
       [0.43, 0.43, 0.43, 0.86, 0.86],
       [0.11, 2.36, 0.11, 0.21, 0.21]], dtype=float32)

In [20]:
np.sum(X_norm[1])

3.0

In [26]:
binning = 3

n_bins = binning  # NOTE: the first bin is always a spectial for zero
binned_rows = []
bin_edges = []

layer_data = X_norm
# layer_data = layer_data.A if issparse(layer_data) else layer_data
if layer_data.min() < 0:
    raise ValueError(
        f"Assuming non-negative data, but got min value {layer_data.min()}."
    )
for row in layer_data:
    if row.max() == 0:
        print(111)
        binned_rows.append(np.zeros_like(row, dtype=np.int64))
        bin_edges.append(np.array([0] * n_bins))
        continue
    non_zero_ids = row.nonzero()
    non_zero_row = row[non_zero_ids]
    bins = np.quantile(non_zero_row, np.linspace(0, 1, n_bins - 1))
    # bins = np.sort(np.unique(bins))
    # NOTE: comment this line for now, since this will make the each category
    # has different relative meaning across datasets
    non_zero_digits = _digitize(non_zero_row, bins)
    assert non_zero_digits.min() >= 1
    assert non_zero_digits.max() <= n_bins - 1
    binned_row = np.zeros_like(row, dtype=np.int64)
    binned_row[non_zero_ids] = non_zero_digits
    binned_rows.append(binned_row)
    bin_edges.append(np.concatenate([[0], bins]))
adata.layers[self.result_binned_key] = np.stack(binned_rows)
adata.obsm["bin_edges"] = np.stack(bin_edges)

NameError: name '_digitize' is not defined

In [25]:
adata

AnnData object with n_obs × n_vars = 3 × 5