In [None]:
import pandas as pd
import scipy.io
import numpy as np

# Directory containing matrix.mtx, genes.tsv and barcodes.tsv.
# Kept in one place so the dataset location can be changed with a single edit.
DATA_DIR = '../data/PBMC/PBMC3K_hg19'

# Load expression matrix (Matrix Market file, read as a scipy sparse matrix)
matrix = scipy.io.mmread(f'{DATA_DIR}/matrix.mtx')

# Load genes: headerless, tab-separated, two columns named here
# 'gene_id' and 'gene_symbol'
genes = pd.read_csv(f'{DATA_DIR}/genes.tsv',
                    header=None, sep='\t', names=['gene_id', 'gene_symbol'])

# Load barcodes: headerless, single column named here 'barcode'
barcodes = pd.read_csv(f'{DATA_DIR}/barcodes.tsv',
                       header=None, sep='\t', names=['barcode'])

# Transform sparse matrix into pandas sparse dataframe.
# Rows are labelled with gene symbols and columns with barcodes; pandas raises
# ValueError if the label counts do not match the matrix dimensions.
expression_matrix = pd.DataFrame.sparse.from_spmatrix(matrix)
expression_matrix.index = genes['gene_symbol']
expression_matrix.columns = barcodes['barcode']

# Check basic information about the DataFrame.
# DataFrame.info() writes its summary itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
expression_matrix.info()

In [None]:
# Per-gene average: genes are the rows, so reduce across the columns
mean_expression = expression_matrix.mean(axis='columns')

# Per-cell total: cells are the columns, so reduce down the rows
total_expression_per_cell = expression_matrix.sum(axis='index')

# Per-gene count of entries that are not zero
is_non_zero = expression_matrix != 0
non_zero_counts = is_non_zero.sum(axis='columns')

# Preview the first rows of each summary, one after another
print(
    mean_expression.head(),
    total_expression_per_cell.head(),
    non_zero_counts.head(),
    sep='\n',
)

In [None]:
# Convert the sparse frame to a regular dense DataFrame so that statistics
# beyond the ones computed above can be run on it.
dense_matrix = expression_matrix.sparse.to_dense()

# Example: standard deviation of each gene (row-wise, i.e. across the columns)
std_dev_per_gene = dense_matrix.std(axis='columns')

print(std_dev_per_gene.head())