# Convert 10x `mtx` data to `H5AD` 

Even though `scanpy` has a specialized function for 10x Genomics' `mtx` format, it may be useful to know how to read it in manually.

In [1]:
import scanpy as sc
import os
import csv

### Set up the I/O parameters

In [2]:
# Input: directory holding the 10x Genomics matrix export and the names of
# the three files read from it below.
dataset_dir = "/home/ubuntu/SCRealVAE_68K/Raw_Data/filtered_matrices_mex/hg19/"
data_file = "matrix.mtx"
var_names_file = "genes.tsv"
obs_names_file = "barcodes.tsv"
# Output: absolute path of the H5AD file that will be written.
output_h5ad_file = "/home/ubuntu/SCRealVAE_68K/Raw_Data/raw_68kPBMCs.h5ad"

data_path = os.path.join(dataset_dir, data_file)
var_names_path = os.path.join(dataset_dir, var_names_file)
obs_names_path = os.path.join(dataset_dir, obs_names_file)
# `output_h5ad_file` is already absolute. Joining it onto `dataset_dir` would
# silently discard `dataset_dir` (os.path.join restarts at an absolute
# component), so use the path as-is rather than suggest that the output is
# written inside the dataset directory.
output_h5ad_path = output_h5ad_file

### Load in genes/features

In [3]:
# Read the tab-separated feature file and keep the second column of every
# row as the variable name (in a 10x `genes.tsv` this is expected to be the
# gene symbol -- confirm against your file).
# `newline=""` is what the csv module asks for when it is handed a file
# object; blank lines (e.g. a trailing one) are skipped instead of raising
# IndexError on the column lookup.
with open(var_names_path, "r", newline="") as var_file:
    var_read = csv.reader(var_file, delimiter='\t')
    var_names = [row[1] for row in var_read if row]

### Load in cell barcodes (observations)

In [4]:
# Read the tab-separated barcode file and keep the first column of every row
# as the observation name.
# `newline=""` is what the csv module asks for when it is handed a file
# object; blank lines (e.g. a trailing one) are skipped instead of raising
# IndexError on the column lookup.
with open(obs_names_path, "r", newline="") as obs_file:
    obs_read = csv.reader(obs_file, delimiter='\t')
    obs_names = [row[0] for row in obs_read if row]

### Use `scanpy` to read in the `mtx` data (be patient for this part)

In [5]:
# Parse the matrix file and swap its axes in one step: the gene names are
# attached to the variables (columns) and the barcodes to the observations
# (rows), so the matrix has to be transposed after reading.
adata = sc.read(data_path).transpose()

### Assign the gene and barcode names and make them unique

In [6]:
# Label both axes of the matrix. The two axes are independent of each other:
# each one is given its names and then has duplicate names disambiguated.

# Rows: one observation per barcode.
adata.obs_names = obs_names
adata.obs_names_make_unique()

# Columns: one variable per gene/feature.
adata.var_names = var_names
adata.var_names_make_unique()

### Write out the data

In [7]:
# Persist the annotated matrix to the configured output path.
adata.write(output_h5ad_path)