### Imports

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

### Data loading

In [3]:
# Tab-separated variant table that load_data() reads; the bare file name is
# resolved relative to the current working directory.
DATA_FILE = 'CPCT02220079.annotated.processed.part.tsv'

In [4]:
def load_data(file):
    """Read the variant columns of a tab-separated file into a DataFrame.

    Only the ``CHROM``, ``POS``, ``REF`` and ``ALT`` columns are loaded; any
    other column present in the file is ignored.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A ``pandas.DataFrame`` holding the four columns above. ``CHROM``,
        ``REF`` and ``ALT`` are read as strings; ``POS`` keeps the dtype
        pandas infers for it.
    """
    return pd.read_csv(
        file,
        sep='\t',
        usecols=['CHROM', 'POS', 'REF', 'ALT'],
        # Chromosome labels can mix numeric and non-numeric names (e.g. 1 and
        # X). Left to type inference, a large file may come back with a CHROM
        # column holding both ints and strings, so pin the text columns to str
        # to get one consistent label type for grouping and file naming.
        dtype={'CHROM': str, 'REF': str, 'ALT': str},
    )

In [5]:
# Load the CHROM, POS, REF and ALT columns of DATA_FILE into the working
# DataFrame used by the cells below.
df = load_data(DATA_FILE)

### Variant classification

In [6]:
def classify_variant(s):
    """Label a variant by comparing the lengths of its REF and ALT values.

    Args:
        s: Row-like object indexable by ``'REF'`` and ``'ALT'``; both values
            must support ``len()``.

    Returns:
        ``'INS'`` when ALT is longer than REF, ``'DEL'`` when it is shorter,
        ``'SNP'`` when both have length 1, and ``None`` for any other pair of
        equal lengths.
    """
    n_ref = len(s['REF'])
    n_alt = len(s['ALT'])

    # Unequal lengths: the direction of the difference decides the label.
    if n_ref != n_alt:
        return 'INS' if n_alt > n_ref else 'DEL'

    # Equal lengths: only the single-character case gets a label.
    return 'SNP' if n_ref == 1 else None

In [7]:
# Call classify_variant once per row (axis=1) and store the resulting label
# ('INS', 'DEL', 'SNP' or None) in a new VARIANT_CLASS column.
df['VARIANT_CLASS'] = df.apply(classify_variant, axis=1)

### Plot for every chromosome

In [32]:
# Save one bar chart of variant-class counts per chromosome as '<CHROM>.png'
# in the current working directory.
for name, group in df[['CHROM', 'VARIANT_CLASS']].groupby('CHROM'):
    counts = group['VARIANT_CLASS'].value_counts()
    if counts.empty:
        # value_counts() drops missing labels, so a chromosome whose variants
        # were all left unclassified has nothing to draw; plotting an empty
        # Series would raise instead of producing a chart.
        continue
    # Series.plot returns the Axes it drew on, not the Figure.
    ax = counts.plot(kind='bar')
    ax.set_title(f'Chromosome {name}')
    fig = ax.get_figure()
    fig.savefig(f'{name}.png')
    # Close this exact figure so the next chromosome starts on a fresh one
    # and figures do not pile up in memory.
    plt.close(fig)