In [1]:
from comet_ml import Experiment

import sys
sys.path.append('..')

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline 

import seaborn as sns
from matplotlib.colors import LogNorm

from tqdm import tqdm

from modules.mrartemev_ganlib import gans
from modules.mrartemev_ganlib import nn as gans_nn

import torch
from torch import nn
from torch.nn import functional as F
import os


In [2]:
# Index of the GPU to expose to this process; a later cell writes it to CUDA_VISIBLE_DEVICES.
GPU_NUM = 1

In [3]:
# Load the processed cosmic dataset (gzip-compressed CSV) and preview its first rows.
cosmic = pd.read_csv('../data/processed/cosmic.csv.gz')
cosmic.head()

Unnamed: 0,Channel,ADC_ADC_Sig,ADC_ADC_bg,Board,Nhit,Asic_TDC0,Asic_ADC0,Asic_TOT0,Asic_TDC1,Asic_ADC1,...,Asic_TOT4,Asic_TDC5,Asic_ADC5,Asic_TOT5,Asic_TDC6,Asic_ADC6,Asic_TOT6,Asic_TDC7,Asic_ADC7,Asic_TOT7
0,41.0,-0.001751,-1.0,3.0,1.0,-1.0,-1.0,-1.0,0.143853,-0.002085,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,14.0,0.000875,-1.0,15.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,0.10177,0.001617,0.0,-1.0,-1.0,-1.0
2,15.0,-0.003015,-1.0,34.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.024574,-0.004929,0.047619
3,12.0,0.003501,-1.0,35.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,0.047619,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,12.0,0.014588,-1.0,56.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,0.047619,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [4]:
# Restrict CUDA to the configured GPU. This must run before the first CUDA call
# (torch.cuda.is_available() below), otherwise the setting is not picked up.
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU_NUM)

# Context (conditioning) columns, in the positional order encode_context indexes them:
# column 0 = Channel, column 1 = Board, column 2 = ADC_ADC_Sig.
c_cols = ['Channel', 'Board', 'ADC_ADC_Sig']

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Embeddings

Learned embeddings for the discrete conditioning (context) columns.

In [5]:
# Discrete context columns; encode_context folds these two into a single embedding index.
# NOTE(review): this list is not referenced anywhere else in the visible notebook.
c_disc_cols = ['Channel', 'Board']

Channels: [0, 47]

Boards: [0, 255]



In [6]:
def encode_context(embeddings, context, num_channels=48):
    """
    Build the conditioning vector from raw context rows.

    The two discrete columns are folded into one flat index,
    ``Channel + Board * num_channels``, which is looked up in ``embeddings``;
    the continuous third column is appended unchanged as the last feature.

    embeddings: callable mapping a LongTensor of indices to vectors
                (e.g. an nn.Embedding with at least n_boards * num_channels rows)
    context: Batch_size x [Channel, Board, ADC_ADC_Sig]
    num_channels: number of distinct Channel values per board; the default (48)
                  matches the Channels range [0, 47] used in this notebook

    returns: float tensor of shape Batch_size x (embedding_dim + 1)
    """
    channel, board, adc_sig = context[:, 0], context[:, 1], context[:, 2]
    # Row-major flattening of the (Board, Channel) pair into a single table index.
    emb_ind = (channel + board * num_channels).long()
    context_emb = embeddings(emb_ind).float()
    return torch.cat([context_emb, adc_sig.unsqueeze(1).float()], dim=1)


### Data

In [7]:
# Per-event columns that are not tied to a single ASIC.
meta_x_cols = ['Nhit', 'ADC_ADC_bg']

# For each of the 8 ASIC slots: its (ADC, TDC, TOT) column triple, in that order.
group_x_cols = {
    asic: [f'Asic_{kind}{asic}' for kind in ('ADC', 'TDC', 'TOT')]
    for asic in range(8)
}

# Full generated-feature layout: meta columns first, then the triples for slot 0, 1, ..., 7.
x_cols = ['Nhit', 'ADC_ADC_bg'] + [
    col for asic in range(len(group_x_cols)) for col in group_x_cols[asic]
]

cosmic[x_cols].head()

Unnamed: 0,Nhit,ADC_ADC_bg,Asic_ADC0,Asic_TDC0,Asic_TOT0,Asic_ADC1,Asic_TDC1,Asic_TOT1,Asic_ADC2,Asic_TDC2,...,Asic_TOT4,Asic_ADC5,Asic_TDC5,Asic_TOT5,Asic_ADC6,Asic_TDC6,Asic_TOT6,Asic_ADC7,Asic_TDC7,Asic_TOT7
0,1.0,-1.0,-1.0,-1.0,-1.0,-0.002085,0.143853,-0.090909,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,0.001617,0.10177,0.0,-1.0,-1.0,-1.0
2,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.004929,-0.024574,0.047619
3,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,0.047619,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,0.047619,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [8]:
# List the distinct Nhit values present in the cosmic data, in ascending order.
sorted(cosmic['Nhit'].unique())

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]

Data structure:

**context**: [Channel, Board, ADC_ADC_Sig]

**data**: ['Nhit', 'ADC_ADC_bg', 'Asic_ADC{i}', 'Asic_TDC{i}', 'Asic_TOT{i}'], with the last three repeated for each ASIC slot i = 0..7

In [25]:
class CustomGenerator(nn.Module):
    """
    Generator that emits one row in the ``x_cols`` layout:
    ['Nhit', 'ADC_ADC_bg', then (Asic_ADC{i}, Asic_TDC{i}, Asic_TOT{i}) per group].

    A shared trunk feeds two heads: one produces a hit/no-hit decision per
    group, the other produces the three values of every group. Groups without
    a hit are overwritten with the sentinel -1, and 'Nhit' is the sum of the
    hit decisions. 'ADC_ADC_bg' is predicted from the masked group values.

    in_features: size of the latent input vector
    out_features: accepted for interface compatibility; not used, the output
                  width is fixed by len(group_x_cols) (2 + 3 * n_groups)
    hidden_features, depth, context: forwarded positionally to gans_nn.MLP
    """

    def __init__(self, in_features, out_features, hidden_features=32, depth=4, context=False):
        super().__init__()
        # Every head is sized from the same group count so that the hit mask
        # (n_groups) always broadcasts against the group values (n_groups x 3).
        n_groups = len(group_x_cols)
        self.initial_net = gans_nn.MLP(in_features, hidden_features, hidden_features, depth, context)
        self.hits_net = gans_nn.MLP(hidden_features, n_groups, hidden_features, depth, context)  # one hit logit per group
        self.group_nets = gans_nn.MLP(hidden_features, 3 * n_groups, hidden_features, depth, context)
        self.bg_net = gans_nn.MLP(3 * n_groups, 1, hidden_features, depth, context)

    def forward(self, x, context=None):
        n_groups = len(group_x_cols)
        initial = self.initial_net(x, context)

        # Per-group hit probability, with a trailing axis so it broadcasts over the 3 group values.
        hits = torch.sigmoid(self.hits_net(initial, context)).unsqueeze(-1)
        # hard_hits = (hits > 0.5).float()  # deterministic alternative
        hard_hits = torch.bernoulli(hits).float()  # stochastic
        # Straight-through estimator: the forward value is the hard 0/1 sample,
        # the gradient flows through the soft probability.
        hard_hits_grad = hits + (hard_hits - hits).detach()
        # one - hit, zero - nohit; summing over the group axis gives the hit count.
        num_hits = hard_hits_grad.sum(1)

        groups = self.group_nets(initial, context=context).view(initial.size(0), n_groups, 3)
        # Keep the predicted values where there is a hit, write the -1 sentinel elsewhere.
        no_hit_fill = torch.full_like(groups, -1.0)
        groups = (1 - hard_hits_grad) * no_hit_fill + hard_hits_grad * groups
        groups = groups.view(groups.size(0), n_groups * 3)

        adc_bg = self.bg_net(groups, context=context)  # TODO: tie this to num_hits
        #  ['Nhit', 'ADC_ADC_bg', 'Asic_ADC{i}', 'Asic_TDC{i}', 'Asic_TOT{i}']
        generated = torch.cat([num_hits, adc_bg, groups], dim=1)
        return generated
        
    

In [26]:
def init_weights(m):
    """
    Weight initialiser intended for ``module.apply(init_weights)``.

    * nn.Linear (and subclasses): Kaiming-normal weights, bias filled with 0.01.
    * nn.BatchNorm1d: weight set to 1, bias set to 0.

    Layers built without a bias (``bias=False``) or without affine parameters
    (``affine=False``) expose ``None`` for those attributes; they are skipped
    instead of raising. Any other module type is left untouched.
    """
    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)
    elif isinstance(m, nn.BatchNorm1d):
        if m.weight is not None:
            nn.init.constant_(m.weight, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)


In [27]:
dim = len(x_cols)

# Width of the conditioning vector produced by encode_context:
# (context_dim - 1) embedding features + 1 raw ADC_ADC_Sig feature.
context_dim = 64
# Size of the latent noise vector fed to the generator.
latent_dim = 64
# Embedding table size: one row per (Board, Channel) pair. Boards lie in [0, 255] and
# Channels in [0, 47]; encode_context indexes rows as Channel + Board * 48.
n_boards, n_channels = 256, 48


prior = torch.distributions.MultivariateNormal(torch.zeros(latent_dim, device=device),
                                               torch.eye(latent_dim, device=device))

model = gans.LSGAN(dim, prior, base_network=gans_nn.MLP,
                   hidden_features=64, depth=2, context=context_dim
                  )
# Trainable by default (nn.Embedding parameters are created with requires_grad=True).
# Its width is derived from context_dim so the two cannot drift apart.
model.c_embs = nn.Embedding(n_boards * n_channels, context_dim - 1)
# Replace the default generator with the hit-aware one defined above.
model.generator = CustomGenerator(prior.event_shape[0], dim, hidden_features=128, depth=5, context=context_dim)
model.apply(init_weights)
model.to(device)

# The embedding table is trained together with the generator, at a smaller learning rate.
gen_optimizer = torch.optim.Adam([{'params': model.generator.parameters(), 'lr': 1e-4, 'weight_decay': 0.0005},
                                  {'params': model.c_embs.parameters(), 'lr': 1e-5}])
disc_optimizer = torch.optim.Adam(model.discriminator.parameters(), lr=1e-6)

# Each scheduler.step() multiplies the learning rates by 0.99.
gen_scheduler = torch.optim.lr_scheduler.ExponentialLR(gen_optimizer, 0.99)
disc_scheduler = torch.optim.lr_scheduler.ExponentialLR(disc_optimizer, 0.99)

In [28]:
import pickle 

# context: Channel, Board, ADC_ADC_Sig
# data: 'Nhit', 'ADC_ADC_bg', ['Asic_ADC{i}', f'Asic_TDC{i}', f'Asic_TOT{i}']

In [29]:
from torch.utils.data import TensorDataset, DataLoader

def get_infinite_loader(loader):
    """
    Yield batches from ``loader`` forever, restarting it whenever it is exhausted.

    ``loader`` is re-iterated from the start on every pass, so a shuffling
    DataLoader is reshuffled per pass.

    Raises:
        ValueError: if a pass over ``loader`` produces no batch at all (for
            example an empty dataset, or fewer samples than ``batch_size``
            combined with ``drop_last=True``). Without this check the
            StopIteration would escape inside the generator and surface as an
            opaque ``RuntimeError: generator raised StopIteration``.
    """
    while True:
        produced_any = False
        for batch in loader:
            produced_any = True
            yield batch
        if not produced_any:
            raise ValueError('get_infinite_loader: loader produced no batches')
            
# Training subset: only events with Nhit != 1 are kept.
sample_num = 100000
balanced_cosmic = cosmic.loc[cosmic['Nhit'] != 1]

# Alternative (disabled): equal-sized samples of the Nhit != 1 and Nhit == 1 events.
# balanced_cosmic = pd.concat([cosmic[cosmic['Nhit'] != 1].sample(sample_num),
#                              cosmic[cosmic['Nhit'] == 1].sample(sample_num)], axis=0)

# Each dataset item is a (features, context) pair, in x_cols / c_cols column order.
train_features = torch.tensor(balanced_cosmic[x_cols].values)
train_context = torch.tensor(balanced_cosmic[c_cols].values)
train_dataset = TensorDataset(train_features, train_context)

# Shuffled batches of 128 with the incomplete last batch dropped, wrapped so that
# next(train_dataloader) never runs out.
train_dataloader = get_infinite_loader(
    DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=True)
)

In [30]:
# Smoke test: push a single batch through both losses before starting the long training run.
batch = next(train_dataloader)
x, context = (part.to(device).float() for part in batch)
context = encode_context(model.c_embs, context)

gen_loss = model.calculate_loss_gen(x, context=context)
disc_loss = model.calculate_loss_disc(x, context=context)

# Displayed as (generator loss, discriminator loss), each averaged over the batch.
(gen_loss.mean().item(), disc_loss.mean().item())


(1.604836106300354, 1.8196141719818115)

In [31]:
# The Comet API key is a credential and must not be stored in the notebook.
# Export COMET_API_KEY (or configure it in the Comet config file) before running;
# when api_key is None, Comet falls back to its own configuration lookup.
# NOTE: the key that was previously committed here should be revoked and regenerated.
experiment = Experiment(api_key=os.environ.get("COMET_API_KEY"),
                        project_name="richgans", workspace="maximartemev")


COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/maximartemev/richgans/ee38ca4a54d64b9989020b8d2473f92d
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     disc/lr [7]            : (9.414801494010001e-07, 1e-06)
COMET INFO:     gen/lr [7]             : (9.414801494010001e-05, 0.0001)
COMET INFO:     loss [3882]            : (0.0003460478037595749, 3.688354969024658)
COMET INFO:     train/disc/loss [9720] : (1.9080756902694702, 3.780816078186035)
COMET INFO:     train/gen/loss [9720]  : (0.0002489592880010605, 4.29982852935791)
COMET INFO:   Parameters:
COMET INFO:   Uploads [count]:
COMET INFO:     environment details      : 1
COMET INFO:     figures [14]             : 14
COMET INFO:     filename                 : 1
COMET INFO:     git metadata             : 1
COMET INFO:     git-patch (un

In [32]:
# Run the validation block (lr logging, scheduler step, diagnostic figures) every this many iterations.
VAL_INTERVALS = 1500
# Number of generator optimizer steps per training iteration.
GEN_UPDATES = 3
# Number of discriminator optimizer steps per training iteration.
DISC_UPDATES = 1

In [33]:
def _plot_hit_split(real, gen, sentinel, title, plot_ind):
    """Draw the 'hits' and 'nohits' histogram panels for one column; return the updated panel index.

    Values different from ``sentinel`` go to the 'hits' panel, values equal to
    it go to the 'nohits' panel. Within a panel the generated histogram reuses
    the bin edges computed for the real one.
    """
    panels = (
        (real != sentinel, gen != sentinel, 'Real, hits', 'Generated, hits'),
        (real == sentinel, gen == sentinel, 'Real, nohits', 'Generated, nohits'),
    )
    for real_mask, gen_mask, real_label, gen_label in panels:
        plot_ind += 1
        plt.subplot(10, 2, plot_ind)
        _, bins, _ = plt.hist(real[real_mask], bins=50, alpha=0.6, label=real_label)
        plt.hist(gen[gen_mask], bins=bins, alpha=0.6, label=gen_label)
        plt.grid()
        plt.legend()
        plt.title(title)
    return plot_ind


def plot_hits(data):
    """
    Compare real cosmic features with features generated for contexts drawn from ``data``.

    A new 20x50 figure is created and left as the current figure, with two
    panels (hits / nohits) for every Asic_ADC column, for Nhit and for
    ADC_ADC_bg.

    data: DataFrame providing the context columns ``c_cols``. Contexts are
          sampled from this frame (previously the argument was ignored and
          ``mc_noct`` was always used, so every call plotted the same source).
    """
    # Same number of real and generated rows (the histograms are raw counts),
    # never more than either frame can supply.
    n_samples = min(100000, len(cosmic), len(data))
    x_val = cosmic.sample(n_samples)[x_cols].values
    c_val = data.sample(n_samples)[c_cols].values
    with torch.no_grad():  # plotting only: no autograd graph needed
        x_gen = model.generate(n_samples,
                               encode_context(model.c_embs,
                                              torch.tensor(c_val, device=device)
                                             )
                              ).detach().cpu().numpy()

    plt.figure(figsize=(20, 50))
    plot_ind = 0
    for ind, col in enumerate(x_cols):
        real_col, gen_col = x_val[:, ind], x_gen[:, ind]
        if 'ADC' in col and 'bg' not in col:
            # per-ASIC ADC columns: -1 marks "no hit"
            plot_ind = _plot_hit_split(real_col, gen_col, -1, col, plot_ind)
        if 'Nhit' in col:
            # Nhit is split into "!= 1" and "== 1"
            plot_ind = _plot_hit_split(real_col, gen_col, 1, col, plot_ind)
        if 'bg' in col:
            # background ADC: -1 marks "no value"
            plot_ind = _plot_hit_split(real_col, gen_col, -1, col, plot_ind)


In [34]:
def plot_compare(data):
    """
    Overlay real and generated marginal distributions for every row of ``data``.

    One sample is generated per row, conditioned on that row's ``c_cols``.
    A density-normalised histogram pair is drawn for each column whose name
    contains 'ADC' and for 'Nhit', on a 5x2 grid of a new 20x20 figure.
    The generated histogram reuses the bin edges of the real one.
    """
    plt.figure(figsize=(20, 20))

    real = data[x_cols].values
    conditioning = torch.tensor(data[c_cols].values, device=device)
    generated = model.generate(
        data.shape[0], encode_context(model.c_embs, conditioning)
    ).detach().cpu().numpy()

    selected = [(ind, col) for ind, col in enumerate(x_cols)
                if 'ADC' in col or col == 'Nhit']
    for panel, (ind, col) in enumerate(selected, start=1):
        plt.subplot(5, 2, panel)
        _, edges, _ = plt.hist(real[:, ind], bins=50, alpha=0.6, label='Real', density=True)
        plt.hist(generated[:, ind], bins=edges, alpha=0.6, label='Generated', density=True)
        plt.grid()
        plt.legend()
        plt.title(col)

In [21]:
# Load the first 100000 rows of the two processed MC samples.
# NOTE(review): judging by the file names these are presumably the samples without
# ("noxtalk") and with ("xtalk") cross-talk — confirm.
mc_noct = pd.read_csv('../data/processed/mcmc_noxtalk.csv.gz', nrows=100000)
mc_ct = pd.read_csv('../data/processed/mcmc_xtalk.csv.gz', nrows=100000)


In [None]:
# Parameters owned by each optimizer. Gradient clipping is restricted to the set
# that is about to be stepped: clipping over model.parameters() also counted the
# stale gradients left on the *other* network by the previous backward pass, which
# inflated the norm and shrank the update actually being applied.
gen_params = list(model.generator.parameters()) + list(model.c_embs.parameters())
disc_params = list(model.discriminator.parameters())

for iteration in tqdm(range(1000000), position=0, leave=True):

    is_val_step = iteration % VAL_INTERVALS == 0
    if is_val_step:
        model.eval()
        experiment.log_metrics({'gen/lr': gen_optimizer.param_groups[0]['lr'],
                                'disc/lr': disc_optimizer.param_groups[0]['lr']},
                               step=iteration)

        # log_figure uploads the current matplotlib figure, i.e. the one plot_hits just created.
        plot_hits(mc_noct)
        experiment.log_figure('no_ct', step=iteration)
        plot_hits(mc_ct)
        experiment.log_figure('yes_ct', step=iteration)
        # plot_hits opens a new figure on every call; close them all, otherwise
        # they pile up in memory for the whole training run.
        plt.close('all')

    model.train()
    # gen update
    for _ in range(GEN_UPDATES):
        x, context = [i.to(device).float() for i in next(train_dataloader)]
        context = encode_context(model.c_embs, context)
        gen_optimizer.zero_grad()
        gen_loss = model.calculate_loss_gen(x, context=context).mean()
        gen_loss.backward()
        torch.nn.utils.clip_grad_norm_(gen_params, 1)
        gen_optimizer.step()
    # Only the loss of the last generator step of this iteration is logged.
    experiment.log_metric('train/gen/loss', gen_loss.item(), step=iteration)

    # disc update
    for _ in range(DISC_UPDATES):
        x, context = [i.to(device).float() for i in next(train_dataloader)]
        context = encode_context(model.c_embs, context)
        disc_optimizer.zero_grad()
        disc_loss = model.calculate_loss_disc(x, context=context).mean()
        disc_loss.backward()
        torch.nn.utils.clip_grad_norm_(disc_params, 1)
        disc_optimizer.step()
    experiment.log_metric('train/disc/loss', disc_loss.item(), step=iteration)

    # Decay the learning rates once per validation interval, after the optimizer
    # steps: stepping a scheduler before the first optimizer.step() skips the
    # initial learning rate and triggers a PyTorch warning.
    if is_val_step:
        gen_scheduler.step()
        disc_scheduler.step()
    

  if __name__ == '__main__':
 45%|████▍     | 447743/1000000 [13:31:40<16:40:13,  9.20it/s] 

In [None]:
# Number of rows used for the real-vs-generated comparison.
n_eval = 100000

# Positional slicing: .loc[:100000] is label-based and *includes* the end label,
# which returned 100001 real rows against 100000 generated ones.
x_val = cosmic[x_cols].iloc[:n_eval].values
c_val = mc_noct[c_cols].iloc[:n_eval].values

# One generated row per context row, so the requested count always matches the context batch.
x_gen = model.generate(len(c_val),
                       encode_context(model.c_embs,
                                      torch.tensor(c_val, device=device)
                                     )
                      ).detach().cpu().numpy()

In [None]:
# Hits / no-hits histograms of x_val (real) against x_gen (generated), two panels per column.
plt.figure(figsize=(20, 50))

plot_ind = 0
for ind, col in enumerate(x_cols):
    # Value that marks "no hit" for this column, one entry per matching rule:
    # -1 for the per-ASIC ADC columns, 1 for Nhit, -1 for the background ADC.
    sentinels = []
    if 'ADC' in col and 'bg' not in col:
        sentinels.append(-1)
    if 'Nhit' in col:
        sentinels.append(1)
    if 'bg' in col:
        sentinels.append(-1)

    real_col, gen_col = x_val[:, ind], x_gen[:, ind]
    for sentinel in sentinels:
        real_nohit = real_col == sentinel
        gen_nohit = gen_col == sentinel
        panels = (
            (~real_nohit, ~gen_nohit, 'Real, hits', 'Generated, hits'),
            (real_nohit, gen_nohit, 'Real, nohits', 'Generated, nohits'),
        )
        for real_mask, gen_mask, real_label, gen_label in panels:
            plot_ind += 1
            plt.subplot(10, 2, plot_ind)
            # The generated histogram reuses the bin edges of the real one.
            _, bins, _ = plt.hist(real_col[real_mask], bins=50, alpha=0.6, label=real_label)
            plt.hist(gen_col[gen_mask], bins=bins, alpha=0.6, label=gen_label)
            plt.grid()
            plt.legend()
            plt.title(col)

plt.show()

In [None]:
# Scratch cell: evaluates the literal 1 and does not change any notebook state.
1

In [None]:
# Real features and contexts from mc_noct, plus one generated row per context row.
x_val = mc_noct[x_cols].to_numpy()
c_val = mc_noct[c_cols].to_numpy()

noct_context = encode_context(model.c_embs, torch.tensor(c_val, device=device))
x_gen = model.generate(len(mc_noct), noct_context).detach().cpu().numpy()

In [None]:
# Hits / no-hits histograms of x_val (real) against x_gen (generated), two panels per column.
plt.figure(figsize=(20, 50))

plot_ind = 0
for ind, col in enumerate(x_cols):
    # Value that marks "no hit" for this column, one entry per matching rule:
    # -1 for the per-ASIC ADC columns, 1 for Nhit, 0 for the background ADC.
    # NOTE(review): the background sentinel is 0 here but -1 in the other
    # hits/no-hits plots of this notebook — confirm this is intended for this sample.
    sentinels = []
    if 'ADC' in col and 'bg' not in col:
        sentinels.append(-1)
    if 'Nhit' in col:
        sentinels.append(1)
    if 'bg' in col:
        sentinels.append(0)

    real_col, gen_col = x_val[:, ind], x_gen[:, ind]
    for sentinel in sentinels:
        real_nohit = real_col == sentinel
        gen_nohit = gen_col == sentinel
        panels = (
            (~real_nohit, ~gen_nohit, 'Real, hits', 'Generated, hits'),
            (real_nohit, gen_nohit, 'Real, nohits', 'Generated, nohits'),
        )
        for real_mask, gen_mask, real_label, gen_label in panels:
            plot_ind += 1
            plt.subplot(10, 2, plot_ind)
            # The generated histogram reuses the bin edges of the real one.
            _, bins, _ = plt.hist(real_col[real_mask], bins=50, alpha=0.6, label=real_label)
            plt.hist(gen_col[gen_mask], bins=bins, alpha=0.6, label=gen_label)
            plt.grid()
            plt.legend()
            plt.title(col)

plt.show()

In [None]:
# Real features and contexts from mc_ct, plus one generated row per context row.
x_val = mc_ct[x_cols].to_numpy()
c_val = mc_ct[c_cols].to_numpy()

ct_context = encode_context(model.c_embs, torch.tensor(c_val, device=device))
x_gen = model.generate(len(mc_ct), ct_context).detach().cpu().numpy()

In [None]:
# Density-normalised marginals of every ADC column except the background one, on a 5x2 grid.
plt.figure(figsize=(20, 20))

plot_ind = 0
adc_columns = [(ind, col) for ind, col in enumerate(x_cols)
               if 'ADC' in col and 'bg' not in col]
for plot_ind, (ind, col) in enumerate(adc_columns, start=1):
    plt.subplot(5, 2, plot_ind)
    # Real and generated histograms are binned independently (50 bins each).
    for values, label in ((x_val, 'Real'), (x_gen, 'Generated')):
        plt.hist(values[:, ind], bins=50, alpha=0.6, label=label, density=True)
    plt.grid()
    plt.legend()
    plt.title(col)
plt.show()

In [None]:
# Density-normalised marginals of every column containing 'ADC', plus Nhit, on a 5x2 grid.
plt.figure(figsize=(20, 20))

plot_ind = 0
marginal_columns = [(ind, col) for ind, col in enumerate(x_cols)
                    if 'ADC' in col or col == 'Nhit']
for plot_ind, (ind, col) in enumerate(marginal_columns, start=1):
    plt.subplot(5, 2, plot_ind)
    # Real and generated histograms are binned independently (50 bins each).
    for values, label in ((x_val, 'Real'), (x_gen, 'Generated')):
        plt.hist(values[:, ind], bins=50, alpha=0.6, label=label, density=True)
    plt.grid()
    plt.legend()
    plt.title(col)
plt.show()

In [None]:
# Persist the trained weights. The target directory is created first, because
# torch.save fails if it does not exist.
# The file name is kept as 'first_wgan' so existing loaders keep working, even
# though the model built in this notebook is an LSGAN.
save_dir = '../saved_models/'
os.makedirs(save_dir, exist_ok=True)
torch.save(model.state_dict(), os.path.join(save_dir, 'first_wgan' + '.state_dict'))


In [None]:
# Scratch cell: evaluates the literal 1 and does not change any notebook state.
1

In [None]:
# Rebind mc_noct to the first 100000 rows of the file under ../data/raw (the earlier
# load read ../data/processed), then overlay real and generated marginals.
# NOTE(review): confirm the raw file has the columns and scaling the model was trained on.
mc_noct = pd.read_csv('../data/raw/mcmc_noxtalk.csv.gz', nrows=100000)
plot_compare(mc_noct)

In [None]:
# Rebind mc_ct to the first 100000 rows of the file under ../data/raw (the earlier
# load read ../data/processed), then overlay real and generated marginals.
# NOTE(review): confirm the raw file has the columns and scaling the model was trained on.
mc_ct = pd.read_csv('../data/raw/mcmc_xtalk.csv.gz', nrows=100000)
plot_compare(mc_ct)