In [1]:
import pandas as pd
import json
from pprint import pprint
import numpy as np
from tqdm import tqdm
from transformers import BertTokenizer, AdamW, BertModel, BertPreTrainedModel, BertConfig, SquadExample

I0417 05:14:09.089147 140285051234048 file_utils.py:41] PyTorch version 1.2.0 available.


# 载入预训练模型

In [2]:
bert_config = BertConfig.from_pretrained(r'/home/zhoujx/Pretrained_models/chinese_roberta_wwm_large_ext_pytorch/bert_config.json', output_hidden_states=True)
tokenizer  = BertTokenizer.from_pretrained(r'/home/zhoujx/Pretrained_models/chinese_roberta_wwm_large_ext_pytorch/vocab.txt', config=bert_config)

I0417 05:14:11.471177 140285051234048 configuration_utils.py:281] loading configuration file /home/zhoujx/Pretrained_models/chinese_roberta_wwm_large_ext_pytorch/bert_config.json
I0417 05:14:11.473556 140285051234048 configuration_utils.py:319] Model config BertConfig {
  "_num_labels": 2,
  "architectures": null,
  "attention_probs_dropout_prob": 0.1,
  "bad_words_ids": null,
  "bos_token_id": null,
  "decoder_start_token_id": null,
  "directionality": "bidi",
  "do_sample": false,
  "early_stopping": false,
  "eos_token_id": null,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "length_penalty": 1.0,
  "max_length": 20,
  "max_position_embeddings": 512,
  "min_length": 0,
  "m

In [4]:
tokenizer.convert_ids_to_tokens([23])

['[unused23]']

# 加载数据

## 辅助函数
>重新寻找start_position和end_position，避免分词后位置对不上


In [50]:
# 重新寻找start_position和end_position
def contains(small, big):
    for i in range(len(big)-len(small)+1):
        for j in range(len(small)):
            if big[i+j] != small[j]:
                break
        else:
            return i, i+len(small)
    return False

## train_data & dev_data

In [51]:
def get_data(data_path):
    with open(data_path, 'r', encoding='utf-8') as f:
        load_dict = json.load(f)
    datas = load_dict['data'][0]['paragraphs']
    
    query_id_list = []
    context_list = []
    query_text_list = []
    answer_list = []
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []

    for document in tqdm(datas):
        context = document['context'].strip()
        for qa in (document['qas']):
            query_id = qa['id']
            query_text = qa['question'].strip()
            answer = qa['answers'][0]['text']
            tokenize_out = tokenizer.encode_plus(query_text, 
                                                                         context, max_length=512, 
                                                                         pad_to_max_length=True)
            input_ids = tokenize_out['input_ids']
            token_type_ids = tokenize_out['token_type_ids']
            attention_mask = tokenize_out['attention_mask']
            # 
            query_id_list.append(query_id)
            context_list.append(context)
            query_text_list.append(query_text)
            answer_list.append(answer)
            input_ids_list.append(input_ids)
            token_type_ids_list.append(token_type_ids)
            attention_mask_list.append(attention_mask)            
            
    df_data = pd.DataFrame({'query_id' : query_id_list,
                                              'context' : context_list,
                                              'question' : query_text_list,
                                             'answer' : answer_list,
                                             'input_ids' : input_ids_list,
                                             'token_type_ids' : token_type_ids_list,
                                             'attention_mask' : attention_mask_list})
    
    # 分词
    for col in ['context', 'question', 'answer']:
        df_data[col + '_token'] = df_data.apply(lambda x: tokenizer.tokenize(x[col]), axis=1)
        print(col + '：finished!!!!')

    # 重新确定start_end
    df_data['start_end_span'] = df_data.apply(lambda x: contains(x['answer_token'], x['context_token']), axis=1)
    df_data['start_position'] = df_data.apply(lambda x: x['start_end_span'][0] if x['start_end_span'] != False else np.nan, axis=1)
    df_data['end_position'] = df_data.apply(lambda x: x['start_end_span'][1] if x['start_end_span'] != False else np.nan, axis=1)
    # 去除np.nan值
    df_data.dropna(subset=['start_position', 'end_position'], inplace=True)
    # 修正
    df_data['start_position'] = df_data['start_position'].astype(int)
    df_data['start_position'] = df_data.apply(lambda x:x['start_position'] + len(x['question_token']) +2, axis=1)
    df_data['end_position']   = df_data['end_position'].astype(int)
    df_data['end_position']   = df_data.apply(lambda x:x['end_position'] + len(x['question_token']) +2, axis=1)
    # 
    df_data.drop(['context_token', 'question_token', 'answer_token', 'start_end_span'], inplace=True, axis=1)
    return df_data

In [52]:
df_train = get_data('./train.json')
# df_dev   = get_data('./dev.json')

100%|██████████| 14520/14520 [01:05<00:00, 222.57it/s]


context：finished!!!!
question：finished!!!!
answer：finished!!!!


In [53]:
df_train.to_csv('./df_train.csv', index=False)

In [16]:
df_dev.to_csv('./df_dev.csv', index=False)

## test_data

In [56]:
def get_test_data(data_path):
    with open(data_path, 'r', encoding='utf-8') as f:
        load_dict = json.load(f)
    datas = load_dict['data'][0]['paragraphs']
       
    query_id_list = []
    context_list = []
    query_text_list = []
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
    
    for document in tqdm(datas):
        context = document['context'].strip()
        for qa in (document['qas']):
            query_id = qa['id']
            query_text = qa['question'].strip()
            tokenize_out = tokenizer.encode_plus(query_text, 
                                                                         context, max_length=512, 
                                                                         pad_to_max_length=True)
            input_ids = tokenize_out['input_ids']
            token_type_ids = tokenize_out['token_type_ids']
            attention_mask = tokenize_out['attention_mask']
            # 
            query_id_list.append(query_id)
            context_list.append(context)
            query_text_list.append(query_text)
            input_ids_list.append(input_ids)
            token_type_ids_list.append(token_type_ids)
            attention_mask_list.append(attention_mask)
            
    df_data = pd.DataFrame({'query_id' : query_id_list,
                                              'context' : context_list,
                                              'question' : query_text_list,
                                              'input_ids' : input_ids_list,
                                              'token_type_ids' : token_type_ids_list,
                                              'attention_mask' : attention_mask_list})
    return df_data

In [None]:
df_train.to_csv('./df_train.csv', index=False)

In [58]:
df_test = get_test_data('./test1.json')
df_test.to_csv('./df_test.csv', index=False)

100%|██████████| 30703/30703 [04:36<00:00, 110.95it/s]


In [55]:
df_dev = get_test_data('./dev.json')
df_dev.to_csv('./df_dev.csv', index=False)

100%|██████████| 1417/1417 [00:06<00:00, 221.22it/s]


In [60]:
with open('./test1.json', 'r', encoding='utf-8') as f:
    load_dict = json.load(f)
datas = load_dict['data'][0]['paragraphs']

In [63]:
df_test = pd.read_csv('./df_test.csv')


In [76]:
df_test.head()

Unnamed: 0,query_id,context,question,input_ids,token_type_ids,attention_mask
0,3c41636fb3f3a1bca8dbf60e1d9a8d18,藏蓝色，兼于蓝色和黑色之间，既有蓝色的沉静安宁，也有黑色的神秘成熟，既有黑色的收敛效果，又不乏蓝色的洁净长久，虽然不会大热流行，却是可以长久的信任，当藏蓝色与其他颜色相遇，你便会懂得它内在的涵养。藏蓝色+橙色单纯的藏蓝色会给人很严肃的气氛，橙色的点缀让藏蓝色也充满时尚活力。藏蓝色+白色白色是藏蓝色的最佳搭档，两者搭档最容易显得很干净，藏蓝色和白色营造的洗练感，让通勤装永远都不会过时，展现出都市女性的利落感。藏蓝色+粉色藏蓝色和粉色组合散发出成熟优雅的女人味，让粉色显出别样娇嫩。藏蓝色+米色藏蓝色和米色的搭配散发出浓郁的知性气质，稚气的设计细节更显年轻。藏蓝色+红色藏蓝色和红色的搭配更加的沉稳，也更具存在感，如果是面积差不多的服装来搭配，可以用红色的小物点缀来巧妙的平衡。藏蓝色+松石绿藏蓝色搭配柔和的松石绿色给人上品好品质的感觉，用凉鞋和项链来点缀更加具有层次感。藏蓝色+黄色明亮的黄...,藏蓝色配什么颜色好看,"[101, 5966, 5905, 5682, 6981, 784, 720, 7582, 5682, 1962, 4692, 102, 5966, 5905, 5682, 8024, 1076, 754, 5905, 5682, 1469, 7946, 5682, 722, 7313, 8024, 3188, 3300, 5905, 5682, 4638, 3756, 7474, 2128, 2123, 8024, 738, 3300, 7946, 5682, 4638, 4868, 4908, 2768, 4225, 8024, 3188, 3300, 7946, 5682, 4638, 3119, 3137, 3126, 3362, 8024, 1348, 679, 726, 5905, 5682, 4638, 3815, 1112, 7270, 719, 8024, 600...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,..."
1,8edd3333dcff47508ebba5a6249fa8e9,阳光板大部分使用的是聚碳酸酯（PC）原料生产，利用空挤压工艺在耐候性脆弱的PC板材上空挤压UV树脂，质量好一点的板面均匀分布有高浓度的UV层，阻挡紫外线的穿过，防止板材黄变，延长板材寿命使产品使用寿命达到10年以上。并且产品具有长期持续透明性的特点。（有单面和双面UV防护）。用途：住宅/商厦采光天幕，工厂厂房 仓库采光顶，体育场馆采光顶，广告牌，通道/停车棚，游泳池/温室覆盖，室内隔断。另本司有隔热保温的PC板材做温棚 遮阳棚 都不错2832217048@qq.com,阳光板雨棚能用几年,"[101, 7345, 1045, 3352, 7433, 3476, 5543, 4500, 1126, 2399, 102, 7345, 1045, 3352, 1920, 6956, 1146, 886, 4500, 4638, 3221, 5471, 4823, 7000, 6994, 8020, 8295, 8021, 1333, 3160, 4495, 772, 8024, 1164, 4500, 4958, 2915, 1327, 2339, 5686, 1762, 5447, 952, 2595, 5546, 2483, 4638, 8295, 3352, 3332, 677, 4958, 2915, 1327, 9473, 3409, 5544, 8024, 6574, 7030, 1962, 671, 4157, 4638, 3352, 7481, 1772, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,..."
2,386169bb13528eb53a923e3f068cf0db,"手术方式的话用微创的更好,微创手术术野清晰,手术时间更少,伤口小术后患者恢复好。但如果是低位直肠癌的话建议用冷冻治疗,冷冻治疗与微创手术相比几乎没有创口,术后生存率一样,大部分低位超低位直肠癌患者需要做人工肛门,冷冻治疗可以保留肛门和肛门功能。手术费用各地公立医院的定价都是由物价局定价的,等级和区域的不同会有影响,在我们医院(公立三甲)直肠癌微创手术大概5万元。冷冻治疗大概2万元。",肠癌手术费大概多少钱,"[101, 5499, 4617, 2797, 3318, 6589, 1920, 3519, 1914, 2208, 7178, 102, 2797, 3318, 3175, 2466, 4638, 6413, 4500, 2544, 1158, 4638, 3291, 1962, 117, 2544, 1158, 2797, 3318, 3318, 7029, 3926, 3251, 117, 2797, 3318, 3198, 7313, 3291, 2208, 117, 839, 1366, 2207, 3318, 1400, 2642, 5442, 2612, 1908, 1962, 511, 852, 1963, 3362, 3221, 856, 855, 4684, 5499, 4617, 4638, 6413, 2456, 6379, 4500, 1107, 110...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,..."
3,362d218b681886d4644b6c9ca991675a,"现在橱柜的箱体板材主要有实木颗粒板 和实木多层 ,大的品牌主要是实木颗粒板因为它的防水防潮性能和握钉力好,但是因为它们的环保级别不一样,价格也是不一样的.最好的可以达到欧标EO级它的甲醛释放量<0.05mg/l 橱柜门板的主要种类及特点 门板作为表现橱柜外观和总体质量的主要部分,通常是订购橱柜时首先要考虑的。消费者选择橱柜门板时,应考虑款式造型、颜色搭配、易于清理、抗变形性、防潮、防水性、表面耐磨性及是否环保等决定橱柜质量和使用寿命的几个关键性能指标。随着新材料、新工艺的不断出现,门板种类也越来越丰富。目前市场上常见的橱柜门板主要有以下几种: (1)防火板门板:这种门板通常选用环保E1级大芯板作基材,外贴优质、环保防火板。具有耐磨、耐高温、耐划、易清洁以及不易变形、色泽靓丽等特性,成为门板的常用材料。现在橱柜的箱体板材主要有实木颗粒板和实木多层,大的品牌主要是实木颗粒板因为它的防水...",橱柜用什么板材好,"[101, 3586, 3385, 4500, 784, 720, 3352, 3332, 1962, 102, 4385, 1762, 3586, 3385, 4638, 5056, 860, 3352, 3332, 712, 6206, 3300, 2141, 3312, 7578, 5108, 3352, 1469, 2141, 3312, 1914, 2231, 117, 1920, 4638, 1501, 4277, 712, 6206, 3221, 2141, 3312, 7578, 5108, 3352, 1728, 711, 2124, 4638, 7344, 3717, 7344, 4060, 2595, 5543, 1469, 2995, 7152, 1213, 1962, 117, 852, 3221, 1728, 711, 2124, 812, 4638, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,..."
4,9754ed445e8745360b75665c315804f9,一般来说实木的胡桃木较橡木好。黄金胡桃木又称为：金丝胡桃木；胡桃木易于用手工和机械工具加工。适于敲钉、螺钻和胶合。可以持久保留油漆和染色，可打磨成特殊的最终效果。干燥得很慢，需要小心避免窑中烘干后的降等损失。胡桃木有良好的尺寸稳定性。且花纹特殊，木质名贵，少有家具制造商能够将其特性完美的体现出来。如果有更多问题，可以点击ID咨询。,橡木胡桃木哪个好,"[101, 3583, 3312, 5529, 3425, 3312, 1525, 702, 1962, 102, 671, 5663, 3341, 6432, 2141, 3312, 4638, 5529, 3425, 3312, 6772, 3583, 3312, 1962, 511, 7942, 7032, 5529, 3425, 3312, 1348, 4917, 711, 8038, 7032, 692, 5529, 3425, 3312, 8039, 5529, 3425, 3312, 3211, 754, 4500, 2797, 2339, 1469, 3322, 3462, 2339, 1072, 1217, 2339, 511, 6844, 754, 3145, 7152, 510, 6090, 7183, 1469, 5540, 1394, 511, 1377,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,..."


In [74]:
pd.set_option('max_colwidth',400)

In [72]:
pd.set_option('expand_frame_repr', True)