# pictures read and normalize

In [1]:
import imageio

In [2]:
img_arr = imageio.imread('../DL with PyTorch/data/bobby.jpg')

In [3]:
img_arr.shape # numpy array-like object, H*W*C, respectivel

(720, 1280, 3)

In [4]:
#change it into C*H*W
import torch
img = torch.from_numpy(img_arr)
out = img.permute(2,0,1)

In [6]:
batch_size = 3
batch = torch.zeros(batch_size, 3, 256, 256, dtype = torch.uint8)

## read multiple pics

In [8]:
import os
data_dir = '../DL with PyTorch/data/image-cats'
filenames = [name for name in os.listdir(data_dir) if os.path.splitext(name)[-1]=='.png']

for i, filename in enumerate(filenames):
    img_arr = imageio.imread(os.path.join(data_dir, filename))
    img_t = torch.from_numpy(img_arr)
    img_t = img_t.permute(2,0,1)
    img_t = img_t[:3] #flit the alpha channel
    batch[i] = img_t

## normalize pics

In [9]:
batch = batch.float()
batch /= 255.0

In [10]:
n_channels = batch.shape[1]
for c in range(n_channels):
    mean = torch.mean(batch[:,c])
    std = torch.std(batch[:,c])
    batch[:,c] = (batch[:,c]-mean)/std

In [16]:
from torchvision import transforms
for i in range(batch.shape[0]):
    out_t = batch[i]
    out_img = transforms.ToPILImage()(out_t)
    out_img.show()
    print(out_img)

<PIL.Image.Image image mode=RGB size=256x256 at 0x17970142A08>
<PIL.Image.Image image mode=RGB size=256x256 at 0x1796A3AB648>
<PIL.Image.Image image mode=RGB size=256x256 at 0x17970142B48>


In [20]:
for i in range(batch.shape[0]):
    print(batch[i].shape)
    torch.save(batch[i], '../DL with PyTorch/data/clean-image-cats.t')

torch.Size([3, 256, 256])
torch.Size([3, 256, 256])
torch.Size([3, 256, 256])


In [18]:
p_load = torch.load('../DL with PyTorch/outpoints.t')

In [19]:
p_load.shape

torch.Size([3, 4])

# 3D images:Volumetric data

In [22]:
#in medical case, the images are 5D: N*C*D*H*W, D=depth
dir_path = 'D:\jupyter_path\dlwpt-code-master\data\p1ch4\\volumetric-dicom\\2-LUNG 3.0  B70f-04083'
vol_arr = imageio.volread(dir_path, 'DICOM')

Reading DICOM (examining files): 1/99 files (1.0%5/99 files (5.1%10/99 files (10.1%16/99 files (16.2%22/99 files (22.2%28/99 files (28.3%35/99 files (35.4%43/99 files (43.4%50/99 files (50.5%58/99 files (58.6%67/99 files (67.7%73/99 files (73.7%79/99 files (79.8%84/99 files (84.8%92/99 files (92.9%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 17/99  (17.228/99  (28.341/99  (41.448/99  (48.559/99  (59.667/99  (67.778/99  (78.884/99  (84.891/99  (91.999/99  (100.0%99/99  (100.0%)


In [25]:

vol = torch.from_numpy(vol_arr).float()
print(vol.shape)
vol = torch.unsqueeze(vol, 0)

torch.Size([99, 512, 512])


In [26]:
vol.shape

torch.Size([1, 99, 512, 512])

# CSV Data: Wine Prediction

## 首先用np.loadtxt读入除第一行以外的所有数据

In [7]:
import csv
import numpy as np
wine_path = '../dlwpt-code-master/data/p1ch4/tabular-wine/winequality-white.csv'
wine_np = np.loadtxt(wine_path, dtype=np.float32, delimiter=';', skiprows= 1)

In [8]:
wine_np[:5], wine_np.shape

(array([[7.000e+00, 2.700e-01, 3.600e-01, 2.070e+01, 4.500e-02, 4.500e+01,
         1.700e+02, 1.001e+00, 3.000e+00, 4.500e-01, 8.800e+00, 6.000e+00],
        [6.300e+00, 3.000e-01, 3.400e-01, 1.600e+00, 4.900e-02, 1.400e+01,
         1.320e+02, 9.940e-01, 3.300e+00, 4.900e-01, 9.500e+00, 6.000e+00],
        [8.100e+00, 2.800e-01, 4.000e-01, 6.900e+00, 5.000e-02, 3.000e+01,
         9.700e+01, 9.951e-01, 3.260e+00, 4.400e-01, 1.010e+01, 6.000e+00],
        [7.200e+00, 2.300e-01, 3.200e-01, 8.500e+00, 5.800e-02, 4.700e+01,
         1.860e+02, 9.956e-01, 3.190e+00, 4.000e-01, 9.900e+00, 6.000e+00],
        [7.200e+00, 2.300e-01, 3.200e-01, 8.500e+00, 5.800e-02, 4.700e+01,
         1.860e+02, 9.956e-01, 3.190e+00, 4.000e-01, 9.900e+00, 6.000e+00]],
       dtype=float32), (4898, 12))

## 然后读入第一行的标题

In [9]:
col_list = next(csv.reader(open(wine_path), delimiter=';'))

In [10]:
col_list

['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol',
 'quality']

In [11]:
import torch
wine_tensor = torch.from_numpy(wine_np)

In [12]:
wine_tensor.shape, wine_tensor.dtype

(torch.Size([4898, 12]), torch.float32)

In [13]:
#提取数据
data = wine_tensor[:,:-1]

In [14]:
data[:5],data.shape

(tensor([[7.0000e+00, 2.7000e-01, 3.6000e-01, 2.0700e+01, 4.5000e-02, 4.5000e+01,
          1.7000e+02, 1.0010e+00, 3.0000e+00, 4.5000e-01, 8.8000e+00],
         [6.3000e+00, 3.0000e-01, 3.4000e-01, 1.6000e+00, 4.9000e-02, 1.4000e+01,
          1.3200e+02, 9.9400e-01, 3.3000e+00, 4.9000e-01, 9.5000e+00],
         [8.1000e+00, 2.8000e-01, 4.0000e-01, 6.9000e+00, 5.0000e-02, 3.0000e+01,
          9.7000e+01, 9.9510e-01, 3.2600e+00, 4.4000e-01, 1.0100e+01],
         [7.2000e+00, 2.3000e-01, 3.2000e-01, 8.5000e+00, 5.8000e-02, 4.7000e+01,
          1.8600e+02, 9.9560e-01, 3.1900e+00, 4.0000e-01, 9.9000e+00],
         [7.2000e+00, 2.3000e-01, 3.2000e-01, 8.5000e+00, 5.8000e-02, 4.7000e+01,
          1.8600e+02, 9.9560e-01, 3.1900e+00, 4.0000e-01, 9.9000e+00]]),
 torch.Size([4898, 11]))

In [18]:
#提取标签，法1
target = wine_tensor[:,-1].long()

In [19]:
target[:5],target.shape

(tensor([6, 6, 6, 6, 6]), torch.Size([4898]))

In [24]:
#提取标签，法2：one-hot
target_onehot = torch.zeros(target.shape[0], 10)
target_onehot.scatter_(1, target.unsqueeze(1), 1.0) #for each row, take the index of the target label and use it as the column index to set the value 1.0

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [25]:
data_mean = data.mean(dim=0)

In [27]:
data_mean

tensor([6.8548e+00, 2.7824e-01, 3.3419e-01, 6.3914e+00, 4.5772e-02, 3.5308e+01,
        1.3836e+02, 9.9403e-01, 3.1883e+00, 4.8985e-01, 1.0514e+01])

In [28]:
data_var = data.var(dim=0)
data_var

tensor([7.1211e-01, 1.0160e-02, 1.4646e-02, 2.5726e+01, 4.7733e-04, 2.8924e+02,
        1.8061e+03, 8.9455e-06, 2.2801e-02, 1.3025e-02, 1.5144e+00])

In [29]:
data_normalized = (data - data_mean)/data_var

## 一个简单的方法，来提取所有分数<3的样本

In [30]:
bad_indexes = target<=3
bad_indexes.shape, bad_indexes.dtype, bad_indexes.sum()

(torch.Size([4898]), torch.bool, tensor(20))

In [31]:
bad_data = data[bad_indexes]
bad_data.shape

torch.Size([20, 11])

In [33]:
bad_mean = bad_data.mean(dim=1)
mid_data = data[(target>3) & (target<7)]
good_data = data[target>=7]

In [34]:
mid_mean = mid_data.mean(dim=1)
good_mean = good_data.mean(dim=1)

In [36]:
for i, args in enumerate(zip(col_list, bad_mean, mid_mean, good_mean)):
    print('{:2} {:20}{:6.2f}{:6.2f}{:6.2f}'.format(i, *args))

 0 fixed acidity        25.23 23.33 19.74
 1 volatile acidity     12.79 15.35 14.99
 2 citric acid          21.89 14.32 11.83
 3 residual sugar       10.63 23.98 11.83
 4 chlorides             4.87 23.98 13.32
 5 free sulfur dioxide   5.83 14.32 17.06
 6 total sulfur dioxide 26.69 17.63 16.18
 7 density              12.83 23.33 20.82
 8 pH                   40.25 15.35 17.02
 9 sulphates            15.31 16.63 11.50
10 alcohol              32.53  9.16 16.51
11 quality              43.59 13.98 13.73


In [37]:
total_sulfur_threshold = 141.83
total_sulfur_data = data[:,6]
predicted_idx = torch.lt(total_sulfur_data, total_sulfur_threshold)
predicted_idx.shape, predicted_idx.sum()

(torch.Size([4898]), tensor(2727))

In [38]:
actual_idx = target>5
actual_idx.shape, actual_idx.sum()

(torch.Size([4898]), tensor(3258))

In [40]:
n_matches = torch.sum(actual_idx & predicted_idx).item()
n_pre = torch.sum(predicted_idx).item()
n_actual = torch.sum(actual_idx).item()

In [41]:
n_matches, n_matches/n_pre

(2018, 0.74000733406674)

# Time Series Data

In [42]:
bikes_addr = '../dlwpt-code-master/data/p1ch4/bike-sharing-dataset/hour-fixed.csv'
bikes_np = np.loadtxt(bikes_addr, delimiter=',', skiprows=1, dtype=np.float32, converters={1:lambda x:float(x[8:10])})

In [43]:
bikes_tensor = torch.from_numpy(bikes_np)

In [44]:
bikes_tensor[:5]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
         16.0000],
        [ 2.0000,  1.0000,  1.0000,  0.0000,  1.0000,  1.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2200,  0.2727,  0.8000,  0.0000,  8.0000, 32.0000,
         40.0000],
        [ 3.0000,  1.0000,  1.0000,  0.0000,  1.0000,  2.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2200,  0.2727,  0.8000,  0.0000,  5.0000, 27.0000,
         32.0000],
        [ 4.0000,  1.0000,  1.0000,  0.0000,  1.0000,  3.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.7500,  0.0000,  3.0000, 10.0000,
         13.0000],
        [ 5.0000,  1.0000,  1.0000,  0.0000,  1.0000,  4.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.7500,  0.0000,  0.0000,  1.0000,
          1.0000]])

In [45]:
## break the data into N*C*L, N: number of samples, C: number of channel, L: length of a channel
# in this case, we observe the bike hours, so L may 24, C from the original dada, which is 17
bikes_tensor.shape, bikes_tensor.stride()

(torch.Size([17520, 17]), (17, 1))

In [50]:
daily_bikes = bikes_tensor.view(-1, 24, bikes_tensor.shape[1])
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 24, 17]), (408, 17, 1))

In [55]:
daily_bikes = daily_bikes.transpose(1,2) # to make it N*C*L

In [60]:
first_day = bikes_tensor[:24].long()
weather_onehot = torch.zeros(first_day.shape[0], 4)
first_day[:,9]

tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2])

In [62]:
weather_onehot.scatter_(dim=1, index = first_day[:,9].unsqueeze(1).long()-1, value = 1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [64]:
torch.cat((bikes_tensor[:24], weather_onehot), 1)[:1]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
         16.0000,  1.0000,  0.0000,  0.0000,  0.0000]])

In [65]:
daily_weather_onehot = torch.zeros(730, 4, 24)

In [66]:
daily_weather_onehot.scatter_(1, daily_bikes[:,9].long().unsqueeze(1)-1, 1.0)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [67]:
daily_bikes = torch.cat((daily_bikes, daily_weather_onehot), dim = 1)

# Text Representing

In [2]:
import torch
with open('../dlwpt-code-master/data/p1ch4/jane-austen/1342-0.txt', encoding='utf-8') as f:
    text = f.read()

## split text into lines

In [4]:
lines = text.split('\n')
line = lines[200]
line

'“Impossible, Mr. Bennet, impossible, when I am not acquainted with him'

In [5]:
letter_t = torch.zeros(len(line), 128)
letter_t.shape

torch.Size([70, 128])

In [6]:
for i, letter in enumerate(line.lower().strip()):
    letter_idx = ord(letter) if ord(letter)<128 else 0
    letter_t[i][letter_idx] = 1

In [7]:
letter_t

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [14]:
# 定义一个预处理函数
def clean_words(input_str):
    punc = '.,;:“”''""!?_-'
    word_list = input_str.lower().replace('\n',' ').split()
    word_list = [word.strip(punc) for word in word_list]
    return word_list

In [11]:
words_in_line = clean_words(line)
line, words_in_line

('“Impossible, Mr. Bennet, impossible, when I am not acquainted with him',
 ['impossible',
  'mr',
  'bennet',
  'impossible',
  'when',
  'i',
  'am',
  'not',
  'acquainted',
  'with',
  'him'])

## 建造包含每个单词的字典，用于One-hot编码

In [15]:
word_list = sorted(set(clean_words(text)))

In [16]:

word2index_dict = {word:i for (i, word) in enumerate(word_list)}
len(word2index_dict), word2index_dict['impossible']

(7261, 3394)

In [18]:
word_t = torch.zeros(len(words_in_line), len(word2index_dict))
for i, word in enumerate(words_in_line):
    idx = word2index_dict[word]
    word_t[i][idx] = 1
    print('{:2} {:4} {}'.format(i, idx, word))

 0 3394 impossible
 1 4305 mr
 2  813 bennet
 3 3394 impossible
 4 7078 when
 5 3315 i
 6  415 am
 7 4436 not
 8  239 acquainted
 9 7148 with
10 3215 him


In [19]:
'''
使用单词表示和使用字母表示句子是常用的两种方法
者表达的含义更丰富，而后者需要考虑的类别少得多，也不需要考虑新单词的维护
下面介绍embedding
'''

'\n使用单词表示和使用字母表示句子是常用的两种方法\n者表达的含义更丰富，而后者需要考虑的类别少得多，也不需要考虑新单词的维护\n下面介绍embedding\n'

## embeding表示法

In [20]:
#用不同的浮点数来表示每个单词的权重
#在相似上下文中使用的单词应具有较近的距离
#一般用one-hot表示单词后，再用神经网络计算embedding
下略

NameError: name '下略' is not defined

# 视频文件的处理

In [21]:
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, threshold= 50)

When it comes to the shape of tensors, video data can be seen as equivalent to volumetric data, with depth replaced by the time dimension. The result is again a 5D tensor with shape N x C x T x H x W.

There are several formats for video, especially geared towards compression by exploiting redundancies in space and time. Luckily for us, imageio reads video data as well. Suppose we'd like to retain 100 consecutive frames in our 512 x 512 RBG video for classifying an action using a convolutional neural network. We first create a reader instance for the video, that will allow us to get information about the video and iterate over the frames in time. Let's see what the meta data for the video looks like:

In [33]:
import imageio

reader = imageio.get_reader('../dlwpt-code-master/data/p1ch4/video-cockatoo/cockatoo.mp4')
meta = reader.get_meta_data()
meta

{'plugin': 'ffmpeg',
 'nframes': inf,
 'ffmpeg_version': '4.2.2 built with gcc 9.2.1 (GCC) 20200122',
 'codec': 'h264',
 'pix_fmt': 'yuv444p',
 'fps': 20.0,
 'source_size': (1280, 720),
 'size': (1280, 720),
 'duration': 14.0}

In [38]:
#so we can initialze an appropriate size tensor to hold it
n_channels = 3
n_frames = 280
video = torch.empty(n_channels, n_frames, 1280, 720)
video.shape

torch.Size([3, 280, 1280, 720])

In [39]:
for i, per_frame in enumerate(reader):
    frame = torch.from_numpy(per_frame).float()
    
    video[:,i] = frame.transpose(0, 2)

# 音频文件的处理

In [41]:
import scipy.io.wavfile as wavfile
freq, wavform_addr = wavfile.read('../dlwpt-code-master/data/p1ch4/audio-chirp/1-100038-A-14.wav')
freq, wavform_addr

(44100, array([ -388, -3387, -4634, ...,  2289,  1327,    90], dtype=int16))

In [44]:
wavform_tensor = torch.from_numpy(wavform_addr).float()
wavform_tensor, wavform_tensor.shape

(tensor([ -388., -3387.,  ...,  1327.,    90.]), torch.Size([220500]))

In [45]:
from scipy import signal
f_arr, t_arr, sp_arr =signal.spectrogram(wavform_addr, freq)
sp_moto = torch.from_numpy(sp_arr)
sp_moto

tensor([[4.3517e+00, 1.4044e+00,  ..., 8.4661e-03, 7.7487e+00],
        [4.4579e+01, 3.3186e+00,  ..., 1.2889e+01, 1.6912e+01],
        ...,
        [2.1429e-06, 1.4071e-06,  ..., 1.8314e-06, 1.0062e-06],
        [5.5598e-06, 3.1114e-06,  ..., 6.3296e-07, 2.6184e-06]])

# exercise 1

In [None]:
import os
data_dir = '../DL with PyTorch/data/image-cats'
filenames = [name for name in os.listdir(data_dir) if os.path.splitext(name)[-1]=='.png']

for i, filename in enumerate(filenames):
    img_arr = imageio.imread(os.path.join(data_dir, filename))
    img_t = torch.from_numpy(img_arr)
    img_t = img_t.permute(2,0,1)
    img_t = img_t[:3] #flit the alpha channel
    batch[i] = img_t

In [46]:
import os 
data_dir = '../dlwpt-code-master/data/p1ch4/exercise'
filenames = [name for name in os.listdir(data_dir) if os.path.splitext(name)[-1]=='.jpg']

In [64]:
red_arr = imageio.imread(data_dir+'/red.jpg')
red_tensor = torch.from_numpy(red_arr)
red_tensor = red_tensor.permute(2, 0, 1)
red_tensor = red_tensor.float()
red_tensor.shape

torch.Size([3, 714, 500])

In [65]:
green_arr = imageio.imread(data_dir+'/green.jpg')
green_tensor = torch.from_numpy(green_arr)
green_tensor = green_tensor.permute(2, 0, 1)
green_tensor = green_tensor.float()
green_tensor.shape

torch.Size([3, 333, 500])

In [66]:
blue_arr = imageio.imread(data_dir+'/blue.jpg')
blue_tensor = torch.from_numpy(blue_arr)
blue_tensor = blue_tensor.permute(2, 0, 1)
blue_tensor = blue_tensor.float()
blue_tensor.shape

torch.Size([3, 350, 500])

In [67]:
red_tensor.mean(), blue_tensor.mean(), green_tensor.mean()

(tensor(92.7173), tensor(148.0162), tensor(50.8236))

In [69]:
for i in range(3):
    print(red_tensor[i].mean())

tensor(195.0812)
tensor(43.1465)
tensor(39.9243)


In [70]:
for i in range(3):
    print(blue_tensor[i].mean())

tensor(77.7464)
tensor(157.5766)
tensor(208.7257)


In [71]:
for i in range(3):
    print(green_tensor[i].mean())

tensor(25.3634)
tensor(114.2312)
tensor(12.8762)


# exercise 2

In [72]:
with open('../dlwpt-code-master/p2ch13_explore_data.ipynb', encoding='utf-8') as f:
    text = f.read()

In [87]:
lines = text.split('\n')
line = lines[34]
line

'    "from p2ch13.vis import build2dLungMask\\n",'

In [88]:
letter_t = torch.zeros(len(line), 128)
letter_t.shape

torch.Size([47, 128])

In [98]:
for i, letter in enumerate(line.lower().strip()):
    letter_idx = ord(letter) if ord(letter)<128 else 0
    print(letter_idx)
    letter_t[i][letter_idx] = 1

34
102
114
111
109
32
112
50
99
104
49
51
46
118
105
115
32
105
109
112
111
114
116
32
98
117
105
108
100
50
100
108
117
110
103
109
97
115
107
92
110
34
44


In [99]:
letter_t

tensor([[0., 0.,  ..., 0., 0.],
        [0., 0.,  ..., 0., 0.],
        ...,
        [0., 0.,  ..., 0., 0.],
        [0., 0.,  ..., 0., 0.]])

In [102]:
# 定义一个预处理函数
def clean_words(input_str):
    punc = '.。,;:“”''""!?_\-'
    word_list = input_str.lower().replace('\n',' ').split()
    word_list = [word.strip(punc) for word in word_list]
    return word_list

In [103]:
words_in_line = clean_words(line)
line, words_in_line

('    "from p2ch13.vis import build2dLungMask\\n",',
 ['from', 'p2ch13.vis', 'import', 'build2dlungmask\\n'])

In [83]:
word_list = sorted(set(clean_words(text)))

In [93]:
word2index_dict = {word:i for (i, word) in enumerate(word_list)}
len(word2index_dict), word2index_dict['data']

(496, 222)

In [94]:
word_t = torch.zeros(len(words_in_line), len(word2index_dict))
for i, word in enumerate(words_in_line):
    idx = word2index_dict[word]
    word_t[i][idx] = 1
    print('{:2} {:4} {}'.format(i, idx, word))

 0  251 from
 1  365 p2ch13.vis
 2  261 import
 3  167 build2dlungmask\n
