In [1]:
%load_ext autoreload
%autoreload 2

# Converting a Transformers TensorFlow model to TFLite

https://www.tensorflow.org/lite/guide/signatures

In [25]:
import tensorflow as tf
import numpy as np

from transformers import TFAutoModelForSequenceClassification, TFAutoModel, AutoTokenizer

#MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
MODEL_NAME = "bhadresh-savani/distilbert-base-uncased-emotion"
SAVED_MODEL_NAME = "emotion_classification.tflite"
TOKENIZER_NAME = "bert-base-uncased"
TFLITE_OUTPUT = "tflite/bhadresh-output"


def load_vocab(filename, reverse=False):
    data = load_data(filename, reverse=reverse)
    return dict(data)


def load_data(filename, reverse=False):
    def prepare(i, line):
        line = line.strip()
        return (i, line) if reverse else (line, i)
        
    with open(filename, "r") as f:
        lines = f.readlines()
        #lines = list(filter(lambda l: not l.startswith("##"), lines))
        return [prepare(i, line) for i, line in enumerate(lines)]
 

class DataUtil:
    def __init__(self):
        self.token2id = load_vocab("vocab.txt")
        self.id2token = load_vocab("vocab.txt", reverse=True) 
    
    def tokenize(self, text, padding=True, max_len=87):
        tokens = [line.strip().lower() for line in text.split(" ")]
        # Starting
        token_ids = [101]
        count = 1

        for token in tokens:
            if token == 'ive':
                part1 = self.token2id.get("iv")
                part2 = self.token2id.get("##e")
                token_ids += [part1, part2]
                continue
                
            token_id = self.token2id.get(token)
            token_ids.append(token_id)
            count += 1

        token_ids.append(102)
        count +=1

        # Padding
        if padding:
            for _ in range(count, max_len):
                 token_ids.append(0)
        
        return token_ids
    
    def preprocess(self, text, padding=True, max_len=87):
        token_ids = self.tokenize(text, padding=padding, max_len=max_len)
        return np.array([token_ids], dtype=np.int32) # float32 for text_classification.tflite


data_util = DataUtil()

In [26]:
data_util.tokenize("ive been")

[101,
 None,
 None,
 None,
 102,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

# Load Transformers Model (TF)

In [27]:
model = TFAutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

Some layers from the model checkpoint at bhadresh-savani/distilbert-base-uncased-emotion were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/distilbert-base-uncased-emotion and are newly initialized: ['dropout_39']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
model

<transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertForSequenceClassification at 0x130ceaf40>

In [29]:
model.config

DistilBertConfig {
  "_name_or_path": "bhadresh-savani/distilbert-base-uncased-emotion",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "sadness",
    "1": "joy",
    "2": "love",
    "3": "anger",
    "4": "fear",
    "5": "surprise"
  },
  "initializer_range": 0.02,
  "label2id": {
    "anger": 3,
    "fear": 4,
    "joy": 1,
    "love": 2,
    "sadness": 0,
    "surprise": 5
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.19.4",
  "vocab_size": 30522
}

# Load tokenizer

In [30]:
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

In [31]:
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

In [32]:
from datasets import load_dataset
# emotions = load_dataset("emotion")
emotions = load_dataset("SetFit/emotion")  # emotion is broken right now

Using custom data configuration SetFit--emotion-89147fdf376d67e2
Reusing dataset json (/Users/aj/.cache/huggingface/datasets/SetFit___json/SetFit--emotion-89147fdf376d67e2/0.0.0/da492aad5680612e4028e7f6ddc04b1dfcec4b64db470ed7cc5f2bb265b9b6b5)


  0%|          | 0/3 [00:00<?, ?it/s]

In [33]:
emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)
emotions_encoded.set_format("tf", columns=["input_ids", "attention_mask"])

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

# Test original model

https://huggingface.co/course/chapter2/2?fw=tf#model-heads-making-sense-out-of-numbers

In [34]:
test_id = 100
emotions["train"][test_id]

{'text': 'i wont let me child cry it out because i feel that loving her and lily when she was little was going to be opportunities that only lasted for those short few months',
 'label': 2,
 'label_text': 'love'}

In [35]:
print(emotions["train"][test_id]["text"])

i wont let me child cry it out because i feel that loving her and lily when she was little was going to be opportunities that only lasted for those short few months


In [36]:
test_input_data = emotions_encoded["train"][test_id]

In [37]:
test_input_data

{'input_ids': <tf.Tensor: shape=(87,), dtype=int64, numpy=
 array([ 101, 1045, 2180, 2102, 2292, 2033, 2775, 5390, 2009, 2041, 2138,
        1045, 2514, 2008, 8295, 2014, 1998, 7094, 2043, 2016, 2001, 2210,
        2001, 2183, 2000, 2022, 6695, 2008, 2069, 6354, 2005, 2216, 2460,
        2261, 2706,  102,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0])>,
 'attention_mask': <tf.Tensor: shape=(87,), dtype=int64, numpy=
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [38]:
outputs = model(test_input_data)

In [39]:
emotions["train"][test_id]  # original input data before preprocessing

{'text': 'i wont let me child cry it out because i feel that loving her and lily when she was little was going to be opportunities that only lasted for those short few months',
 'label': 2,
 'label_text': 'love'}

In [40]:
outputs.logits.shape

TensorShape([1, 6])

In [41]:
outputs.logits

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[-1.2143431 , -0.79977876,  5.70928   , -1.9749489 , -1.639312  ,
        -0.99667394]], dtype=float32)>

In [42]:
np.exp(outputs.logits)

array([[2.9690501e-01, 4.4942838e-01, 3.0165381e+02, 1.3876840e-01,
        1.9411354e-01, 3.6910504e-01]], dtype=float32)

In [43]:
predictions = tf.math.softmax(outputs.logits, axis=-1)
print(predictions)

tf.Tensor(
[[9.7955437e-04 1.4827619e-03 9.9522167e-01 4.5782723e-04 6.4042310e-04
  1.2177581e-03]], shape=(1, 6), dtype=float32)


In [44]:
predictions

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[9.7955437e-04, 1.4827619e-03, 9.9522167e-01, 4.5782723e-04,
        6.4042310e-04, 1.2177581e-03]], dtype=float32)>

# Convert and save model as tflite

In [22]:
def convert(model, saved_model_path):
    # Attempt conversion to tflite
    # Set TensorSpec - dynamic shape though
    input_spec = tf.TensorSpec([1, None], tf.int32)
    
    print(input_spec)

    # Save to correct tensor dims
    model._saved_model_inputs_spec = None
    model._set_save_spec(input_spec)

    # Convert
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()

    # Save the model.
    with open(saved_model_path, 'wb') as f:
      f.write(tflite_model)
    
    print(f"Saved to {saved_model_path}")
    
    return tflite_model

In [45]:
SAVED_MODEL_NAME

'emotion_classification.tflite'

In [218]:
tflite_model = convert(model, SAVED_MODEL_NAME)

TensorSpec(shape=(1, None), dtype=tf.int32, name=None)
























INFO:tensorflow:Assets written to: /tmp/tmplv_26gk5/assets


INFO:tensorflow:Assets written to: /tmp/tmplv_26gk5/assets
2023-01-04 21:25:35.553453: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-01-04 21:25:35.553485: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-01-04 21:25:35.553597: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /tmp/tmplv_26gk5
2023-01-04 21:25:35.569344: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2023-01-04 21:25:35.569359: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /tmp/tmplv_26gk5
2023-01-04 21:25:35.651704: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2023-01-04 21:25:35.928288: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /tmp/tmplv_26gk5
2023-01-04 21:25:36.065162: I tensorflow/cc/saved_model/loader.cc:301] SavedModel

Saved to bhadresh-emotion-classifier-output.tflite


In [None]:
# saved_model = tf.saved_model.load(SAVED_MODEL_NAME)
# print(saved_model.signatures)
# print(saved_model.signatures)
# saved_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY].inputs[0]
# concrete_func = model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
# new_model = tf.keras.models.load_model("output")
# new_model.summary()
# concrete_func = new_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
# tf.saved_model.save(new_model, "output-new", signatures={"infer": concrete_func})
# converter = tf.lite.TFLiteConverter.from_saved_model("output-new")

"""
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite ops.
    tf.lite.OpsSet.SELECT_TF_OPS  # enable TensorFlow ops.
]
converter.experimental_enable_resource_variables = True
tflite_model = converter.convert()
"""


# Test tflite model

In [46]:
from pathlib import Path

def run(interpreter, text): 
    token_ids = data_util.tokenize(text)
    input_ids = np.array([token_ids], dtype=np.int32)
    
    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    print("---INPUT DETAILS----")
    print(input_details)
    print("---OUTPUT DETAILS----")
    print(output_details)
     
    # Resize input shape based on current input
    interpreter.resize_tensor_input(input_details[0]['index'], input_ids.shape)
    interpreter.allocate_tensors()
    interpreter.set_tensor(input_details[0]['index'], input_ids)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    
    return output_data


def load_interpreter(saved_model_path):
    # Load the TFLite model and allocate tensors.
    if Path(saved_model_path).exists():
        print("Loading existing model")
        interpreter = tf.lite.Interpreter(model_path=saved_model_path)
    else:
        print("Converting model")
        interpreter = convert(model)
    return interpreter

In [47]:
interpreter = load_interpreter(SAVED_MODEL_NAME)

Loading existing model


In [59]:
input_data = emotions_encoded["train"][test_id]
input_data.keys()

dict_keys(['input_ids', 'attention_mask'])

In [58]:
# Expected
text = emotions["train"][test_id]["text"]
print(text)
input_data["input_ids"]

i wont let me child cry it out because i feel that loving her and lily when she was little was going to be opportunities that only lasted for those short few months


<tf.Tensor: shape=(87,), dtype=int64, numpy=
array([ 101, 1045, 2180, 2102, 2292, 2033, 2775, 5390, 2009, 2041, 2138,
       1045, 2514, 2008, 8295, 2014, 1998, 7094, 2043, 2016, 2001, 2210,
       2001, 2183, 2000, 2022, 6695, 2008, 2069, 6354, 2005, 2216, 2460,
       2261, 2706,  102,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0])>

In [73]:
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

In [75]:
text = """Election Day. Terrified. Democracy in peril. Trying not to dwell on it too much. Fascism vs Democracy on the ballot. Don't know if I'll write today. But I will try. Tired. Want to go back to sleep.
"""

In [76]:
encoded = tokenizer(text, padding=True, truncation=True)

In [89]:
len(encoded['input_ids'])

53

In [90]:
actual_input_ids = "101, 2602, 2154, 29625, 10215, 29625, 7072, 1999, 2566, 4014, 29625, 2667, 2025, 2000, 23120, 2006, 2009, 2205, 2172, 29625, 23779, 5443, 7072, 2006, 1996, 10428, 29625, 2123, 29618, 2102, 2113, 2065, 1045, 29618, 3363, 4339, 2651, 29625, 2021, 1045, 2097, 3046, 29625, 5458, 29625, 2215, 2000, 2175, 2067, 2000, 3637, 29625, 102".split(", ")

In [92]:
actual_input_ids = list(map(int, actual_input_ids))

In [93]:
actual_input_ids

[101,
 2602,
 2154,
 29625,
 10215,
 29625,
 7072,
 1999,
 2566,
 4014,
 29625,
 2667,
 2025,
 2000,
 23120,
 2006,
 2009,
 2205,
 2172,
 29625,
 23779,
 5443,
 7072,
 2006,
 1996,
 10428,
 29625,
 2123,
 29618,
 2102,
 2113,
 2065,
 1045,
 29618,
 3363,
 4339,
 2651,
 29625,
 2021,
 1045,
 2097,
 3046,
 29625,
 5458,
 29625,
 2215,
 2000,
 2175,
 2067,
 2000,
 3637,
 29625,
 102]

In [105]:
len(actual_input_ids)

53

In [107]:
for i, v in enumerate(actual_input_ids):
    print(encoded['input_ids'][i], v)

101 101
2602 2602
2154 2154
1012 29625
10215 10215
1012 29625
7072 7072
1999 1999
2566 2566
4014 4014
1012 29625
2667 2667
2025 2025
2000 2000
23120 23120
2006 2006
2009 2009
2205 2205
2172 2172
1012 29625
23779 23779
5443 5443
7072 7072
2006 2006
1996 1996
10428 10428
1012 29625
2123 2123
1005 29618
1056 2102
2113 2113
2065 2065
1045 1045
1005 29618
2222 3363
4339 4339
2651 2651
1012 29625
2021 2021
1045 1045
2097 2097
3046 3046
1012 29625
5458 5458
1012 29625
2215 2215
2000 2000
2175 2175
2067 2067
2000 2000
3637 3637
1012 29625
102 102


In [94]:
input_ids = np.array([actual_input_ids], dtype=np.int32)

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("---INPUT DETAILS----")
print(input_details)
print("---OUTPUT DETAILS----")
print(output_details)

# Resize input shape based on current input
interpreter.resize_tensor_input(input_details[0]['index'], input_ids.shape)
interpreter.allocate_tensors()
interpreter.set_tensor(input_details[0]['index'], input_ids)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])


---INPUT DETAILS----
[{'name': 'serving_default_args_0:0', 'index': 0, 'shape': array([ 1, 53], dtype=int32), 'shape_signature': array([-1, -1], dtype=int32), 'dtype': <class 'numpy.int32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
---OUTPUT DETAILS----
[{'name': 'StatefulPartitionedCall:0', 'index': 720, 'shape': array([1, 6], dtype=int32), 'shape_signature': array([-1,  6], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [95]:
output_data

array([[ 2.489684 , -1.3514287, -3.0598903, -0.2730896,  3.2131891,
        -2.4661255]], dtype=float32)

In [96]:
outputs.logits

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[-1.2143431 , -0.79977876,  5.70928   , -1.9749489 , -1.639312  ,
        -0.99667394]], dtype=float32)>

In [97]:
predictions = tf.math.softmax(output_data, axis=-1)

In [98]:
predictions

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[0.31673995, 0.00680031, 0.00123184, 0.0199915 , 0.65300584,
        0.0022306 ]], dtype=float32)>

In [99]:
predictions[0]

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([0.31673995, 0.00680031, 0.00123184, 0.0199915 , 0.65300584,
       0.0022306 ], dtype=float32)>

In [100]:
predictions_rounded = tf.math.round(predictions[0])
predictions_rounded

<tf.Tensor: shape=(6,), dtype=float32, numpy=array([0., 0., 0., 0., 1., 0.], dtype=float32)>

In [101]:
max_idx = tf.keras.backend.eval(tf.argmax(predictions_rounded, axis=0))

In [102]:
model.config.id2label[max_idx]

'fear'

In [103]:
print(emotions["train"][test_id]['label_text'])

love


# Export additional files

In [32]:
lines = [f"{token} {token_id}" for token, token_id in data_util.token2id.items()] 

In [33]:
"\n".join(lines)



In [34]:
with open("text_classification_vocab.bert.txt", "w+") as f: 
    f.write("\n".join(lines))

In [35]:
read_lines = []

with open("text_classification_vocab.bert.txt", "r") as f:
    read_lines = [line.strip().split(" ") for line in f.readlines()]

In [36]:
vocab = dict(list(map(lambda x: (x[0], int(x[1])), read_lines)))

In [7]:
vocab['didn']

2134

In [8]:
"test" + "1"

'test1'

In [10]:
text = "i didn't feel humiliated"

In [11]:
word = 'didnt'

In [12]:
def wordpiece(text):
    tokens = text.split(" ")
    input_ids = []
    for token in tokens:
        # Find biggest subword
        subword = ""
        for char in token: 
            if subword in vocab:
                print(subword)
                input_ids.append(vocab[subword])
                break
            
            if subword not in vocab:
                subword = subword + char
                
    return input_ids

In [13]:
wordpiece(text)

d
f
h


[1040, 1042, 1044]

In [14]:
'Test'[:-1]

'Tes'

In [37]:
# WRONG
def wordpiece(text):
    tokens = text.split(" ")
    input_ids = []
    for token in tokens:
        subword = ""
        
        for char in token: 
            subword = subword + char
        
        input_ids.append(subword)
            
    return input_ids

In [38]:
# WRONG
word = "didnt"
subword = word
max_idx = len(word) - 1
min_idx = -1

for i in range(max_idx, min_idx, -1):
    if subword not in vocab:
        subword = subword[:-1]
    
print(subword)

didn


# Implement WordPiece

https://huggingface.co/course/chapter6/6?fw=pt#tokenization-algorithm

In [39]:
text = emotions["train"][test_id]["text"]

NameError: name 'emotions' is not defined

In [40]:
text

NameError: name 'text' is not defined

In [8]:
def wordpiece(text):
    words = text.split(" ") 
    tokens = []
    for word in words:
        
        # Find biggest subword
        prefix = word 
        suffix = ""
        
        for i in range(len(word)-1, -1, -1):
            if prefix not in vocab: 
                suffix = suffix + prefix[-1]
                prefix = prefix[:-1]
                
        tokens.append(prefix)
        
        # Find subwords after  
        print(suffix)
        
    return tokens

In [139]:
wordpiece("")

m'


['i']

In [140]:
wordpiece("did")




['did']

In [54]:
# CORRECT From huggingface - wordpiece
def encode_word(word):
    tokens = []
    word = word.lower()
    while len(word) > 0:
        i = len(word)
        while i > 0 and word[:i] not in vocab:
            i -= 1
        if i == 0:
            return ["[UNK]"]
        tokens.append(word[:i])
        word = word[i:]
        if len(word) > 0:
            word = f"##{word}"
    return tokens

In [46]:
encode_word("I'm")

['i', "##'", '##m']

In [49]:
text = "I'm feeling a bit down to be quite honest."

In [50]:
for word in text.split(" "):
    print(encode_word(word))

['i', "##'", '##m']
['feeling']
['a']
['bit']
['down']
['to']
['be']
['quite']
['honest', '##.']


In [105]:
test_input_data["input_ids"]

<tf.Tensor: shape=(87,), dtype=int64, numpy=
array([  101,  1045,  2514,  5186, 23637,   102,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0])>

In [109]:
word = "didnt"
word[:len(word)]

'didnt'

In [110]:
word[1:]

'idnt'