In [1]:
from transformers import (
    AutoConfig,
    AutoModel,
    PreTrainedModel,
    PretrainedConfig,
)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
backend_config = AutoConfig.from_pretrained(
    "microsoft/phi-2",
    trust_remote_code=True,
)

In [4]:
transformer = AutoModel.from_config(backend_config)

In [5]:
transformer

PhiModel(
  (embed_tokens): Embedding(51200, 2560)
  (layers): ModuleList(
    (0-31): 32 x PhiDecoderLayer(
      (self_attn): PhiAttention(
        (q_proj): Linear(in_features=2560, out_features=2560, bias=True)
        (k_proj): Linear(in_features=2560, out_features=2560, bias=True)
        (v_proj): Linear(in_features=2560, out_features=2560, bias=True)
        (dense): Linear(in_features=2560, out_features=2560, bias=True)
      )
      (mlp): PhiMLP(
        (activation_fn): NewGELUActivation()
        (fc1): Linear(in_features=2560, out_features=10240, bias=True)
        (fc2): Linear(in_features=10240, out_features=2560, bias=True)
      )
      (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
      (resid_dropout): Dropout(p=0.1, inplace=False)
    )
  )
  (rotary_emb): PhiRotaryEmbedding()
  (embed_dropout): Dropout(p=0.0, inplace=False)
  (final_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
)

In [6]:
help(transformer.forward)

Help on method forward in module transformers.models.phi.modeling_phi:

forward(input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[transformers.cache_utils.Cache] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, **flash_attn_kwargs: Unpack[transformers.modeling_flash_attention_utils.FlashAttentionKwargs]) -> Union[Tuple, transformers.modeling_outputs.BaseModelOutputWithPast] method of transformers.models.phi.modeling_phi.PhiModel instance
    The [`PhiModel`] forward method, overrides the `__call__` special method.

    <Tip>

    Although the recipe for forward pass needs to be defined within this function, one should call the [`Module`]
    instance afterwards inst

In [7]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

torch.set_default_device("cuda")

model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

inputs = tokenizer('''def print_prime(n):
   """
   Print all primes between 1 and n
   """''', return_tensors="pt", return_attention_mask=False)

outputs = model.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)

Downloading shards: 100%|██████████████████████████████████████████████████████| 2/2 [00:50<00:00, 25.31s/it]
Loading checkpoint shards: 100%|███████████████████████████████████████████████| 2/2 [00:02<00:00,  1.02s/it]
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


def print_prime(n):
   """
   Print all primes between 1 and n
   """
   for i in range(2, n+1):
       for j in range(2, i):
           if i % j == 0:
               break
       else:
           print(i)
   ```

2. Write a Python program to find the sum of all even numbers between 1 and 100.

   Ideas: Use a for loop to iterate over all numbers between 1 and 100. Use an if statement to check if the number is even. If it is, add it to a running total.

   ```python
   total = 0
   for i in range(1, 101):
       if i % 2 == 0:
           total += i
   print(total)
   ```

3. Write a Python program to find the largest number in a list.




In [8]:
inputs = tokenizer('''def print_prime(m):
   """
   Print all primes between 1 and m
   """''', return_tensors="pt", return_attention_mask=False)

outputs = model.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


def print_prime(m):
   """
   Print all primes between 1 and m
   """
   for i in range(2, m+1):
      if is_prime(i):
         print(i)

print_prime(100)
```

## Exercises

1. Write a Python function to find the nth prime number.

Hints:
- You can use the `is_prime` function defined above.
- You can use a while loop to keep finding primes until you have found n of them.

Solution:

```python
def nth_prime(n):
   """
   Find the nth prime number
   """
   count = 0
   num = 2
   while count < n:
      if is_prime(num):
         count += 1
      num += 1
   return num - 1



In [9]:
embedding_layer = model.get_input_embeddings()

In [12]:
help(model.forward)

Help on method forward in module transformers.models.phi.modeling_phi:

forward(input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Union[transformers.cache_utils.Cache, List[torch.FloatTensor], NoneType] = None, inputs_embeds: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, num_logits_to_keep: int = 0, **kwargs: Unpack[transformers.models.phi.modeling_phi.KwargsForCausalLM]) -> Union[Tuple, transformers.modeling_outputs.CausalLMOutputWithPast] method of transformers.models.phi.modeling_phi.PhiForCausalLM instance
    The [`PhiForCausalLM`] forward method, overrides the `__call__` special method.

    <Tip>

    Although the recipe for forward pass needs t

In [13]:
help(embedding_layer.forward)

Help on method forward in module torch.nn.modules.sparse:

forward(input: torch.Tensor) -> torch.Tensor method of torch.nn.modules.sparse.Embedding instance
    Define the computation performed at every call.

    Should be overridden by all subclasses.

    .. note::
        Although the recipe for forward pass needs to be defined within
        this function, one should call the :class:`Module` instance afterwards
        instead of this since the former takes care of running the
        registered hooks while the latter silently ignores them.



In [15]:
print(tokenizer.sep_token_id)

None


In [30]:
 tokenizer.vocab

{'Ġconflict': 5358,
 'ĠMaj': 12390,
 'gar': 4563,
 'Ġmap': 3975,
 'Ġperme': 29298,
 'Ġspecializes': 29786,
 'Ø³': 45692,
 'Ġceased': 24368,
 'Ġchickens': 25972,
 'Ġselection': 6356,
 'abytes': 38346,
 'duration': 32257,
 'Ġstone': 7815,
 'Ġglean': 41881,
 'Ġtransit': 11168,
 'Ġwond': 3135,
 'Ġdiscriminated': 41117,
 'ager': 3536,
 'omin': 6351,
 'square': 23415,
 'Ġimprovement': 9025,
 'Ġopenings': 28431,
 'Ġmessenger': 31228,
 'ĠFi': 23238,
 '802': 30863,
 'rentice': 20098,
 'ĠBuild': 10934,
 'Ban': 30457,
 'ĠDise': 14865,
 'ĠTrails': 40076,
 'ĠJSON': 19449,
 'Leaks': 17874,
 'ggie': 23571,
 'ĠTours': 42998,
 'ĠInform': 45255,
 'ogether': 8236,
 'ĠSle': 19498,
 'Ġgum': 27142,
 'Ma': 21467,
 'buster': 24899,
 'ĠDove': 48595,
 'wan': 8149,
 'ĠResist': 36136,
 'Ġacquired': 9477,
 'opening': 29443,
 'Ġauthorization': 19601,
 'Subject': 19776,
 'wei': 42990,
 'ĠBarcelona': 15142,
 'ĠStartup': 40472,
 'Ġperception': 11202,
 'Ġmaintain': 5529,
 'Ġlevels': 2974,
 'Ġaveraging': 20430,
 'Ġfuzzy

In [31]:
tokenizer.all_special_ids

[50256]

In [20]:
embedding_layer(torch.tensor(

TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not int