# Installing dependencies

In [1]:
# we use the latest version of transformers, peft, and accelerate
!pip install -q accelerate peft transformers

# install bitsandbytes for quantization
!pip install -q bitsandbytes

# install trl for the SFT library
!pip install -q trl

# we need sentencepiece for the llama2 slow tokenizer
!pip install sentencepiece

# we need einops, used by falcon-7b, llama-2 etc
# einops (einsteinops) is used to simplify tensorops by making them readable
!pip install -q -U einops

# we need to install datasets for our training dataset
!pip install -q datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.2/102.2 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.0/225.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.0/102.0 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━

# Loading model Mistral 7b and dataset

In [4]:
# The model that we want to train from the Hugging Face hub
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# The instruction dataset to use found on HuggingFace
dataset_name = "KonradSzafer/stackoverflow_python_preprocessed"

# Fine-tuned model name
new_model = "Mistral-7B-Stackoverflow"
output_dir = "./results"

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    logging,
)

# load the quantized settings, we're doing 4 bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    # use the gpu
    device_map={"": 0}
)

# don't use the cache
model.config.use_cache = False

# Load the tokenizer from the model (llama2)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

# Testing the base model

In [6]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)


prompt = "If you're writing a library, or an app, where do the unit test files go? It's nice to separate the test files from the main app code, but it's awkward to put them into a 'tests' subdirectory inside of the app root directory, because it makes it harder to import the modules that you'll be testing. Is there a best practice here?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] If you're writing a library, or an app, where do the unit test files go? It's nice to separate the test files from the main app code, but it's awkward to put them into a 'tests' subdirectory inside of the app root directory, because it makes it harder to import the modules that you'll be testing. Is there a best practice here? [/INST] In software development, it's common to keep unit tests separate from the main application code for organization, maintainability, and testability reasons. However, you're correct that placing unit tests inside a 'tests' subdirectory within the app root directory can make importing the modules being tested more difficult.

Instead, many developers prefer to place the unit tests in a separate directory at the same level as the application code. This approach is known as the "flat test directory" structure. This way, you can import the modules being tested directly without


# Fine tuning the model

In [7]:
from datasets import load_dataset
from datasets import Dataset
# Load the dataset
dataset = load_dataset(dataset_name, split="train[:60%]")

df = dataset.to_pandas()

# Create the new 'text' column by concatenating the formatted text
df['text'] = '<s>[INST] ' + df['question'] + ' [/INST] (' + df['answer'] + ') </s>'

# Keep only the 'text' column in the new dataset
new_df = df[['text']]
# Convert DataFrame to dataset
new_df = Dataset.from_pandas(new_df)

Downloading readme:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.94M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3296 [00:00<?, ? examples/s]

In [8]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
num_train_epochs = 5
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    optim="adamw",
    save_steps=0,
    logging_steps=10,
    learning_rate=2e-3,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=new_df,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=100,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)


Map:   0%|          | 0/1978 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


{'loss': 3.2737, 'grad_norm': 0.8189225792884827, 'learning_rate': 0.0002666666666666667, 'epoch': 0.04}
{'loss': 2.527, 'grad_norm': 0.5640870332717896, 'learning_rate': 0.0005333333333333334, 'epoch': 0.08}
{'loss': 2.0549, 'grad_norm': 0.6681349873542786, 'learning_rate': 0.0008, 'epoch': 0.12}
{'loss': 1.9977, 'grad_norm': 0.5973101258277893, 'learning_rate': 0.0010666666666666667, 'epoch': 0.16}
{'loss': 1.9318, 'grad_norm': 0.9221495985984802, 'learning_rate': 0.0013333333333333333, 'epoch': 0.2}
{'loss': 1.9408, 'grad_norm': 0.9488720893859863, 'learning_rate': 0.0016, 'epoch': 0.24}
{'loss': 1.9349, 'grad_norm': 0.9293599724769592, 'learning_rate': 0.0018666666666666666, 'epoch': 0.28}
{'loss': 2.5969, 'grad_norm': 1.4373939037322998, 'learning_rate': 0.0019999784921417227, 'epoch': 0.32}
{'loss': 2.0387, 'grad_norm': 1.4310860633850098, 'learning_rate': 0.00199980643482652, 'epoch': 0.36}
{'loss': 1.8082, 'grad_norm': 1.2725275754928589, 'learning_rate': 0.001999462349800471, 

# Testing the fine tuned model

In [11]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "I have a multi-line string literal that I want to do an operation on each line, like so: inputString = '''Line 1 Line 2 Line 3''' I want to do something like the following: for line in inputString: doStuff()"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=300)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I have a multi-line string literal that I want to do an operation on each line, like so: inputString = '''Line 1 Line 2 Line 3''' I want to do something like the following: for line in inputString: doStuff() [/INST] (Line 1, Line 2, Line 3]
Doing this with Python is much slower than using string.split() since it requires converting the line into a list first.
Here is the code:
for line in inputString:
    doStuff()

Doing this requires me to build the line into a list first before performing the operations.

The advantage is that it allows me to have lines that are longer than the actual string length.

Note that this does not impact the overall performance of the program as it is run within the interpreter, but it does impact the performance of the individual lines.
This is a minor issue but it is worth noting that the most efficient way to build a list of strings from the input file is to chunk the file into lines and then build the list.
The alternative is to use list.index:


In [15]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "If you're writing a library, or an app, where do the unit test files go? It's nice to separate the test files from the main app code, but it's awkward to put them into a 'tests' subdirectory inside of the app root directory, because it makes it harder to import the modules that you'll be testing. Is there a best practice here?"
pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=600)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] If you're writing a library, or an app, where do the unit test files go? It's nice to separate the test files from the main app code, but it's awkward to put them into a 'tests' subdirectory inside of the app root directory, because it makes it harder to import the modules that you'll be testing. Is there a best practice here? [/INST] (I'm not sure why it's been mentioned here yet, but I know it's one of those really obvious things that "goes without a spoon", but it's not really easy to do.
So, here's my suggested way of doing it:

1. Create a new project with the desired name (e.g. my_project)
2. Add a new test file which contains the code to test the module
3. Run the test suite (which is actually a python script)
4. Go back to the root directory and run the project.py file (or python.py) from the test directory (e.g. test.py).

I'm sure it's related to the question here, but it's worth mentioning here (at least once for those who are paying close attention to the code). In t

In [16]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "How do I force Python's print function to output to the screen?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=500)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] How do I force Python's print function to output to the screen? [/INST] (The answer from @omg in python2:

>>> import sys
>>> sys.stdout = sys.stdout
>>> print 20
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
>>> 20

To get rid of the output just call sys.stdout.flush()

Note that this will also make sure the output goes into the system output buffer (aka stdout).

For older versions of Python, you can call sys.stdout.flush.

Note that the above usage is equivalent to the example in the question for stack overflow question:
>>> sys.stdout.flush(20)

Note that the above usage is equivalent to:
>>> 20.stdout.flush(20)

This means that the output is not stored in the system buffer and will be sent to the output buffer as soon as the script is closed.

Note that the above usage of Python 2.7 is equivalent to:
>>> 20.stdout.flush(20)

If you are using Python 3 (or higher), you can get the equivalent of the built-in function with:
>>> sys.stdout.flush(20)




In [24]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = """This is my code: import datetime today = datetime.date.today() print today This prints: 2008-11-22 which is exactly what I want BUT....I have a list I'm appending this to and then suddenly everything goes "wonky". Here is the code: import datetime mylist = [] today = datetime.date.today() mylist.append(today) print mylist This prints the following: [datetime.date(2008, 11, 22)] How on earth can I get just a simple date like "2008-11-22"?"""

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=400)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] This is my code: import datetime today = datetime.date.today() print today This prints: 2008-11-22 which is exactly what I want BUT....I have a list I'm appending this to and then suddenly everything goes "wonky". Here is the code: import datetime mylist = [] today = datetime.date.today() mylist.append(today) print mylist This prints the following: [datetime.date(2008, 11, 22)] How on earth can I get just a simple date like "2008-11-22"? [/INST] (I just wanted to add one extra note for those who are using the date_timezier formula from the datetime module. Just do this:
from datetime import timedelta
print mylist.date_from_date(today)

You get a date object with a timezone offset of 1080.

Or you can also get a timestamps with date_timezier formula as follows

from datetime import timedelta
print mylist.date_time_offset(today)
# prints: 2008-11-22

Just remember that date_timezier has a tzoffset which can be set from outside of the date-time zone!

Another advantage of this date

In [31]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "How do you change the size of figure drawn with matplotlib?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=400)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] How do you change the size of figure drawn with matplotlib? [/INST] (I would like to add a note that the default axes are not always equal to the axis of the figure. For example, in the following figure, the numbers on the axes are all equal:

---------------------------------------------------------------------

```
pl.gplot(1, 2)

---------------------------------------------------------------------

```

The numbers 1 and 2 are equal to 1 and 2 respectively.
You can change them with the following:
pl.gplot(1, 2, figsize=(6, 8.4))

or you can change them with:
pl.gplot(1, 2, figsize=(8, 8.4))

This means that the numbers 1 and 2 are equal to 8.4 and 8.8.

However, you need to change the numbers at once, as shown below.

```
pl.gplot(1, 2, figsize=6, axis='s')

Note that the syntax for the figure.call method is different from the one for the pl.instance method.
```
pl.gplot(1, 2, figsize=6)

However, you can also set the axis equal to 1 as follows:
pl.gplot(1, 2, figsize=8.4)



In [35]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "How do I connect to a MySQL database using a python program?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] How do I connect to a MySQL database using a python program? [/INST] (


Connecting to MySQL database from Python


Connecting to MySQL database from Python program


Connecting to MySQL database from Python program


Connecting to MySQL database from Python program


Connecting to MySQL database from Python program


Connecting to MySQL program from Python database


Connecting to MySQL program 

Connecting to MySQL database 

Connecting to MySQL program 

Connecting to MySQL database 

Connecting to MySQL program 

Connecting to MySQL program 

Connecting to MySQL program 

Connecting to MySQL program 

Connecting to MySQL program 

Connecting to MySQL program 

Connecting to MySQL program 

Connecting to MySQL


In [34]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "How do I connect to a MySQL database using a python program?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=300)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] How do I connect to a MySQL database using a python program? [/INST] (Oracle (MySQL) now supports a pure Python connector.  That means no binaries to install: it's just a Python library.  It's called "Connector/Python".  It's pure Python, so no binaries to install.  And it's backwards compatible with MySQL.  So if you have MySQL installed on your machine, you can use it to connect to MySQL from within MySQL.

Connector/Python also has a pure Python version.

Orconnector/PySql also has a pure Python version.

Connector/PySql also has a pure Python version.

Update: the most active MySQL question on Stack Overflow is this link: http://www.mysql.com/docs/connector/python/

Orchestration also has a pure Python version.

For those using Connector/PySql or Connector/PySql, you can simply do:
import connector
p = MySQL.connect()
p.connect()
p.connect()

p.connect()

p.connect()

p.connect()

p.connect()

p.connect()

etc.
) 

Connecting to MySQL.py connects to MySQL.com.  The above


In [37]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "I want a to be rounded to 13.95. >>> a 13.949999999999999 >>> round(a, 2) 13.949999999999999 The round function does not work the way I expected."

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I want a to be rounded to 13.95. >>> a 13.949999999999999 >>> round(a, 2) 13.949999999999999 The round function does not work the way I expected. [/INST] (I found this solution, which seems to be more robust than round: 
>>> a = 13.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999


In [40]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "What's the easiest way to shuffle an array with python?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=300)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] What's the easiest way to shuffle an array with python? [/INST] (Another interesting method using random.shuffle. This method has the advantage of being random.
Import random
import random
def my_shuffle(array):
    return array[np.random.rand()]

# Import random.random
import numpy

# Define a random.seed method.
import numpy.random

def my_shuffle(array):
    # Import random.random
    np.random.seed(array)  # This will randomize the array
    np.random.random.rand()  # Randomly select a (random.*) instance from array.random
    return array.random_shuffle(array.random())  # Randomly select a random.random instance from array.random

# Select a random instance from array.random.random

# Randomly select a shuffled instance from random.random

# Select a random.shuffle instance from array.random.random

# Select a random.shuffle instance from array.random.random

# Select a random.shuffle instance from array.random.random

# Select a random.shuffle instance from array.random.ra

In [41]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "I need a good explanation (references are a plus) on Python's slice notation. To me, this notation needs a bit of picking up. It looks extremely powerful, but I haven't quite got my head around it."

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=400)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I need a good explanation (references are a plus) on Python's slice notation. To me, this notation needs a bit of picking up. It looks extremely powerful, but I haven't quite got my head around it. [/INST] (This is a much better answer than I am, but I couldn't find a good answer anywhere on Google (this is my 1000+ online sliceable and filterable list of 1000+ online slices):

+ Online
+ Online sliceable indexes (e.g. [0:1000])
+ Online sliceable indexes (e.g. [0:1000]) + Online + Online

The official Python indexes are limited to 1000 entries (or more):

+ Online sliceable indexes (e.g. [0:1000]) + Online
+ Online sliceable indexes (or subscript) + Online
+ Online

+ Filterable (or subscriptable) + Indexable (or maybe even Zip)
+ Filterable (or maybe even Zip) + Online
+ Filterable (or maybe even Zip)

+ Official Python documentation (containing all the official documentation + documentation)
+ Filterable (or maybe even Zip) + Online
+ Official documentation (containing all th

In [42]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "I would like to know how to put a time delay in a Python script."

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=400)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I would like to know how to put a time delay in a Python script. [/INST] (You can use the sleep command. Example: 5 seconds delay.  Example:  from time import sleep
 print sleep.sleep(5)

This will make the sleep script wait for 5 seconds.

Note that this method will make the script wait indefinitely if there is a log file or error if the script does not complete.
Delaying the script for five seconds is one possible way of doing it. Another possible way is to use the time.sleep function. For example:
from time import sleep
def sleep_5_seconds():
    print sleep.sleep(5)

This will make the script wait for 5 seconds.

Here is how to do it:
import time
def wait_5_seconds():
    sleep.sleep(5)

This will make the script wait for 5 seconds.
The advantage is that this code is portable across time zones.
import time
def sleep_5_seconds():
    # Here we are doing something based on time
    sleep.sleep(5)
    print "sleeping for five seconds"

However this code will not work if the cod

In [53]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Is there a function to extract the extension from a filename?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=400)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] Is there a function to extract the extension from a filename? [/INST] (If you want to get all the subfolders, not just the first one, then you need to use:
os.path.splitExtension(filename)[1].pop(1)

If you want to extract only the first and all subsequent subfolders, then you need to use:
os.path.splitExtension(filename)[1:]

This will give you:
os.path.splitextension(filename)[1:1]

But if you want to extract the whole folder, you need to do:
os.path.splitextension(filename)[1:1:os.path.splitextension(filename)[1:os.path.os.path.isdir(filename)[1:os.path.os.os.extension)]

The result will be:
os.path.splitextension(filename)[1:os.path.os.os.extension]

Note: This method works only with Python 2.7. You can get the first one in this way: os.path.splitextension(filename)[1:os.path.os.extension.split("."))


## Example:
os.path.splitextension("folder1")
os.path.splitextension("folder2")

## Extension:
os.path.splitextension("folder1")

## Extension:
os.path.splitextension("folder1

In [51]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = """So, I'm playing with decorators in Python 2.6, and I'm having some trouble getting them to work. Here is my class file: class testDec: @property def x(self): print 'called getter' return self._x @x.setter def x(self, value): print 'called setter' self._x = value What I thought this meant is to treat x like a property, but call these functions on get and set. So, I fired up IDLE and checked it: >>> from testDec import testDec from testDec import testDec >>> t = testDec() t = testDec() >>> t.x t.x called getter Traceback (most recent call last): File "<stdin>", line 1, in <module> File "testDec.py", line 18, in x return self._x AttributeError: testDec instance has no attribute '_x' >>> t.x = 5 t.x = 5 >>> t.x t.x 5 Clearly the first call works as expected, since I call the getter, and there is no default value, and it fails. OK, good, I understand. However, the call to assign t.x = 5 seems to create a new property x, and now the getter doesn't work! What am I missing?"""

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=400)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] So, I'm playing with decorators in Python 2.6, and I'm having some trouble getting them to work. Here is my class file: class testDec: @property def x(self): print 'called getter' return self._x @x.setter def x(self, value): print 'called setter' self._x = value What I thought this meant is to treat x like a property, but call these functions on get and set. So, I fired up IDLE and checked it: >>> from testDec import testDec from testDec import testDec >>> t = testDec() t = testDec() >>> t.x t.x called getter Traceback (most recent call last): File "<stdin>", line 1, in <module> File "testDec.py", line 18, in x return self._x AttributeError: testDec instance has no attribute '_x' >>> t.x = 5 t.x = 5 >>> t.x t.x 5 Clearly the first call works as expected, since I call the getter, and there is no default value, and it fails. OK, good, I understand. However, the call to assign t.x = 5 seems to create a new property x, and now the getter doesn't work! What am I missing? [/INST] (I f

In [52]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = """I'm using this code to get standard output from an external program: >>> from subprocess import * >>> command_stdout = Popen(['ls', '-l'], stdout=PIPE).communicate()[0] The communicate() method returns an array of bytes: >>> command_stdout b'total 0\n-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file1\n-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file2\n' However, I'd like to work with the output as a normal Python string. So that I could print it like this: >>> print(command_stdout) -rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file1 -rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file2 I thought that's what the binascii.b2a_qp() method is for, but when I tried it, I got the same byte array again: >>> binascii.b2a_qp(command_stdout) b'total 0\n-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file1\n-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file2\n' Does anybody know how to convert the bytes value back to string? I mean, using the "batteries" instead of doing it manually. And I'd like it to be ok with Python 3."""

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=800)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I'm using this code to get standard output from an external program: >>> from subprocess import * >>> command_stdout = Popen(['ls', '-l'], stdout=PIPE).communicate()[0] The communicate() method returns an array of bytes: >>> command_stdout b'total 0
-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file1
-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file2
' However, I'd like to work with the output as a normal Python string. So that I could print it like this: >>> print(command_stdout) -rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file1 -rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file2 I thought that's what the binascii.b2a_qp() method is for, but when I tried it, I got the same byte array again: >>> binascii.b2a_qp(command_stdout) b'total 0
-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file1
-rw-rw-r-- 1 thomas thomas 0 Mar 3 07:03 file2
' Does anybody know how to convert the bytes value back to string? I mean, using the "batteries" instead of doing it manually. And I'd like it to be ok with Python 3. [/

In [56]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "In Python, how do you get the last element of a list?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=300)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] In Python, how do you get the last element of a list? [/INST] (If you want the last element, then you can do this:
last_index = len(list)

#to get the second element from the list
list_second_index = list.pop(1)

#to get the first element
#to get the second one
list_second_index = list_second_index[1:]

#to get all elements
#to get all elements
#to sum up with an array

#to get "first n" and "last n"
#to get "first" and "last"

#to get "first" and "last"

#to get "first" and "last"

#to get "first" and "last"

#to get "first" and "last" in one pass

#to get "first" and "last" in one pass

#to get "first" and "last" in one pass

#to get "first" and "last" in one pass

#to get "first" and "last" in one pass

#to get "first" and "last" in one pass

#to get "first" and "last" in one pass

#--one--one--one--one--one--one--one--one

#to get "first" and "last" in one pass

#--one--one--one--one--one--one--one--one--one--one--one--one

#--one--one--one--one--one--one--one--one--one--one

In [57]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "I want my script to wait until the user presses any key. How do I do that?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=300)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I want my script to wait until the user presses any key. How do I do that? [/INST] (I don't know of a built in Python method, but I have a hack I wrote that might be of use, here:
import os

def main():
    # CALLS SHUTDOWN IF IT FOUND A DEATH KEY OR LOG ENTRY
    print "You have", os.environ.get("DEATH")
    os.system("DEATH")
    return 0


CALLS FOR SHUTDOWN AND LOG FILES
OS.system("DEATH")

 return 1

CALLS FOR LOG FILES
os.system("DEATH")

return 1


CUSTOM CALLS FOR LOG FILEs
os.system("DEATH")

return log.call_death()

CUSTOOLS FOR LOG FILEs
os.system("DEATH")

return log.call_death()

CUSTOM CALLS FOR LOG FILEs
os.system("DEATH")

return log.call_death()

CUSTOM CALLS FOR LOG FILEs
os.system("DEATH")

return log.call_death()

CUSTOM CALLS FOR LOG FILEs
os.system("


In [54]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "How do I see the type of a variable whether it is unsigned 32 bit, signed 16 bit, etc.? How do I view it?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=500)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] How do I see the type of a variable whether it is unsigned 32 bit, signed 16 bit, etc.? How do I view it? [/INST] (I don't know of anything fancy, but I'm guessing that the syntax I'm about to describe is somewhat obscure.  I know _all_ means "unpacked", but I can't see _type. I know this is possible because I know what _ref refers to, which is _type's value.  I'm assuming _ref refers to something like a tuple, but I can't see _ref refer to anything.  Maybe this is me being overly pedantic?

_ref refers to the value of the variable.
_ref refers to a tuple of values.

I'm assuming _ref refers to the value of the variable, and _ref refers to the value of the tuple.

I'm guessing that _ref refers to the value of the variable, and _ref refers to the value of the tuple.

If I'm wrong, please let me know.

For example:
_ref = 123
_ref._ref = 123
_ref._ref = 124

_ref refers to the value of the tuple, and _ref refers to the value of the tuple.
If I'm wrong, please let me know.  For exa

In [55]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "I need to get a line count of a large file (hundreds of thousands of lines) in python. What is the most efficient way both memory- and time-wise? At the moment I do: def file_len(fname): with open(fname) as f: for i, l in enumerate(f): pass return i + 1 is it possible to do any better?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=600)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I need to get a line count of a large file (hundreds of thousands of lines) in python. What is the most efficient way both memory- and time-wise? At the moment I do: def file_len(fname): with open(fname) as f: for i, l in enumerate(f): pass return i + 1 is it possible to do any better? [/INST] (I don't know if this is an improvement, but it does seem to me like the code will be incomplete and will likely be worse performance than the code that does the same thing.
    def file_len(fname):
        with open(fname) as f:
            for i, l in enumerate(f):
                pass
        return i + 1

## This is a slightly hacked version of the above, it reads the file line by line and then determines how many lines are in there.
    def __init__(self):
        self = None
        for line in enumerate(f):
            pass
        self.length = len(f)

## This is probably the worst possible way, as most of the lines will be recycled into the buffer.
    def __init__(self):
    for 

In [47]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "What is the module/method used to get current time?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] What is the module/method used to get current time? [/INST] (Current Time:  >>>datetime.now().strftime("%Y%m%d")

Current Time:  >>>datetime.now().utcnow().strftime("%Y %m %d") 

# for the value of day  print out 
>>>print(datetime.now().strftime("%Y %m %d") % 323) 

# for the value of month 
>>>print(datetime.utcnow().strftime("%d") % 323) 

I.e. print out the value of the month.


# for the value of day 
>>>print(datetime.utcnow().strftime("%d") % 323) 

# for the value of day of month


# (or just print out the value of the day)

print (datetime.utcnow().strftime("%Y %m %d") % 323)




(or just print out the value of the day)




 (or just print out the value of the day)

















































 















 





 

 

 













 
















 

 
 





 












In [2]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "What is the module/method used to get current time?"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

NameError: name 'logging' is not defined