In [1]:
%%capture
!pip install rouge-score
!pip install peft
!pip install trl
!pip install bitsandbytes
!pip install lightning

In [3]:
import os
import gc
import torch
import pandas as pd
import wandb
import lightning as L

from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.optim import AdamW
import torch.nn.functional as F

from datasets import Dataset
from transformers import TrainerCallback
import transformers
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
)

from peft import (
    PeftModel,
    LoraConfig,
    prepare_model_for_kbit_training,
    get_peft_model,
)

from trl import SFTTrainer


In [17]:

splits = {'train': 'openassistant_best_replies_train.jsonl', 'test': 'openassistant_best_replies_eval.jsonl'}
df_train = pd.read_json("hf://datasets/timdettmers/openassistant-guanaco/" + splits["train"], lines=True)
df_test = pd.read_json("hf://datasets/timdettmers/openassistant-guanaco/" + splits["test"], lines=True)

first_row = df_train.iloc[0]

# Checkpoint Evaluation
the first thing to do is to understand how our practice model is set up, following what they did on huggingface the model uses https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.1 as a base, so before finetuning we want to understand how the model performs.<br><br>
The idea is to see how it handles the questions in the dataset format and also we want to see how it performs on the test set.

In [10]:
model = "TinyLlama/TinyLlama-1.1B-step-50K-105b"
tokenizer = AutoTokenizer.from_pretrained(model)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device=0 if device == "cuda" else -1,  # GPU: device=0, CPU: device=-1
)

prompt = "What is the best programming language for Machine Learning?"
formatted_prompt = f"### Human: {prompt} ### Assistant:"
sequences = pipeline(
    formatted_prompt,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    repetition_penalty=1.5,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=32,
)
for seq in sequences:
    print(seq["generated_text"])

cuda


config.json:   0%|          | 0.00/607 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

Device set to use cuda:0


### Human: What is the best programming language for Machine Learning? ### Assistant: Can you tell us more about your background and how does it help in understanding machine learning problem.  #### Q1A:- How would someone be able to answer


In [18]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import Dataset

# ####################################
# STEP 1: Load model and tokenizer
# ####################################
base_model_id = "TinyLlama/TinyLlama-1.1B-step-50K-105b"
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token 

# ####################################
# STEP 2: Prepare data in correct format
# ####################################


test_dataset = Dataset.from_pandas(df_test)

def tokenize_function(examples):
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        max_length=512,
        padding="max_length"
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True, remove_columns=["text"])

# ####################################
# STEP 3: Trainer for evaluate
# ####################################
training_args = TrainingArguments(
    output_dir="./results_original_model",
    per_device_eval_batch_size=8, 
    fp16=True, 
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=tokenized_test_dataset, 
)


# ####################################
# STEP 4: Evaluate and print the test loss
# ####################################

results = trainer.evaluate()
print(f"Mean Loss for the original model on the test set: {results['eval_loss']}")


Map:   0%|          | 0/518 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Mean Loss for the original model on the test set: 7.8757123947143555


# ################


# ########

# 🌟 Our complete Fine-Tuned Model

In this section, we demonstrate the process of fine-tuning a Transformer model (`TinyLlama-1.1B-step-50K-105b`) using the `Trainer` from Hugging Face. This is a **full fine-tuning approach**, where the entire model is trained.

---

## 🚀 Steps to Fine-Tune the Model

### 1️⃣ **Data Preparation for the Chat Task**
To begin, the data already in the corrected  formatted suitable for chat-based tasks. We use a delimiter format that incorporates:
- `### Human`: prompt
- `### Assistant`: the response

This structured format is both the training and evaluation datasets.

---

### 2️⃣ **Loading a Pre-Trained Model**
We load a pre-trained model configured with reduced precision (**FP16**) to manage resource constraints (we have max 16 GB or GPU). This allows us to perform efficient fine-tuning while balancing accuracy and performance.

The training and test datasets are tokenized with a maximum sequence length of 512 to ensure compatibility with the model.

---

### 3️⃣ **Applying Transformer Trainer for Fine-Tuning**
We perform **full fine-tuning** of the model, meaning all the model's parameters are updated during training. This approach ensures the model is fully adapted to the task at hand.

The training task is configured for **causal language modeling (CAUSAL_LM)**, which is suitable for autoregressive tasks like chat-based interactions. Tokenized datasets for training and testing are fed directly into the `Trainer`.

---

### 4️⃣ **Trainer Configuration and Training**
The `Trainer` is configured with the following settings:
- **Batch Size**: We use a `per_device_train_batch_size` of 1 to minimize memory usage.
- **Gradient Accumulation**: By accumulating gradients over **8** steps, we simulate a batch size of 8.
- **Precision**: We adopt `bfloat16` precision to further reduce the GPU load without sacrificing too much accuracy.
- **Learning Rate**: A low learning rate (`3e-5`) is used to ensure stable convergence.

This configuration balances efficiency and accuracy, allowing us to train the model effectively within hardware constraints.

---

### 🎉 **Results**
This setup, we've successfully fine-tuned our model and our loss on test test is **1.25**

In [8]:
import os
os.environ["WANDB_MODE"] = "disabled"
os.environ["WANDB_DISABLED"] = "true"


In [3]:

splits = {'train': 'openassistant_best_replies_train.jsonl', 'test': 'openassistant_best_replies_eval.jsonl'}
df_train = pd.read_json("hf://datasets/timdettmers/openassistant-guanaco/" + splits["train"], lines=True)
df_test = pd.read_json("hf://datasets/timdettmers/openassistant-guanaco/" + splits["test"], lines=True)


# ####################################
# STEP 1 we make data in correct format
# STEP 2 We load the model
# ####################################

test_dataset = Dataset.from_pandas(df_test)
train_dataset = Dataset.from_pandas(df_train)

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-step-50K-105b")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token 

model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-step-50K-105b",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    config={"dropout": 0.1}
)

def tokenize_function(examples):
    tokenized = tokenizer(
        examples["text"], 
        truncation=True,
        max_length=512,
        padding="max_length",
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_train_dataset = train_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"]
)

tokenized_test_dataset = test_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"]
)



# ####################################
# STEP 3 Train + Trainer
# ####################################

training_args = TrainingArguments(
    output_dir="./finetuned_model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    learning_rate=3e-5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=100,
    save_steps=3000,
    eval_steps=500, 
    evaluation_strategy="steps", 
    save_total_limit=2,
    fp16=False,
    bf16=True,
    report_to=[],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
)

trainer.train()

# ####################################
# STEP 4 Save
# ####################################

trainer.save_model("./finetuned_model")
tokenizer.save_pretrained("./finetuned_model")


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/607 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

Map:   0%|          | 0/9846 [00:00<?, ? examples/s]

Map:   0%|          | 0/518 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
500,10.1332,1.265551
1000,10.057,1.259194


('./finetuned_model/tokenizer_config.json',
 './finetuned_model/special_tokens_map.json',
 './finetuned_model/tokenizer.model',
 './finetuned_model/added_tokens.json',
 './finetuned_model/tokenizer.json')

# #

# 🌟 QLoRA Fine-Tuning Approach

In this project, we fine-tune a Transformer model (`TinyLlama-1.1B-step-50K-105b`) using **Quantized Low-Rank Adaptation (QLoRA)**. This approach is  efficient, with a **4-bit quantization** and **LoRA adapters** we can reduce memory usage and computational cost. 

---

## 🚀 Steps to Fine-Tune the Model with QLoRA

### 1️⃣ **Data Formatting**
As in the previous approach, the dataset is pre-formatted for chat-based tasks. The structured format includes:
- **`### Human`**: Marks the user's prompt.
- **`### Assistant`**: Marks the assistant's response.

---

### 2️⃣ **Model Loading with 4-Bit Quantization**
To optimize memory usage and computational efficiency, the base model is loaded with **4-bit quantization** using the following settings:
- **Quantization Type**: `NF4` (Normalized Float 4) for improved precision.
- **Compute Precision**: `bfloat16` for efficient computations on modern GPUs.
- **Double Quantization**: Enabled to further reduce memory requirements.

---

### 3️⃣ **LoRA Configuration**
We apply **Low-Rank Adaptation (LoRA)** to add trainable adapters to the model. The configuration includes:
- **Rank (r)**: 8
- **Alpha**: 16
- **Dropout**: 0.05
- **Bias**: None 

The model is prepared for k-bit training using `prepare_model_for_kbit_training` before applying LoRA with `get_peft_model`. This is used to configure the model when using quantisation (4-bit or 8-bit). Sets the model to have quantised weights and prevents certain parts from being quantised instead (e.g. embedding)
Also disables traiing in some parts of the model

---

### 4️⃣ **Trainer Configuration and Training**
The fine-tuning process is acived  using `PyTorch Lightning` . The configuration includes:
- **Batch Size**: 4 with gradient accumulation over 64 steps
- **Precision**: Mixed `bfloat16` (optimize GPU memory usage)
- **Gradient Clipping**: 1.0 (prevent exploding gradients)
- **Epochs**: 1

---

### 5️⃣ **Saving the Fine-Tuned Model**
The fine-tuned model and tokenizer are saved locally in the directory `./finetuned_qlora_model`. This enables easy reuse and evaluation in future experiments.

---

### 🎉 **Results**
The fine-tuning process achieved a **test loss of 2.18**

In [5]:


# ####################################
# STEP 2 Load quantizate model
# ####################################
splits = {'train': 'openassistant_best_replies_train.jsonl', 'test': 'openassistant_best_replies_eval.jsonl'}

train_formatted = pd.read_json("hf://datasets/timdettmers/openassistant-guanaco/" + splits["train"], lines=True)
test_formatted = pd.read_json("hf://datasets/timdettmers/openassistant-guanaco/" + splits["test"], lines=True)
train_dataset = Dataset.from_dict({"text": train_formatted["text"].tolist()})
test_dataset = Dataset.from_dict({"text": test_formatted["text"].tolist()})

# train_dataset = Dataset.from_dict({"text": train_formatted})
# test_dataset = Dataset.from_dict({"text": test_formatted})

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      
    bnb_4bit_compute_dtype=torch.bfloat16,  
    bnb_4bit_quant_type="nf4",              
    bnb_4bit_use_double_quant=True          
)

base_model_id = "TinyLlama/TinyLlama-1.1B-step-50K-105b"
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",          
    quantization_config=bnb_config
)

model.gradient_checkpointing_enable()


# ####################################
# STEP 3 LoRa
# ####################################

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = prepare_model_for_kbit_training(model) 
model = get_peft_model(model, lora_config)


def collate_fn(batch):
    inputs = tokenizer(
        [sample['text'] for sample in batch], 
        return_tensors="pt", 
        padding=True, 
        truncation=True, 
        max_length=512
    )
    labels = inputs.input_ids.clone()
    labels[~inputs.attention_mask.bool()] = -100
    return inputs, labels

train_loader = DataLoader(
    train_dataset, 
    collate_fn=collate_fn, 
    shuffle=True, 
    batch_size=4, 
    num_workers=2
)

# just as we see in class
class LightningWrapper(L.LightningModule):
    def __init__(self, model, lr=1e-4):
        super().__init__()
        self.model = model
        self.lr = lr

    def configure_optimizers(self):
        return AdamW(self.parameters(), lr=self.lr)

    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(**inputs)
        # Shift logits to exclude the last element
        # shift labels to exclude the first element
        logits = outputs.logits[..., :-1, :].contiguous()
        labels = labels[..., 1:].contiguous()
        # Compute LM loss token-wise
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
        self.log("train_loss", loss)
        return loss

lightning_model = LightningWrapper(model)


# ####################################
# STEP 4 Trainer + Train
# ####################################

trainer = L.Trainer(
    accumulate_grad_batches=64,
    precision="bf16-mixed", 
    gradient_clip_val=1.0,
    max_epochs=1
)

gc.collect()
torch.cuda.empty_cache()
trainer.fit(lightning_model, train_dataloaders=train_loader)


# ####################################
# STEP 5 Save
# ####################################

model.save_pretrained("./finetuned_qlora_model")
tokenizer.save_pretrained("./finetuned_qlora_model")



tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/607 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

INFO: Using bfloat16 Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name  | Type                 | Params | Mode 
-------------------------------------------------------
0 | model | PeftModelForCausalLM | 616 M  | train
-------------------------------------------------------
1.1 M     Trainable params
615 M     Non-trainable params
616 M     Total params
2,466.931 Total estimated model params size (MB)
442       Modules in train mode
293       Modules in eval mode
  self.pid = os.fork()


Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=1` reached.


('./finetuned_qlora_model/tokenizer_config.json',
 './finetuned_qlora_model/special_tokens_map.json',
 './finetuned_qlora_model/tokenizer.model',
 './finetuned_qlora_model/added_tokens.json',
 './finetuned_qlora_model/tokenizer.json')

In [6]:
class LightningWrapper2(L.LightningModule):
    def __init__(self, model, lr=1e-4):
        super().__init__()
        self.model = model
        self.lr = lr

    def configure_optimizers(self):
        return AdamW(self.parameters(), lr=self.lr)

    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(**inputs)
        logits = outputs.logits[..., :-1, :].contiguous()
        labels = labels[..., 1:].contiguous()
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
        self.log("train_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(**inputs)
        logits = outputs.logits[..., :-1, :].contiguous()
        labels = labels[..., 1:].contiguous()
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
        self.log("test_loss", loss)
        return loss
        
lightning_model_test = LightningWrapper2(model)
test_loader = DataLoader(
    test_dataset, 
    collate_fn=collate_fn, 
    shuffle=False, 
    batch_size=2, 
    num_workers=2
)
trainer.test(lightning_model_test, dataloaders=test_loader)




INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 2.185150384902954}]

# FOR MAC

## First fine tuned

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import warnings

device = torch.device("mps")

# ####################################
# STEP 1: load model + tokenizer
# ####################################

model_path = "Models/finetuned_model_2_1"

model = AutoModelForCausalLM.from_pretrained(
    model_path, 
    torch_dtype=torch.float32,
    device_map={"": device} 
)

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

# ####################################
# STEP 2: Genera la risposta
# ####################################

prompt = "What is the best programming language for Machine Learning?"
formatted_prompt = f"### Human: {prompt} ### Assistant: "

inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100, 
        do_sample=True, 
        top_k=50, 
        top_p=0.7,
        temperature=0.7,
        repetition_penalty=1.1
    )


response = tokenizer.decode(outputs[0], skip_special_tokens=True)
generated_response = response.split("### Assistant:")[-1].strip()
generated_response = generated_response.split("###")[0].strip()


print("\nGenerated Response:")
print(generated_response)



Generated Response:
The best programming language for machine learning is Python. Python has a wide range of features and capabilities that make it a popular choice for machine learning developers. Here are some of the most popular features of Python:

- Fast development: Python is a general purpose language that can be used to develop applications for various platforms, including web servers, mobile devices, and embedded systems. It's also a good choice for developing machine learning algorithms that require high performance and efficiency.

- E


## QLoRa

In [4]:

from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, StoppingCriteriaList
from peft import PeftModel
import torch

device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

model_path = "Models/finetuned_qlora_model_2"
base_model_id = "TinyLlama/TinyLlama-1.1B-step-50K-105b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float32, 
    device_map=None
)

model = PeftModel.from_pretrained(base_model, model_path)
model.to(device)
model.eval()

prompt = "What is the best programming language for Machine Learning?"
prompt_2 = f"### Human: {prompt} ### Assistant: "

inputs = tokenizer(prompt_2, return_tensors="pt")
inputs = {key: value.to(device) for key, value in inputs.items()}

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        top_k=30,
        top_p=0.7,
        temperature=0.7,
        repetition_penalty=1.1
    )


response = tokenizer.decode(outputs[0], skip_special_tokens=True)
generated_response = response.split("### Assistant:")[-1].strip()
generated_response = generated_response.split("###")[0].strip()
print("Generated response:", generated_response)


Generated response: 🤖 It's a difficult question to answer, but we are currently in the process of creating a list of languages that we feel have the best chance of being used by machine learning professionals. If you have any suggestions, please leave them below!


# 🌟 Interactive Chatbot with Fine-Tuned Model

This script demonstrates how to load a fine-tuned model for interactive chatbot functionality. It leverages a **Transformer-based model** trained on a chat-style dataset and allows for real-time user interaction.

---

## 🚀 Key Steps in the Script

### 1️⃣ **Environment and Device Setup**
The script dynamically selects the device for computation:
- **MPS** (Metal Performance Shaders) if available (for macOS).
- **CPU** as a fallback if MPS is not supported.

---

### 2️⃣ **How to Interact with the Model**

The script runs an interactive loop where the user can type questions or prompts, and the fine-tuned model generates a response. Here's how it works:

1. **User Input**:
   - The user enters their message into the console. 
   - To end the interaction, the user can type `esc` or `end`.

2. **Prompt Construction**:
   - you can message as you wish, the code handles the format
   - If desired, `MEMORY_SAVING` can be enabled to allow the model to see the previous chat 


In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

MEMORY_SAVING = False

device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

model_path = "Models/finetuned_model_2_1"

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float32,
    device_map={"": device}
)

model.to(device)
model.eval()

prev_prompt = ""

while True:
    user_input = input("You: ").strip()
    print(f"you: {user_input}")
    if user_input.lower() == "esc" or user_input.lower() == "end" :
        print("\n\nEND :)")
        break

    prompt = f"### Human: {user_input} ### Assistant:"
    prompt = prev_prompt + prompt

    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,
        num_beams=5,
        repetition_penalty=1.1
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if MEMORY_SAVING:
        response = response.split(prev_prompt)[1].strip()

    response = response.split("### Assistant:")[1].split("###")[0].strip()

    if MEMORY_SAVING:
        prev_prompt += f"### Human: {user_input} ### Assistant: {response} ###"
    
    print(f"Bot: {response}")

#hello
#How do I import TensorFlow into Python?
#I get the error: `ModuleNotFoundError: No module named 'tensorflow'`. What should I do?
#perfect, everything worked thank you very much. Bye


  from .autonotebook import tqdm as notebook_tqdm


you: hello
Bot: Hello! How are you?
you: how do I import TensorFlow into Python?
Bot: To import TensorFlow into Python, you can follow these steps:

1. Install TensorFlow by running the following command:

```bash
pip install tensorflow
```

2. Import TensorFlow by running the following
you: I get the error: `ModuleNotFoundError: No module named 'tensorflow'`. What should I do?
Bot: The error you're seeing is because TensorFlow is not installed on your system. You can install TensorFlow by running the following command:

```
pip install tensorflow
```

This will install TensorFlow on your system
you: perfect, everything worked thank you very much. Bye
Bot: Thank you very much for your kind words. I'm glad you enjoyed it.
you: esc


END :)
