In [27]:
!pip install -q  torch peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 accelerate

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    TextDataset,
    DataCollatorForLanguageModeling
)

from peft import LoraConfig
from trl import SFTTrainer

In [28]:
import locale

# Show the encoding Python reports before the override below is installed.
print(locale.getpreferredencoding())


def getpreferredencoding(do_setlocale = True):
    """Return "UTF-8" unconditionally; `do_setlocale` is accepted but ignored."""
    return "UTF-8"


# Swap the stdlib lookup for the stub so every later call to
# locale.getpreferredencoding() answers "UTF-8".
locale.getpreferredencoding = getpreferredencoding

UTF-8


In [29]:
! pip install xformers



# Preprocessing

In [30]:
import os
# Helper for reading plain-text files

def read_txt(file_path):
    """Return the entire contents of a UTF-8 encoded text file as one string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded text of the whole file.

    Raises:
        OSError: If the file cannot be opened (missing, unreadable, ...).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Open read-only: nothing is written here, so "r+" would only add a
    # needless write-permission requirement (and fail on read-only files).
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()

In [31]:
# Load the training corpus into memory as a single string.
# NOTE: despite its name, `train_directory` holds the path of one text file,
# not a directory.
train_directory = '/content/filtered_data.txt'
text_data = read_txt(train_directory)


In [32]:
# Preview only the beginning of the corpus; echoing the whole string floods
# the notebook output and bloats the saved file.
text_data[:1000]

"Book Title: (Adaptive Computation and Machine Learning series) Ian Goodfellow, Yoshua Bengio, Aaron Courville - Deep Learning-The MIT Press (2016)\nWebsite\nIntroduction\n  Who should read the Book\n  Historical Trends in Deep Learning\n--- Applied Math & ML Basics\nLinear Algebra\n  Scalars, Vectors, Matrices & Tensors\n  Multiplying Matrices & Vectors\n  Identity & Inverse Matrices\n  Linear Dependence & Span\n  Norms\n  Special Kinds of Matrices & Vectors\n  Eigendecomposition\n  Singular Value Decomposition\n  Moore-Penrose Pseudoinverse\n  Trace Operator\n  Determinant\n  Example - Principal Components Analysis\nProbability & Information Theory\n  Why Probability\n  Random Variables\n  Probability Distributions\n  Marginal Probability\n  Conditional Probability\n  Chain Rule of Conditional Probabilities\n  Independence & Conditional Independence\n  Expectation, Variance & Covariance\n  Common Probability Distributions\n  Properties of Common Functions\n  Bayes Rule\n  Technical D

# Fix encoding problem

In [33]:
import pandas as pd

# Re-encode the raw CSV as UTF-8: decode it as Latin-1 on read, then write it
# back out as UTF-8 (presumably the source file is not valid UTF-8 — confirm).
df = pd.read_csv("/content/data.csv", encoding="latin")

# Write with an absolute path, next to the input file, so that the following
# load_dataset(..., data_files='/content/data2.csv') call finds the file no
# matter what the kernel's current working directory is.
df.to_csv("/content/data2.csv", encoding="utf-8", index=False)

In [34]:
# Load the re-encoded CSV through the `datasets` CSV loader; the result is
# indexed by split name (the 'train' split is what gets used further down).
training_data = load_dataset('csv', data_files='/content/data2.csv')

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

## Add credentials

In [9]:
! huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [35]:
# --- Names -------------------------------------------------------------
# Hub id of the checkpoint to start from, and the name under which the
# fine-tuned model is saved later on.
base_model_name = "meta-llama/Llama-2-7b-chat-hf"
refined_model = "llama-2-7b-books"

# --- 4-bit quantization settings -----------------------------------------
# NF4 weights, float16 compute, no double quantization.
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

# --- Tokenizer -----------------------------------------------------------
llama_tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
)
llama_tokenizer.padding_side = "right"  # Fix for fp16
# Reuse the end-of-sequence token for padding.
llama_tokenizer.pad_token = llama_tokenizer.eos_token

# --- Model ---------------------------------------------------------------
# Load the quantized checkpoint with the whole module tree (key "") mapped
# to device 0.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map={"": 0},
    quantization_config=quant_config,
)
base_model.config.pretraining_tp = 1
base_model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [36]:
# Generate text with the base model to get a reference output for the prompt
query = "Generate a course outline for a deep learning course"

# Set the desired output length (passed to the pipeline as `max_length`)
max_length = 300  # Adjust this value as needed

# Build the generation pipeline.
# `temperature` and `top_k` are sampling parameters, so sampling is switched
# on explicitly instead of relying on whatever the checkpoint's generation
# config happens to specify.
text_gen = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=llama_tokenizer,
    max_length=max_length,
    do_sample=True,
    temperature=0.3,
    top_k=10,
)
output = text_gen(query)

# Print the generated course outline and plan
print(output[0]['generated_text'])


Generate a course outline for a deep learning course that covers the basics of deep learning, including neural networks, activation functions, backpropagation, and optimization techniques. Unterscheidung between supervised and unsupervised learning. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. Deep Learning Course Outline. 

In [19]:
# Inspect the 'train' split (feature names and row count) before training.
training_data['train']

Dataset({
    features: ['train'],
    num_rows: 6
})

In [23]:
# LoRA adapter hyper-parameters, handed to the trainer through `peft_config`
peft_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Training arguments, grouped by concern
train_params = TrainingArguments(
    # -- output / bookkeeping --
    output_dir="./results_modified",
    save_steps=25,
    logging_steps=5,
    report_to="tensorboard",
    # -- run length (max_steps=-1 leaves the length to num_train_epochs) --
    num_train_epochs=100,
    max_steps=-1,
    # -- batching --
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # -- optimisation --
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # NOTE(review): warmup_ratio probably has no effect with the plain
    # "constant" scheduler; "constant_with_warmup" may have been intended —
    # confirm before changing, since it alters the training run.
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    # -- mixed precision (both disabled) --
    fp16=False,
    bf16=False,
)

# Supervised fine-tuning trainer; the text to train on is read from the
# dataset column named "train".
fine_tuning = SFTTrainer(
    model=base_model,
    args=train_params,
    tokenizer=llama_tokenizer,
    peft_config=peft_parameters,
    train_dataset=training_data['train'],
    dataset_text_field="train",
)

# Run the fine-tuning loop
fine_tuning.train()

# Persist the trained model under the `refined_model` name
fine_tuning.model.save_pretrained(refined_model)

Step,Training Loss
5,1.5684
10,1.4885
15,1.3389
20,1.1977
25,1.1385
30,1.0725
35,0.8819
40,0.6536
45,0.5474
50,0.3646


In [26]:
# Generate text with the fine-tuned model, using the same prompt and settings
# as the base-model run so the two outputs can be compared
query = "Generate a course outline for a deep learning course"

# Set the desired output length (passed to the pipeline as `max_length`)
max_length = 300  # Adjust this value as needed

# Build the generation pipeline around the trainer's model.
# `temperature` and `top_k` are sampling parameters, so sampling is switched
# on explicitly instead of relying on whatever the checkpoint's generation
# config happens to specify.
text_gen = pipeline(
    task="text-generation",
    model=fine_tuning.model,
    tokenizer=llama_tokenizer,
    max_length=max_length,
    do_sample=True,
    temperature=0.3,
    top_k=10,
)
output = text_gen(query)

# Print the generated course outline and plan
print(output[0]['generated_text'])


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausal

Generate a course outline for a deep learning course. nobody has time to read through a 100-page document. So, I'll keep it short and sweet. Here's a high-level outline of a deep learning course that I would design:
Introduction
  * What is deep learning?
  * Why use deep learning?
  * Key ingredients of deep learning
    * Large amounts of data
    * High-quality data
    * Representation learning
    * Learning multiple layers of representation
    * Adaptive learning
    * Deterministic or stochastic?
  * Historical context
    * Artificial neural networks (ANNs)
    * Deep ANNs
    * Early deep learning algorithms
    * Current state-of-the-art algorithms
  * Roadmap
    * Next-generation ANNs
    * Deep learning for structured data
    * Deep learning for computer vision
    * Deep learning for natural language processing
    * Other domains

Data Preprocessing and Cleaning
  * Data types
    * NumPy arrays
    * Pandas dataframes
    * Cassandra or other distributed databases
   