In [1]:
import os

# Move up to the project root so that the relative paths used further down
# ("config/config.yaml", "params.yaml") resolve correctly.
# The move is guarded so re-running this cell does not climb one directory
# higher on every execution.
# NOTE(review): assumes params.yaml exists only at the project root — confirm.
if not os.path.exists("params.yaml"):
    os.chdir("../")

In [2]:
# Display the current working directory (an earlier cell changes it with os.chdir).
%pwd

'/home/aman/Desktop/Cognitext'

In [3]:
from pathlib import Path
from dataclasses import dataclass

In [4]:
@dataclass(frozen=True)
class PredictionConfig:
    """Immutable settings consumed by ``Prediction``.

    Instances are built by ``ConfigurationManager2.get_prediction_config``.
    """
    # Weights file that ``Prediction`` hands to safetensors' ``load_file``.
    model: Path
    # Encoding name that ``Prediction`` hands to ``tiktoken.get_encoding``.
    tokenizer: str
    # Number of iterations of the generation loop in ``Prediction.predict``.
    max_token: int

In [5]:
from Cognitext.utils.common import read_yaml
from Cognitext.constants import *

In [6]:
class ConfigurationManager2:
    """Loads the project YAML files and builds the prediction settings.

    Args:
        config: Location of the main configuration YAML. It must provide a
            ``prediction`` section with ``model`` and ``tokenizer`` entries.
        params: Location of the parameters YAML. It must provide
            ``DataLoaderParams.max_token``.
    """

    def __init__(self, config = CONFIG_FILE_PATH, params = PARAMS_FILE_PATH):
        self.config = read_yaml(config)
        self.params = read_yaml(params)

    def get_prediction_config(self) -> PredictionConfig:
        """Assemble a ``PredictionConfig`` from the loaded YAML content.

        Returns:
            PredictionConfig: ``model`` is wrapped in ``Path`` so the value
            matches the field's declared type; ``tokenizer`` and
            ``max_token`` are passed through unchanged.
        """
        prediction_section = self.config.prediction
        loader_params = self.params.DataLoaderParams

        return PredictionConfig(
            # YAML yields a plain string; the dataclass field is declared as Path.
            model = Path(prediction_section.model),
            tokenizer = prediction_section.tokenizer,
            max_token = loader_params.max_token,
        )

In [7]:
import torch
import tiktoken

In [8]:
from safetensors.torch import load_file

In [9]:
from Cognitext.components.ModelTraining import Model

In [10]:
from Cognitext.config.configuration import ConfigurationManager
# NOTE: both names below are notebook-level globals. `Prediction.__init__`
# reads `get_training_config` directly when it constructs `Model`, so this
# cell must be executed before any `Prediction` is created. The name `config`
# is rebound later by the cell that runs the prediction.
config = ConfigurationManager()
get_training_config = config.get_trainer_config()

[2024-09-21 22:36:18,519: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-21 22:36:18,591: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-09-21 22:36:18,595: INFO: common: created directory at: artifacts]


In [13]:
class Prediction:
    """Greedy text generation with a trained ``Model`` checkpoint.

    Args:
        config: ``PredictionConfig`` providing the weights file (``model``),
            the tiktoken encoding name (``tokenizer``) and the number of
            tokens to generate (``max_token``).

    Note:
        The network is constructed from the notebook-level
        ``get_training_config`` defined in an earlier cell, so that cell has
        to be executed first.
    """

    def __init__(self, config: "PredictionConfig"):
        # `config` must be a PredictionConfig *instance*; the attributes read
        # below (tokenizer, max_token, model) only exist on instances.
        self.config = config

        self.model = Model(get_training_config)
        self.tokenizer = tiktoken.get_encoding(self.config.tokenizer)
        self.max_token = self.config.max_token
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # config.model points at the .safetensors weights file.
        model_weights = load_file(self.config.model)
        self.model.load_state_dict(model_weights)

        # Move model to the appropriate device
        self.model.to(self.device)

    def predict(self, text):
        """Continue ``text`` greedily for ``self.max_token`` tokens.

        Args:
            text: Prompt string; it must encode to at least one token.

        Returns:
            str: The decoded prompt followed by the generated tokens.

        Raises:
            ValueError: If ``text`` encodes to zero tokens.
        """
        encoded = self.tokenizer.encode(text)
        if not encoded:
            # An empty id list would become a float tensor of shape (1, 0)
            # and fail inside the model with an unrelated-looking error.
            raise ValueError("text must contain at least one token to start generation")

        # Create the prompt on the same device as the model; feeding a CPU
        # tensor to a model that was moved to CUDA raises a device mismatch.
        input_ids = torch.tensor([encoded], dtype=torch.long, device=self.device)
        self.model.eval()
        # Number of rows in the positional embedding, used as the context window.
        context_size = self.model.pos_emb.weight.shape[0]

        with torch.no_grad():
            for _ in range(self.max_token):
                # Only the most recent `context_size` tokens are fed to the model.
                input_cond = input_ids[:, -context_size:]
                logits = self.model(input_cond)

                # Greedy pick on the last position. Softmax is monotonic, so
                # taking argmax of the raw logits selects the same token.
                next_token_id = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)

                input_ids = torch.cat([input_ids, next_token_id], dim=1)

        return self.tokenizer.decode(input_ids.squeeze(0).tolist())




In [15]:
# Build the prediction settings, load the checkpoint and print a continuation
# of the prompt. Errors are left to propagate unchanged so the full traceback
# is shown in the cell output.
config = ConfigurationManager2()
prediction_config = config.get_prediction_config()
prediction = Prediction(config=prediction_config)
generated_text = prediction.predict("Once upon a time ")
print(generated_text)

[2024-09-21 22:39:14,878: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-09-21 22:39:15,011: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-21 22:39:15,014: INFO: common: created directory at: artifacts/model_trained]
Once upon a time ive got it was the documentary , but i dont hate the room , and the back to get out of the building , i was a mailbox . i was a stretch , and i 's got a grizzly discovery in the other side of the end of the road seemed to the loudspeaker announced his flight and i dont have to the other . i had booked a big bulls-embedded fear doesnt need to be scared shitless of the airline that i had been right ? i was a lot like that i was a few paces and the fringe on the fibrous food group of the whole time explaining that was just
