# Importing Required Libraries

In [15]:
import os

import huggingface_hub
import mlflow
import torch
from langchain_core.prompts import PromptTemplate
from mlflow.models.signature import infer_signature
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
from transformers import BitsAndBytesConfig

# Authenticate with the Hugging Face Hub without embedding a secret in the notebook.
# The token is read from the HF_TOKEN environment variable; when it is unset or
# empty, `login` receives None and falls back to its interactive prompt.
# NOTE(review): the access token that used to be hard-coded on this line has been
# exposed in the notebook history and should be revoked in the account settings.
huggingface_hub.login(token=os.environ.get("HF_TOKEN") or None)

# Loading the Test Data

In [32]:
# Sample paragraph used as the summarization input in the cells below
# (prompt experiment, MLflow input example, and the final prediction).
para1 = "The Reddit dataset is a graph dataset from Reddit posts made in the month of September, 2014. The node label in this case is the community, or “subreddit”, that a post belongs to. 50 large communities have been sampled to build a post-to-post graph, connecting posts if the same user comments on both. In total this dataset contains 232,965 posts with an average degree of 492. The first 20 days are used for training and the remaining days for testing. For features, off-the-shelf 300-dimensional GloVe CommonCrawl word vectors are used."

# Loading the Model

In [10]:
# device = "cuda" if torch.cuda.is_available() else "cpu"

# # Configure 4-bit quantization
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16
# )

# if device == "cuda":
#     model_name = "meta-llama/Llama-3.2-1B-Instruct"
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         quantization_config=quantization_config,
#         device_map="auto"
#     )
# else:
#     model_name = "google/flan-t5-small"
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
#     model = model.to(device)

In [14]:
# Checkpoint name; the tokenizer and the weights are both loaded from it.
model_name = "google/flan-t5-small"

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
device = "cpu" if not torch.cuda.is_available() else "cuda"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Running the Experiment

In [16]:
# Prompt wording for the summarization task; `{input_paragraph}` is its only placeholder.
template = "Summarize the paragraph: {input_paragraph} Answer should contain only summary and nothing else."
template_input_variables = ["input_paragraph"]

# Values substituted into the template, keyed by placeholder name.
model_input = {"input_paragraph": para1}

prompt_template = PromptTemplate(
    template=template,
    input_variables=template_input_variables,
)
formatted_prompt = prompt_template.format(**model_input)

In [17]:
# Tokenize the prompt and move the tensors to the device the model lives on.
# Leaving them on the CPU makes `generate` fail with a device mismatch whenever
# the model was placed on CUDA by the loading cell.
inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True).to(model.device)

# Beam search with two beams; the generated sequence is capped at 512 tokens.
outputs = model.generate(**inputs, max_length=512, num_beams=2, early_stopping=True)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

The Reddit dataset contains 232,965 posts with an average degree of 492.


# MLflow

In [128]:
import os

# Force the TOKENIZERS_PARALLELISM flag to "false" for this kernel process
# (presumably to avoid the tokenizers parallelism/fork warning — confirm).
os.environ.update(TOKENIZERS_PARALLELISM="false")

In [145]:
import mlflow.pyfunc
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import huggingface_hub

# MLflow tracking
import mlflow
from mlflow.models.signature import infer_signature
import pandas as pd

# Login to Hugging Face without embedding a secret in the notebook.
# The token is read from the HF_TOKEN environment variable; when it is unset or
# empty, `login` receives None and falls back to its interactive prompt.
# NOTE(review): the access token that used to be hard-coded on this line has been
# exposed in the notebook history and should be revoked in the account settings.
huggingface_hub.login(token=os.environ.get("HF_TOKEN") or None)

In [86]:
# Model details: one checkpoint name drives both the tokenizer and the weights.
model_name = "google/flan-t5-small"

# Set device: prefer CUDA when PyTorch reports it as available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

In [142]:
# Define a custom PythonModel wrapper
class TextSummarizationModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper that summarizes a paragraph with a seq2seq model.

    The model and tokenizer are loaded eagerly in ``__init__``, so they are part
    of the instance that is handed to ``mlflow.pyfunc.log_model``.

    Args:
        model_name: Hugging Face checkpoint name passed to ``from_pretrained``.
        device: Device string (for example ``"cuda"`` or ``"cpu"``) the model is
            moved to.
    """

    def __init__(self, model_name, device):
        # Move the weights to the requested device. Previously only the inputs
        # were moved in `predict`, so any non-CPU device caused a device mismatch
        # between the model and its inputs.
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.device = device
        # Upper bound on prompt length in tokens. It is applied when tokenizing in
        # `predict`; the original passed `max_length=500` to `from_pretrained`,
        # which does not appear to configure truncation there.
        self.max_input_tokens = 500

    def predict(self, context, model_input):
        """Summarize the first paragraph in ``model_input``.

        Args:
            context: MLflow model context; not used by this wrapper.
            model_input: DataFrame with an ``"input_paragraph"`` column. Only the
                first row is summarized; any further rows are ignored.

        Returns:
            The generated summary as a single string.
        """
        input_paragraph = model_input["input_paragraph"].iloc[0]

        formatted_prompt = f"Summarize the paragraph: {input_paragraph} Answer should contain only summary and nothing else."

        inputs = self.tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=self.max_input_tokens,
        )
        # Send the tensors to wherever the model actually lives so the two can
        # never disagree.
        target_device = self.model.device
        inputs = {key: value.to(target_device) for key, value in inputs.items()}

        # Generate summary
        outputs = self.model.generate(**inputs, max_length=512, num_beams=2, early_stopping=True)
        summary = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        return summary

# Create an instance of the custom model
wrapped_model = TextSummarizationModel(model_name = "google/flan-t5-small", device=device)

In [143]:
# Point the MLflow client at the tracking server on the loopback address, then
# make "Text Summarization Experiment" the active experiment. The experiment
# call stays last so its return value is shown as the cell output.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment(experiment_name="Text Summarization Experiment")

<Experiment: artifact_location='mlflow-artifacts:/633859552949112282', creation_time=1735791458842, experiment_id='633859552949112282', last_update_time=1735791458842, lifecycle_stage='active', name='Text Summarization Experiment', tags={}>

In [146]:
with mlflow.start_run(run_name="initial summarization experiment-4"):
    # Log parameters
    mlflow.log_param("model_name", model_name)
    mlflow.log_param("device", device)

    # Example input: a DataFrame with a single "input_paragraph" column.
    example_input = pd.DataFrame({"input_paragraph": [para1]})

    # Smoke-test the wrapper before a new model version is registered, so a
    # broken wrapper fails here instead of producing an unusable version.
    example_output = wrapped_model.predict(None, example_input)
    if not isinstance(example_output, str) or not example_output.strip():
        raise ValueError("Smoke test failed: the wrapped model returned no summary text.")

    # Log model. No explicit signature is passed: the manually inferred one was
    # never used, so that dead computation was removed. Per the MLflow docs the
    # signature is inferred from `input_example` when none is given.
    mlflow.pyfunc.log_model(
        artifact_path="summarization_model",
        python_model=wrapped_model,
        registered_model_name="SummarizationModel",
        input_example=example_input,
    )

print("Model logged successfully.")




Registered model 'SummarizationModel' already exists. Creating a new version of this model...
2025/01/01 23:58:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: SummarizationModel, version 8


🏃 View run initial summarization experiment-4 at: http://127.0.0.1:5000/#/experiments/633859552949112282/runs/6cddf2d122104457b5fbbf583a7cdf25
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/633859552949112282
Model logged successfully.


Created version '8' of model 'SummarizationModel'.


# Load the Registered Model and Run Inference

In [147]:
# Registry coordinates of the model version to load; update them after logging
# a new version.
REGISTERED_MODEL_NAME = "SummarizationModel"
REGISTERED_MODEL_VERSION = 8

model_uri = f"models:/{REGISTERED_MODEL_NAME}/{REGISTERED_MODEL_VERSION}"
model = mlflow.pyfunc.load_model(model_uri)

Downloading artifacts: 100%|██████████| 7/7 [00:09<00:00,  1.32s/it]


In [148]:
# Single-row frame whose only column, "input_paragraph", holds the sample paragraph.
model_input = pd.DataFrame([para1], columns=["input_paragraph"])

In [149]:
# model_input = {"input_paragraph": para1}
# Run the loaded pyfunc model on the DataFrame input and keep its result.
summary = model.predict(data=model_input)

In [150]:
# Bare expression so the notebook shows the generated summary as the cell output.
summary

'The Reddit dataset contains 232,965 posts with an average degree of 492.'