In [None]:

print("✅ STEP 1: Generating mock data files...")
import pandas as pd
import numpy as np

# Data dictionary: one (column name, description) pair per field of the log CSVs
_column_docs = [
    ('machine_id', 'Unique identifier for the machine.'),
    ('cpu_usage_percent', 'CPU utilization as a percentage.'),
    ('memory_usage_gb', 'Memory currently in use (Gigabytes).'),
    ('disk_io_mbps', 'Disk read/write speed (Megabytes per second).'),
    ('error_count', 'Number of critical errors logged in the period.'),
    ('insight', 'A human-readable, actionable insight based on the log data.'),
]

# Pivot the pairs into the column-oriented layout that DataFrame expects
data_dict_data = {
    'Column Name': [name for name, _ in _column_docs],
    'Description': [text for _, text in _column_docs],
}
data_dict_df = pd.DataFrame(data_dict_data)
data_dict_df.to_csv('data_dictionary.csv', index=False)
print("... data_dictionary.csv has been created.")

# Define a function to generate realistic log data and corresponding insights
def generate_data(num_rows, seed=None, start_id=1001):
    """Create a DataFrame of synthetic machine logs with rule-based insights.

    Each row gets a machine id, randomly drawn CPU / memory / disk / error
    metrics, and an insight sentence chosen by the first rule that matches:
    errors > 5, then cpu > 90, then mem > 27, then disk > 220, else "normal".

    Args:
        num_rows: Number of rows to generate. 0 yields an empty DataFrame
            that still has all six columns.
        seed: Optional seed. When given, a private ``RandomState`` is used so
            the output is reproducible and the global NumPy state is left
            alone. When ``None`` (default), values are drawn from NumPy's
            global generator exactly as before.
        start_id: Numeric suffix of the first machine id (default 1001).
            Pass different values to give two datasets disjoint ids.

    Returns:
        pandas.DataFrame with columns machine_id, cpu_usage_percent,
        memory_usage_gb, disk_io_mbps, error_count and insight.
    """
    # The np.random module and a RandomState instance expose the same
    # uniform()/choice() API, so either can serve as the source of randomness.
    rng = np.random if seed is None else np.random.RandomState(seed)

    columns = ['machine_id', 'cpu_usage_percent', 'memory_usage_gb', 'disk_io_mbps', 'error_count', 'insight']
    data = []
    for i in range(num_rows):
        machine_id = f'PROD-SRV-{start_id + i}'
        # CPU and memory are each picked (50/50) from a normal or an anomalous range
        cpu = round(rng.choice([rng.uniform(30, 60), rng.uniform(92, 99)]), 2)
        mem = round(rng.choice([rng.uniform(10, 20), rng.uniform(28, 31.5)]), 2)
        disk = round(rng.uniform(20, 250), 2)
        # Mostly zero errors, with occasional small or critical counts
        errors = rng.choice([0, 0, 0, 0, 0, 1, 3, 10])

        # Rules are ordered by severity; the first match decides the insight
        if errors > 5:
            insight = f"Critical alert: {errors} errors detected on {machine_id}. Immediate investigation is required to ensure system stability."
        elif cpu > 90:
            insight = f"High CPU usage ({cpu}%) on {machine_id} suggests a potential performance bottleneck. Recommend analyzing top processes."
        elif mem > 27:
            insight = f"Memory usage is critical at {mem}GB on {machine_id}. Check for memory leaks or consider a resource upgrade."
        elif disk > 220:
            insight = f"Unusually high disk I/O ({disk} MB/s) on {machine_id} could degrade storage performance. Monitor read/write operations."
        else:
            insight = f"System {machine_id} is operating within normal performance parameters. No action is required at this time."
        data.append([machine_id, cpu, mem, disk, errors, insight])

    return pd.DataFrame(data, columns=columns)

# Build the training split first and the testing split second, writing each
# CSV as soon as its frame exists
_frames = []
for _row_count, _csv_path in ((200, 'train.csv'), (50, 'test.csv')):
    _frame = generate_data(_row_count)
    _frame.to_csv(_csv_path, index=False)
    print(f"... {_csv_path} has been created.")
    _frames.append(_frame)
train_df, test_df = _frames

print("\n✅ Mock data generation is complete. The notebook is ready to run.")

✅ STEP 1: Generating mock data files...
... data_dictionary.csv has been created.
... train.csv has been created.
... test.csv has been created.

✅ Mock data generation is complete. The notebook is ready to run.


In [None]:

print("✅ STEP 2: Installing Python libraries...")
!pip install transformers sentence-transformers evaluate rouge_score nltk --quiet
print("... Installation complete.")

✅ STEP 2: Installing Python libraries...
... Installation complete.


In [None]:

print("✅ STEP 3: Importing libraries and loading data...")

# Core data handling and progress bars
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
import torch

# Hugging Face and evaluation libraries
from transformers import pipeline
import evaluate
from sentence_transformers import SentenceTransformer, util

# --- Data Loading ---
# Only the three reads sit inside the try block, so a FileNotFoundError here
# can only mean that one of the CSV files is missing.
try:
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    data_dict_df = pd.read_csv('data_dictionary.csv')
except FileNotFoundError as e:
    # Name the missing file so it is clear which one has to be regenerated.
    print(f"❌ ERROR: Could not find the data files (missing: {e.filename!r})")
else:
    print("... Datasets loaded successfully.")

    # --- Data Exploration ---
    print("\n--- Data Dictionary ---")
    display(data_dict_df)

    print("\n--- Test Data Sample ---")
    display(test_df.head())

✅ STEP 3: Importing libraries and loading data...
... Datasets loaded successfully.

--- Data Dictionary ---


Unnamed: 0,Column Name,Description
0,machine_id,Unique identifier for the machine.
1,cpu_usage_percent,CPU utilization as a percentage.
2,memory_usage_gb,Memory currently in use (Gigabytes).
3,disk_io_mbps,Disk read/write speed (Megabytes per second).
4,error_count,Number of critical errors logged in the period.
5,insight,"A human-readable, actionable insight based on the log data."



--- Test Data Sample ---


Unnamed: 0,machine_id,cpu_usage_percent,memory_usage_gb,disk_io_mbps,error_count,insight
0,PROD-SRV-1001,95.1,16.94,124.28,0,High CPU usage (95.1%) on PROD-SRV-1001 suggests a potential performance bottleneck. Recommend analyzing top processes.
1,PROD-SRV-1002,32.25,13.95,237.53,0,Unusually high disk I/O (237.53 MB/s) on PROD-SRV-1002 could degrade storage performance. Monitor read/write operations.
2,PROD-SRV-1003,97.75,30.24,236.14,10,Critical alert: 10 errors detected on PROD-SRV-1003. Immediate investigation is required to ensure system stability.
3,PROD-SRV-1004,92.05,19.46,62.51,1,High CPU usage (92.05%) on PROD-SRV-1004 suggests a potential performance bottleneck. Recommend analyzing top processes.
4,PROD-SRV-1005,51.78,17.61,91.28,0,System PROD-SRV-1005 is operating within normal performance parameters. No action is required at this time.


In [None]:
print("✅ STEP 4: Loading the pre-trained model...")

# Hugging Face Hub checkpoint used for insight generation
MODEL_NAME = "google/flan-t5-large"

# Run on the GPU when torch can see one; otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Text-to-text generation pipeline built around the checkpoint above
summarizer_pipeline = pipeline("text2text-generation", model=MODEL_NAME, device=device)

print(f"... Model '{MODEL_NAME}' loaded successfully on device: '{device}'.")

✅ STEP 4: Loading the pre-trained model...


Device set to use cuda


... Model 'google/flan-t5-large' loaded successfully on device: 'cuda'.


In [None]:
print("✅ STEP 5: Defining the prompt engineering function...")

def create_prompt(data_row: pd.Series) -> str:
    """
    Converts a pandas Series into a structured, few-shot prompt.

    The prompt is a fixed instruction with two worked examples, followed by
    the row's own fields rendered as "Title Cased Name: value" lines and an
    open "**Actual Insight:**" slot for the model to complete.

    Args:
        data_row: One log record. An 'insight' entry, if present, is treated
            as the label and left out of the prompt; rows without it
            (unlabeled data) are accepted as well.

    Returns:
        The full prompt text.
    """
    # --- FEW-SHOT EXAMPLES ---
    # Provide clear examples of the desired input-output format.
    examples = """
Analyze the following machine usage log and generate a concise, actionable business insight.

---
**Example Input:**
Machine Id: PROD-SRV-9001
Cpu Usage Percent: 95.8
Error Count: 0

**Example Insight:**
High CPU usage (95.8%) on PROD-SRV-9001 suggests a potential performance bottleneck. Recommend analyzing top processes.
---
**Example Input:**
Machine Id: PROD-SRV-9002
Cpu Usage Percent: 45.1
Error Count: 8

**Example Insight:**
Critical alert: 8 errors detected on PROD-SRV-9002. Immediate investigation is required to ensure system stability.
---
"""

    # --- ACTUAL TASK ---
    # The label must never leak into the prompt. errors='ignore' keeps the
    # function usable on rows that have no 'insight' entry at all.
    features = data_row.drop('insight', errors='ignore')

    # One "Name: value" line per remaining field, each newline-terminated.
    current_log = "".join(
        f"{str(col).replace('_', ' ').title()}: {value}\n"
        for col, value in features.items()
    )

    # Combine examples with the actual task.
    final_prompt = examples + f"""**Actual Input:**
{current_log}
**Actual Insight:**"""

    return final_prompt

# Preview the prompt that the first test row produces
_first_test_row = test_df.iloc[0]
example_prompt = create_prompt(_first_test_row)
print("... Prompt function created. Example prompt:\n", example_prompt, sep="\n")

✅ STEP 5: Defining the prompt engineering function...
... Prompt function created. Example prompt:


Analyze the following machine usage log and generate a concise, actionable business insight.

---
**Example Input:**
Machine Id: PROD-SRV-9001
Cpu Usage Percent: 95.8
Error Count: 0

**Example Insight:**
High CPU usage (95.8%) on PROD-SRV-9001 suggests a potential performance bottleneck. Recommend analyzing top processes.
---
**Example Input:**
Machine Id: PROD-SRV-9002
Cpu Usage Percent: 45.1
Error Count: 8

**Example Insight:**
Critical alert: 8 errors detected on PROD-SRV-9002. Immediate investigation is required to ensure system stability.
---
**Actual Input:**
Machine Id: PROD-SRV-1001
Cpu Usage Percent: 95.1
Memory Usage Gb: 16.94
Disk Io Mbps: 124.28
Error Count: 0

**Actual Insight:**


In [None]:
print("✅ STEP 6: Running inference on the test data...")

# We will run this on the full test set (50 samples)
# This may take a few minutes depending on the Colab GPU allocated.
test_subset_df = test_df

# Decoding settings do not change between rows, so they are defined once.
# - max_new_tokens replaces max_length: the pipeline already sets
#   max_new_tokens, which takes precedence, so max_length=128 never applied.
# - temperature is not passed: it only affects sampling, and this is plain
#   beam search (no do_sample), so the library reported it as ignored.
generation_kwargs = dict(
    max_new_tokens=128,
    num_beams=5,
    length_penalty=2.0,    # Encourages the model to generate longer, more complete sentences
    no_repeat_ngram_size=2,
    early_stopping=True,
)

# Lists to store the results
generated_insights = []
reference_insights = test_subset_df['insight'].tolist()

# Use tqdm for a live progress bar
for _, row in tqdm(test_subset_df.iterrows(), total=test_subset_df.shape[0], desc="Generating Insights"):
    # Get model output for this row's prompt
    output = summarizer_pipeline(create_prompt(row), **generation_kwargs)
    generated_insights.append(output[0]['generated_text'])

print(f"\n... Inference complete. Generated {len(generated_insights)} insights.")

✅ STEP 6: Running inference on the test data...


Generating Insights:   0%|          | 0/50 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will t


... Inference complete. Generated 50 insights.


In [None]:

print("✅ STEP 7: Calculating lexical similarity metrics...")

# Load the evaluation metrics from the 'evaluate' library
rouge_metric = evaluate.load('rouge')
bleu_metric = evaluate.load('bleu')

# --- ROUGE Calculation ---
rouge_results = rouge_metric.compute(predictions=generated_insights, references=reference_insights)

# Pad every label to the longest one so the colons line up; a fixed width of 8
# is too short for 'rougeLsum' and pushed that row out of alignment.
label_width = max(len(name) for name in [*rouge_results, 'BLEU'])

print("\n--- ROUGE Scores ---")
for key, value in rouge_results.items():
    print(f"  {key:<{label_width}}: {value:.4f}")

# --- BLEU Calculation ---
# Each prediction is paired with its own list of references (one entry here)
bleu_results = bleu_metric.compute(predictions=generated_insights, references=[[ref] for ref in reference_insights])
print("\n--- BLEU Score ---")
print(f"  {'BLEU':<{label_width}}: {bleu_results['bleu']:.4f}")

✅ STEP 7: Calculating lexical similarity metrics...

--- ROUGE Scores ---
  rouge1  : 0.3171
  rouge2  : 0.2201
  rougeL  : 0.2676
  rougeLsum: 0.2679

--- BLEU Score ---
  BLEU    : 0.0935


In [None]:

print("✅ STEP 8: Calculating semantic similarity...")

# Sentence-embedding model used to compare generated and reference text
similarity_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

# Embed the generated insights and the references as tensors
generated_embeddings = similarity_model.encode(generated_insights, convert_to_tensor=True)
reference_embeddings = similarity_model.encode(reference_insights, convert_to_tensor=True)

# Cosine similarity of every generated insight against every reference
cosine_scores = util.cos_sim(generated_embeddings, reference_embeddings)

# Entry (i, i) pairs generated insight i with its own reference, so the
# per-sample scores are the diagonal of that matrix
semantic_scores = cosine_scores.diagonal().tolist()
average_semantic_score = np.mean(semantic_scores)

print("\n--- Semantic Similarity Score ---")
print(f"  Average Cosine Similarity: {average_semantic_score:.4f}")

✅ STEP 8: Calculating semantic similarity...

--- Semantic Similarity Score ---
  Average Cosine Similarity: 0.7159


In [None]:
#
# CELL 9: DISPLAY AND ANALYZE EXAMPLE RESULTS
#
print("✅ STEP 9: Qualitative analysis of generated examples...")

# Recover each row's log text by cutting it back out of the generated prompt:
# it is the text between the "Actual Input" and "Actual Insight" markers.
input_logs = []
for position in range(5):
    prompt_text = create_prompt(test_df.iloc[position])
    after_input_marker = prompt_text.split('**Actual Input:**')[1]
    log_block = after_input_marker.split('**Actual Insight:**')[0]
    input_logs.append(log_block.strip())

# Side-by-side table of the first five samples
results_df = pd.DataFrame({
    'Input Log Data': input_logs,
    'Reference Insight (Ground Truth)': reference_insights[:5],
    'Generated Insight (Flan-T5)': generated_insights[:5],
    'Semantic Score': semantic_scores[:5]
})

# Let pandas show long text cells in full
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 200)

print("\n--- Comparison of Top 5 Results ---")
# CSS property names contain hyphens, so they are passed via dict unpacking
cell_css = {'text-align': 'left', 'white-space': 'normal'}
display(results_df.style.set_properties(**cell_css))

✅ STEP 9: Qualitative analysis of generated examples...

--- Comparison of Top 5 Results ---


Unnamed: 0,Input Log Data,Reference Insight (Ground Truth),Generated Insight (Flan-T5),Semantic Score
0,Machine Id: PROD-SRV-1001 Cpu Usage Percent: 95.1 Memory Usage Gb: 16.94 Disk Io Mbps: 124.28 Error Count: 0,High CPU usage (95.1%) on PROD-SRV-1001 suggests a potential performance bottleneck. Recommend analyzing top processes.,The CPU usage (95.1%) on PROD-SRV-1001 suggests a potential performance bottleneck. Recommend analyzing top processes. ---,0.980874
1,Machine Id: PROD-SRV-1002 Cpu Usage Percent: 32.25 Memory Usage Gb: 13.95 Disk Io Mbps: 237.53 Error Count: 0,Unusually high disk I/O (237.53 MB/s) on PROD-SRV-1002 could degrade storage performance. Monitor read/write operations.,The CPU usage on PROD-SRV-1002 was 32.25 percent. The memory usage was 13.95 percent and the disk Io Mbps was 237.53. The error count was 0.,0.718645
2,Machine Id: PROD-SRV-1003 Cpu Usage Percent: 97.75 Memory Usage Gb: 30.24 Disk Io Mbps: 236.14 Error Count: 10,Critical alert: 10 errors detected on PROD-SRV-1003. Immediate investigation is required to ensure system stability.,"Machine Id: PROD-SRV-1003 has a CPU usage of 97.75 percent, Memory Usage Gb: 30.24 Disk Io Mbps: 236.14 and an error count of 10.",0.602368
3,Machine Id: PROD-SRV-1004 Cpu Usage Percent: 92.05 Memory Usage Gb: 19.46 Disk Io Mbps: 62.51 Error Count: 1,High CPU usage (92.05%) on PROD-SRV-1004 suggests a potential performance bottleneck. Recommend analyzing top processes.,The CPU usage percentage of PROD-SRV-1004 is 92.05. The memory usage is 19.46 GB and the disk Io Mbps are 62.51.,0.79448
4,Machine Id: PROD-SRV-1005 Cpu Usage Percent: 51.78 Memory Usage Gb: 17.61 Disk Io Mbps: 91.28 Error Count: 0,System PROD-SRV-1005 is operating within normal performance parameters. No action is required at this time.,The CPU usage percentage of PROD-SRV-1005 is 51.78. The memory usage is 17.61 GB and the disk Io Mbps is 91.28.,0.627327


In [None]:

print("✅ Final Step: Generating the .txt report file...")

def generate_report_text(model_name, rouge_scores, bleu_score, sem_score, results_dataframe, prompt_func, sample_row):
    """
    Formats all experiment results into a single string for the report.

    Args:
        model_name: Model identifier printed in Section 1.
        rouge_scores: Mapping with 'rouge1', 'rouge2' and 'rougeL' floats.
        bleu_score: Mapping with a 'bleu' float.
        sem_score: Average cosine similarity (float).
        results_dataframe: DataFrame with the columns 'Input Log Data',
            'Reference Insight (Ground Truth)', 'Generated Insight (Flan-T5)'
            and 'Semantic Score'; at most its first five rows are reported.
        prompt_func: Callable that turns ``sample_row`` into a prompt string.
        sample_row: Row passed to ``prompt_func`` to obtain a concrete prompt
            from which the template is derived.

    Returns:
        The complete report text, stripped of leading/trailing whitespace.
    """
    import textwrap  # local import: only needed to indent the template block

    # --- SECTION 1: MODEL DETAILS ---
    section1 = f"""
======================================================================
         ASSIGNMENT 4: TABLE-TO-INSIGHTS GENERATION REPORT
======================================================================

----------------------------------------------------------------------
SECTION 1: MODEL DETAILS
----------------------------------------------------------------------

1.  **Model Name**: {model_name}

2.  **Model Architecture**: The model is based on the Text-to-Text Transfer Transformer (T5) architecture.
    It is an encoder-decoder model that is pre-trained on a mixture of unsupervised and supervised tasks
    and then fine-tuned on a large collection of instruction-based datasets ("instruction tuning").

3.  **Number of Parameters**: The 'large' version has approximately 780 Million parameters.

4.  **Reason for Choice**:
    * **Instruction-Tuned**: Flan-T5 is designed to perform well on unseen tasks by following natural
        language instructions (prompts), which is ideal for this assignment.
    * **Performance**: The 'large' variant offers a strong balance between high performance and the
        ability to run on freely available hardware like Google Colab's T4 GPU.
"""

    # --- SECTION 2: EXPERIMENT RESULTS ---

    # Derive the template from a real prompt: keep everything up to the
    # "**Actual Input:**" marker and swap the sample row's own data for a
    # {LOG DATA} placeholder. The previous split marker ("--- LOG DATA ---")
    # never occurs in the prompt, so the sample's data leaked into the
    # "template" and literal "\n" text was appended after it.
    full_prompt = prompt_func(sample_row)
    head, marker, _ = full_prompt.partition("**Actual Input:**")
    if marker:
        prompt_template = head + "**Actual Input:**\n{LOG DATA}\n**Actual Insight:**"
    else:
        # Unknown prompt layout: show the prompt as-is rather than invent markers.
        prompt_template = full_prompt

    # Indent every line to sit inside the report's code fence; the first
    # line's indent is supplied by the f-string below, hence the lstrip().
    prompt_template = textwrap.indent(prompt_template.strip(), "    ").lstrip()

    # Format the metrics
    metrics_text = f"""
----------------------------------------------------------------------
SECTION 2: EXPERIMENT RESULTS
----------------------------------------------------------------------

1.  **Prompt Used for Generation**:
    The following prompt template was used to instruct the model. Each row from the
    test data was formatted and inserted into the {{LOG DATA}} placeholder.

    ```
    {prompt_template}
    ```

2.  **Performance Metrics**:
    * **ROUGE (Recall-Oriented Understudy for Gisting Evaluation)**:
        - ROUGE-1 (Unigram Overlap):   {rouge_scores['rouge1']:.4f}
        - ROUGE-2 (Bigram Overlap):    {rouge_scores['rouge2']:.4f}
        - ROUGE-L (Longest Subsequence): {rouge_scores['rougeL']:.4f}

    * **BLEU (Bilingual Evaluation Understudy)**:
        - BLEU Score:                  {bleu_score['bleu']:.4f}

    * **Semantic Match Score**:
        - Average Cosine Similarity:   {sem_score:.4f}
"""

    # Format the five examples. Number them by position so the labels are
    # always 1..5, whatever index the DataFrame happens to carry.
    example_blocks = []
    for position, (_, row) in enumerate(results_dataframe.head(5).iterrows(), start=1):
        example_blocks.append(f"""
    ------------------------------- Example {position} -------------------------------

    **INPUT LOG DATA**:
    {row['Input Log Data']}

    **REFERENCE INSIGHT (Ground Truth)**:
    {row['Reference Insight (Ground Truth)']}

    **GENERATED INSIGHT (Flan-T5)**:
    {row['Generated Insight (Flan-T5)']}

    **SEMANTIC SIMILARITY SCORE**: {row['Semantic Score']:.4f}
    """)
    examples_text = "\n3.  **Five Example Results**:\n" + "".join(example_blocks)

    # --- Combine all sections ---
    final_report = section1 + metrics_text + examples_text + "\n\n========================= END OF REPORT ========================="
    return final_report.strip()


# --- Main execution ---
# Collect the notebook-level results that feed the report
report_inputs = {
    'model_name': MODEL_NAME,
    'rouge_scores': rouge_results,
    'bleu_score': bleu_results,
    'sem_score': average_semantic_score,
    'results_dataframe': results_df,
    'prompt_func': create_prompt,
    'sample_row': test_df.iloc[0],  # Used to generate the prompt template
}
report_content = generate_report_text(**report_inputs)

# Persist the report text as a UTF-8 .txt file
report_filename = "assignment_report.txt"
with open(report_filename, mode='w', encoding='utf-8') as report_file:
    report_file.write(report_content)

print(f"\n✅ Report successfully generated and saved as '{report_filename}'.")

✅ Final Step: Generating the .txt report file...

✅ Report successfully generated and saved as 'assignment_report.txt'.
