In [1]:
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from tqdm import tqdm
import evaluate
from transformers import StoppingCriteria, StoppingCriteriaList
from peft import PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Model identifier passed to from_pretrained() below to load the base model
# and its tokenizer.
model_name = "EleutherAI/pythia-410m"

In [3]:
# Evaluation set in JSON Lines format: each non-blank line is parsed as one
# JSON record.
jsonl_path = "data/datascience_1000_multistep.jsonl"

# Skip blank / whitespace-only lines (for example a trailing empty line at the
# end of the file); json.loads() raises JSONDecodeError on them.
with open(jsonl_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Use a small sample for quick evaluation
data = data[:100]

# Base Model

In [4]:
# Load the base checkpoint with 4-bit NF4 quantization (double quantization,
# float16 compute dtype) and let device_map="auto" decide placement.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# The base model is only used for generation in this notebook, so the PEFT
# helper prepare_model_for_kbit_training() -- which readies a quantized model
# for fine-tuning -- is intentionally not applied. Put the model in eval mode
# instead, matching how the fine-tuned models are handled.
base_model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [5]:
# Generate one response per evaluation sample with the base model and collect
# (prediction, reference) pairs for ROUGE scoring.
base_model.eval()
predictions = []
references = []

for sample in tqdm(data):
    instruction = sample["instruction"]
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    reference = sample["output"]

    inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
    input_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        outputs = base_model.generate(
            **inputs,
            max_new_tokens=192,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Drop them by token
    # count instead of str.replace(prompt, ""): decoding is not guaranteed to
    # reproduce the prompt text character-for-character, and replace() would
    # also delete any repeat of the prompt inside the generated text.
    new_tokens = outputs[0][input_len:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    predictions.append(response)
    references.append(reference)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [13:24<00:00,  8.04s/it]


In [6]:
# Load the ROUGE metric and score the current `predictions` against
# `references` (the lists filled by the preceding generation loop).
rouge = evaluate.load("rouge")
rouge_results = rouge.compute(predictions=predictions, references=references)

In [7]:
# Report every ROUGE metric formatted to four decimal places.
print("\n=== ROUGE Scores ===")
for metric_name, score in rouge_results.items():
    print(f"{metric_name}: {score:.4f}")


=== ROUGE Scores ===
rouge1: 0.0407
rouge2: 0.0103
rougeL: 0.0366
rougeLsum: 0.0390


# Fine-Tuned Model

In [9]:
# Local directory passed to from_pretrained() for the fine-tuned model.
model_path = "pythia-lora-final/"

# Rebinds `tokenizer`: from here on it is the tokenizer stored in model_path,
# not the one loaded from model_name.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# NOTE(review): the base model above is loaded with a 4-bit quantization
# config, while this load passes none -- confirm the precision mismatch is
# intended for the comparison.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="cuda")

In [10]:
# Generate one response per evaluation sample with the fine-tuned model and
# collect (prediction, reference) pairs for ROUGE scoring.
model.eval()
predictions = []
references = []

for sample in tqdm(data):
    instruction = sample["instruction"]
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    reference = sample["output"]

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=192,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Drop them by token
    # count instead of str.replace(prompt, ""): decoding is not guaranteed to
    # reproduce the prompt text character-for-character, and replace() would
    # also delete any repeat of the prompt inside the generated text.
    new_tokens = outputs[0][input_len:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    predictions.append(response)
    references.append(reference)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [11:46<00:00,  7.06s/it]


In [11]:
# Load the ROUGE metric and score the current `predictions` against
# `references` (the lists filled by the preceding generation loop).
rouge = evaluate.load("rouge")
rouge_results = rouge.compute(predictions=predictions, references=references)

In [12]:
# Report every ROUGE metric formatted to four decimal places.
print("\n=== ROUGE Scores ===")
for metric_name, score in rouge_results.items():
    print(f"{metric_name}: {score:.4f}")


=== ROUGE Scores ===
rouge1: 0.6084
rouge2: 0.5067
rougeL: 0.5852
rougeLsum: 0.6073


# Fine-Tuned Model V2

In [13]:
# Local directory passed to from_pretrained() for the second fine-tuned model.
model_path = "pythia-lora-V2/"

# Rebinds `tokenizer` and `model` to the checkpoint stored in model_path.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="cuda")

# Generate one response per evaluation sample and collect
# (prediction, reference) pairs for ROUGE scoring.
model.eval()
predictions = []
references = []

for sample in tqdm(data):
    instruction = sample["instruction"]
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    reference = sample["output"]

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=192,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Drop them by token
    # count instead of str.replace(prompt, ""): decoding is not guaranteed to
    # reproduce the prompt text character-for-character, and replace() would
    # also delete any repeat of the prompt inside the generated text.
    new_tokens = outputs[0][input_len:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    predictions.append(response)
    references.append(reference)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [33:43<00:00, 20.24s/it]


In [14]:
# Load the ROUGE metric and score the current `predictions` against
# `references` (the lists filled by the preceding generation loop).
rouge = evaluate.load("rouge")
rouge_results = rouge.compute(predictions=predictions, references=references)

In [15]:
# Report every ROUGE metric formatted to four decimal places.
print("\n=== ROUGE Scores ===")
for metric_name, score in rouge_results.items():
    print(f"{metric_name}: {score:.4f}")


=== ROUGE Scores ===
rouge1: 0.8763
rouge2: 0.8164
rougeL: 0.8627
rougeLsum: 0.8734


In [33]:
# Build the prompt for one hand-picked example (index 28 of the evaluation
# sample), using the same template as the evaluation loops.
instruction = data[28]["instruction"]
prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
# Tokenize once with the currently bound `tokenizer` and move the tensors to
# `model`'s device; the cells below reuse `inputs` for both models.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Bare expression so the notebook displays the prompt string.
prompt

"### Instruction:\nLoad time series CSV 'data.csv', set 'timestamp' as index, resample weekly average of 'revenue', detect points > mean+2*std and annotate plot.\n\n### Response:\n"

In [34]:
# Show the reference answer stored for example 28.
print(data[28]["output"])

import pandas as pd
import matplotlib.pyplot as plt
df2 = pd.read_csv('data.csv', parse_dates=['timestamp'], index_col='timestamp')
weekly = df2['revenue'].resample('W').mean()
mean = weekly.mean(); std = weekly.std()
signal = weekly[weekly > mean + 2*std]
plt.plot(weekly.index, weekly)
plt.scatter(signal.index, signal, color='red')
plt.title('Weekly revenue with Anomalies')
plt.show()


In [35]:
# Qualitative check: run the base model on the single-example prompt and print
# the whole decoded sequence (the prompt is not stripped here).
generation_kwargs = {
    "max_new_tokens": 192,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.eos_token_id,
}
outputs = base_model.generate(**inputs, **generation_kwargs)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

### Instruction:
Load time series CSV 'data.csv', set 'timestamp' as index, resample weekly average of 'revenue', detect points > mean+2*std and annotate plot.

### Response:

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```

```



In [36]:
# Qualitative check: run the currently bound `model` on the single-example
# prompt and print the whole decoded sequence (the prompt is not stripped here).
generation_kwargs = {
    "max_new_tokens": 192,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.eos_token_id,
}
outputs = model.generate(**inputs, **generation_kwargs)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

### Instruction:
Load time series CSV 'data.csv', set 'timestamp' as index, resample weekly average of 'revenue', detect points > mean+2*std and annotate plot.

### Response:
import pandas as pd
import matplotlib.pyplot as plt
data_clean = pd.read_csv('data.csv', parse_dates=['timestamp'], index_col='timestamp')
weekly = data_clean['revenue'].resample('W').mean()
mean = weekly.mean(); std = weekly.std()
signal = weekly[weekly > mean + 2*std]
plt.plot(weekly.index, weekly)
plt.scatter(signal.index, signal, color='red')
plt.title('Weekly revenue with Anomalies')
plt.show()
