In [21]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset


In [22]:
# Free-text facts about the University of Moratuwa Electrical Engineering
# department. Each entry becomes one training record.
department_facts = (
    "University: University of Moratuwa.\nDepartment: Electrical Engineering.\nLocation: Sumanadasa Building (Levels 1 & 2), Katubedda, Moratuwa.\nContact: Tel +94 11 2650301 Ext: 3200–3201, Fax +94 11 2650622.",
    "Head of Department: Prof. W. D. A. S. Wijayapala (appointed effective 20 Oct 2022). Last HOD was Prof. Udayanga Hemapala (from Jan 2021). Research interests: energy planning, power systems, renewable energy, industrial safety.",
    "Senior Professors: Snr. Prof. Sisil Kumarawadu, Snr. Prof. Nalin Wickramarachchi.\nMembers focusing on Quality Assurance: Prof. D. P. Chandima (QA Coordinator). Student Counsellor: Prof. L. N. Widanagama Arachchige.",
    "Professors: Prof. K.T.M.U. Hemapala; Prof. A.G.B.P. Jayasekara; Prof. D.P. Chandima; Prof. L.N. Widanagama Arachchige; Prof. W.D.A.S. Rodrigo.",
    "Associate Professor: Prof. J.P. Karunadasa. Senior Lecturer Gr. I: Dr. A.M.H.S. Abeykoon; Dr. S.K. Abeygunawardane; Dr. R.M.T. Damayanthi; Dr. W.D. Prasad; Dr. Rasara Samarasinghe.",
    "Senior Lecturer Gr. II: Dr. V. Logeeshan; Dr. R.M. Maheshi Ruwanthika; Dr. S.D.M.S. Gunawardana; Ms. K.A.T. Lasagani; Lecturer (Probationary): Mr. S.R.A. Bolonne.",
    "Academic Coordinators (UG BSc): Semester 1 – Dr. W.D. Prasad; Semester 2 – Dr. R. Samarasinghe; Sem 3 – Prof. A.G.B.P. Jayasekara; Sem 4 – Dr. R.M.T. Damayanthi; Sem 5 – Mrs. K.A.T. Lasagani; Sem 6 – Dr. S.D.M.S. Gunawardana; Sem 7 – Dr. R.M.M. Ruwanthika; Sem 8 – Prof. W.D.A.S. Rodrigo.",
    "PG Coordinators: MSc/PG Diploma Electrical Engineering – Prof. L. N. Widanagama Arachchige; Electrical Installations – Dr. R.M.T. Damayanthi; Industrial Automation – Prof. A.G.B.P. Jayasekara; PG Certificate Industrial Automation – Dr. L. Velmanickam.",
    "Laboratories: Electrical Measurement; Power Systems; Electrical Machines; Controlled Drives; Nuclear Measurements; High Voltage; Computer Systems; Wiring Diagram & Electrical Construction.",
    "Student Body: approx. 70 undergraduate intake; ~100 postgraduate students annually; department hosts short continuing‑education courses for engineers; strong consultancy links to CEB, SLT and power industry.",
    "Support staff: Clerk assisting HOD – Mr. L. Kushan. Computer services: ~35 PCs, NT & UNIX servers, campus LAN and LEARN network in labs and staff rooms.",
    "Notable alumnus: Emeritus Prof. Sam Karunaratne — department’s first professor (1969–2002), founded Computer Engineering dept at UoM; Professor Mahinda Vilathgamuwa — notable power engineering academic trained at UoM and Cambridge.",
)

# Wrap each snippet in a single-key record so the dataset has one "text" column.
data = [{"text": fact} for fact in department_facts]

# Build the in-memory dataset and show its summary (features and row count).
dataset = Dataset.from_list(data)
print(dataset)


Dataset({
    features: ['text'],
    num_rows: 12
})


In [None]:
# Load the pretrained checkpoint and its matching tokenizer.
# `dataset` is already built in the data cell above, so it is not rebuilt here.
model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Bind the tokenizer under the correctly spelled name: the save and generation
# cells further down refer to `tokenizer`, which previously did not exist.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Backward-compatible alias: tokenize_function still refers to the tokenizer
# under this (misspelled) name.
tokenziser = tokenizer

# padding='max_length' needs a pad token; fall back to EOS if the checkpoint
# does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)

In [None]:
def tokenize_function(examples, max_length=64, tokenizer_fn=None):
    """Tokenize a batch of records and attach causal-LM labels.

    Intended for ``dataset.map(tokenize_function, batched=True)``, where
    ``examples['text']`` is a list of strings.

    Args:
        examples: Mapping with a ``'text'`` entry holding the batch of strings.
        max_length: Length every sequence is truncated/padded to.
        tokenizer_fn: Callable used to tokenize. Defaults to the notebook-level
            tokenizer, which the loading cell binds as ``tokenziser``.

    Returns:
        The tokenizer output with an added ``'labels'`` entry: a copy of
        ``'input_ids'`` in which padding positions are replaced by -100.
    """
    if tokenizer_fn is None:
        tokenizer_fn = tokenziser

    tokens = tokenizer_fn(examples['text'],
                          truncation=True,
                          max_length=max_length,
                          padding='max_length')

    # Labels mirror the inputs (the model shifts them internally). Positions
    # where attention_mask is 0 are padding, so they are set to -100 to keep
    # them out of the loss.
    tokens['labels'] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokens['input_ids'], tokens['attention_mask'])
    ]
    return tokens

In [None]:
# Run tokenize_function over the dataset in batches.
tokenized_dataset = dataset.map(
    function=tokenize_function,
    batched=True,
)

In [None]:
# Minimal CPU-only configuration for a quick smoke-test fine-tune.
training_args = TrainingArguments(
    output_dir='./fast_finetuned',
    per_device_train_batch_size=1,
    # The parameter is `num_train_epochs`; `max_train_epochs` is not accepted
    # by TrainingArguments and raises TypeError.
    num_train_epochs=1,
    # A positive max_steps takes precedence over num_train_epochs, so the run
    # stops after 10 optimizer steps.
    max_steps=10,
    logging_steps=1,
    # NOTE(review): `no_cuda` is deprecated in newer transformers releases in
    # favour of `use_cpu=True`; switch if the installed version warns or
    # rejects it.
    no_cuda=True,
    fp16=False,
)

In [None]:
# Tie the model, the training configuration and the tokenized data together.
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_dataset)

In [None]:
# Run the fine-tuning loop; the final expression displays the training summary.
train_result = trainer.train()
train_result

In [None]:
# Write the fine-tuned weights and the tokenizer files to the same directory.
save_dir = './fast_finetuned'
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
def generate_response(prompt, max_length = 64):
    """Generate a completion for ``prompt`` with the notebook-level model.

    Args:
        prompt: Text to feed to the model.
        max_length: Passed to ``model.generate`` as ``max_length``; it bounds
            the total sequence length (prompt tokens plus generated tokens),
            so a long prompt leaves little or no room for new tokens.

    Returns:
        The decoded first output sequence with special tokens stripped. It is
        decoded from the full output ids, so it normally starts with the prompt.
    """
    # Training leaves the model in train mode; switch to eval for inference.
    model.eval()
    # Keep the input tensors on the same device as the model weights.
    inputs = tokenizer(prompt, return_tensors = 'pt').to(model.device)
    outputs = model.generate(**inputs, max_length = max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens = True)

In [None]:
# Ask the fine-tuned model about the department and print what it produces.
prompt = "Summarize the key academic staff and research areas of the University of Moratuwa Electrical Engineering department."
response = generate_response(prompt)
print("🔹 Model Output:", response)