<a href="https://colab.research.google.com/github/JayR1031/reinforcement-language-model/blob/main/Reinforcement_Learning_with_Language_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd #for reading CSV dataset
import random  # For random sampling in generation
from collections import defaultdict, Counter #For counting n-gram transitions

# ==== Build Corpus From CSV ===

In [None]:
def build_corpus_from_csv(csv_path):
    df = pd.read_csv(csv_path)
    fields_to_use = ['Title', 'ExperienceLevel', 'Skills', 'Responsibilities', 'Keywords']
    def clean_and_combine_fields(row, fields):
        combined = []
        for field in fields:
            val = str(row[field]) if pd.notnull(row[field]) else ''
            val = val.replace(';', ',')
            val = val.replace('\n', '').replace('\r', '')
            combined.append(val.strip())
        return ' '.join(combined)
    corpus_list = [clean_and_combine_fields(row, fields_to_use) for _, row in df.iterrows()]
    eos_token = '<EOS>'
    corpus = f'{eos_token} '.join(corpus_list) + f' {eos_token}'
    return corpus


# ==== Character N-Gram Language Model ====

In [None]:
class CharNGramLanguageModel:
    def __init__(self, n, text):
        self.n = n
        self.model = defaultdict(Counter)
        eos_token = ' '

    def update_probs_with_min_plus_one(self):
        #converts raw counts to positive, normalizes weights
        self.probs = {}
        for context, counter  in self.model.items():
            min_count = min(counter.values())
            #subtract min and add 1: ensures every weight is >= 1
            new_counts = {char: count - min_count + 1 for char, count in counter.items()}
            total_count = sum(new_counts.values())
            self.probs[context] = {char: val / total_count for char, val in new_counts.items()}

    def generate_character(self, prompt):
        context = prompt [-self.n:]
        #Ensures probabilities are updated before generation
        if context in self.probs:
            choices, weights = zip(*self.probs[context].items())
            #Pick next char using weighted probabilities
            return random.choices(choices, weights=weights)[0]
        else:
            #Back-off: unseen context, pick at random
            return random.choice(list('abcdefghijklmnopqrstuvwxyz'))

    def generate(self, prompt, max_len=200):
        result = prompt
        for _ in range(max_len):
            next_char = self.generate_character(result)
            if next_char == '<EOS>':
                break
            result += next_char
        return result

In [None]:
def update_probs_with_min_plus_one(self):
        #converts raw counts to positive, normalizes weights
        self.probs = {}
        for context, counter  in self.model.items():
            min_count = min(counter.values())
            #subtract min and add 1: ensures every weight is >= 1
            new_counts = {char: count - min_count + 1 for char, count in counter.items()}
            total_count = sum(new_counts.values())
            self.probs[context] = {char: val / total_count for char, val in new_counts.items()}

In [None]:
def generate_character(self, prompt):
    context = prompt[-self.n:]
    if context in self.probs:
        choices, weights = zip(*self.probs[context].items())
        return random.choices(choices, weights=weights)[0]
    else:
        # Back-off must include <EOS>
        return random.choice(list('abcdefghijklmnopqrstuvwxyz <EOS>'))


In [None]:
def generate(self, prompt, max_len=200):
    result = prompt
    for _ in range(max_len):
        next_char = self.generate_character(result)
        if next_char == '<EOS>':
            break
        result += next_char
    return result


#==== Reinforcement Learning for N-gram Model ===

In [None]:
class ReinforcementLearning:
    def __init__(self, model, alpha=0.1, gamma=0.9):
        self.model = model # CharNGramLanguageModel
        self.alpha = alpha # Learning rate
        self.gamma = gamma # Discount Factor (not used but can be)


    def Q_learning(self, criteria, num_prompts=1, iterations_per_prompts=30):
        #Apply Q-Learning to update n-gram weights based on reward
        for _ in range(num_prompts):
            prompt = input("Enter a prompt for RL training: ")
            for _ in range(iterations_per_prompts):
                generated = self.model.generate(prompt, max_len=200)
                reward = criteria(generated)
                #Update model weights for each context/next_char in generated sequence
                for i in range(len(generated) - self.model.n):
                    context = generated[i:i+self.model.n]
                    next_char = generated[i + self.model.n]
                    #Basic Q-update: add (alpha * reward) to weight
                    self.model.model[context][next_char] += self.alpha * reward
        #After RL training, convert all counts to positive, normalized probabilities
        self.model.update_probs_with_min_plus_one()


In [None]:
# ==== 5. Testing Function ====
def test_model(model, prompt, num_samples=10):
    lengths = []
    samples = []
    for _ in range(num_samples):
        generated = model.generate(prompt, max_len=200)
        lengths.append(len(generated))
        samples.append(generated)
    print(f"Average length: {sum(lengths)/num_samples:.2f}")
    print("Sample generated text:", samples[0])



In [None]:
# ==== 6. Main Workflow: Train and Test Model ====

# -- Prepare training corpus --
csv_path = "/content/job_dataset.csv"         # Update as needed
corpus = build_corpus_from_csv(csv_path)

In [None]:
# -- Train base model --
n = 5
model = CharNGramLanguageModel(n, corpus)
model.update_probs_with_min_plus_one()   # Initial positive weights for sampling

In [None]:
# -- Test model BEFORE RL --
prompt = "Python Programming"
print("Testing BEFORE RL:")
test_model(model, prompt)

Testing BEFORE RL:
Average length: 218.00
Sample generated text: Python Programmingqwygsciwwiluymvlnessrsgauaqmtkytlnzfxdhpyylztvbpljjohagzolwcrbsyntuoaipxmwspqskotgsnowyieweckpqnvrvowbyxhqnhkcxizxitxgsxzecyvpkeubkofkxdonjvlnfshzqxxqjklmwxuzggkrsdrdszgpkpkbgwkpajdbtyzsejntiruktnfosd


In [None]:
# -- Apply RL with criteria: shorter text will be rewarded using -len(x) --
rl = ReinforcementLearning(model)
rl.Q_learning(lambda x: -len(x), num_prompts=1, iterations_per_prompts=300)

Enter a prompt for RL training: Python


In [None]:
# -- Test model AFTER RL --
print("Testing AFTER RL:")
test_model(model, prompt)

Testing AFTER RL:
Average length: 218.00
Sample generated text: Python Programmingbxopizhnvobjlpzcmqyhigyxkpsbtzlseinpabljnirrensyzwaiuafcljvauhkbxgrrnpwafwrynmjpsgrjblzlpvhlevxovbdizwaxcfiktpkuhbogldkkjoxzkadjvurrmdppzmfjdohzxsizvcwffpxcvyebpgsqeyakspcakpqqernmesinugtrbdojldasqnth
