<a href="https://colab.research.google.com/github/JiyaPaliwal14/Machine-Learning/blob/main/reinforcement/reinforcement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score
import numpy as np

In [14]:
data = {
    'f1': np.random.normal(0, 1, 200),
    'f2': np.random.normal(0, 1, 200),
    'target' : [0]*180 + [1]*20
}

df = pd.DataFrame(data)

In [15]:
X = df.drop('target', axis=1)
y = df['target']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
alpha = 0.5
gamma = 0.8
exp = 0.2
eps = 20

In [17]:
actions = [(k, ratio) for k in [3,5,7] for ratio in [0.3, 0.5, 0.7, 1.0]]
q_table = {action:0 for action in actions}

In [19]:
def eval_action(k, ratio):
  sm = SMOTE(k_neighbors=k, sampling_strategy=ratio, random_state=42)
  X_res, y_res = sm.fit_resample(X_train, y_train)
  clf = RandomForestClassifier(random_state=42)
  clf.fit(X_res, y_res)
  y_pred = clf.predict(X_test)
  f1 = f1_score(y_test, y_pred)
  return f1

In [21]:
best_f1 = 0

for episode in range(eps):
  if np.random.rand() < exp:
    action = actions[np.random.randint(len(actions))]
  else:
    action = max(q_table, key=q_table.get)

  k, ratio = action
  f1 = eval_action(k, ratio)
  reward = f1 - best_f1
  best_f1 = max(best_f1, f1)

  q_table[action] += alpha * (reward + gamma*best_f1 - q_table[action])

  print(f"episode: {episode+1:2d} | action: (k: {k}, ratio: {ratio}) | reward: {reward:.3f} | f1: {f1:.3f}")

episode:  1 | action: (k: 7, ratio: 0.3) | reward: 0.222 | f1: 0.222
episode:  2 | action: (k: 5, ratio: 1.0) | reward: -0.097 | f1: 0.125
episode:  3 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode:  4 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode:  5 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode:  6 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode:  7 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode:  8 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode:  9 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode: 10 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode: 11 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode: 12 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode: 13 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode: 14 | action: (k: 7, ratio: 0.3) | reward: 0.000 | f1: 0.222
episode: 15 | action: (k: 7, rati

In [22]:
best = max(q_table, key=q_table.get)
print(f"best action  = {best}")
print("best f1 = ", best_f1)

best action  = (7, 0.3)
best f1 =  0.2222222222222222
