In [None]:
import itertools
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy
import numpy as np

In [None]:
# Raw string so the backslashes in the Windows path are never parsed as escape sequences.
input_img = cv2.imread(r'g:\Images\Original\image_1721814223451221704.png')
# cv2.imread signals an unreadable/missing file by returning None rather than raising.
if input_img is None:
    raise FileNotFoundError("Could not read the input image; check the path passed to cv2.imread.")
# OpenCV stores colour images as BGR, matplotlib interprets arrays as RGB.
plt.imshow(cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB))

In [None]:
def calculate_snr(image):
    """Return a global signal-to-noise estimate of ``image`` as 10*log10(mean^2 / variance).

    A 3-dimensional input is first converted to grayscale with
    ``cv2.COLOR_BGR2GRAY``; any other input is used as given.

    Degenerate inputs are handled explicitly (same values NumPy would produce,
    but without divide-by-zero RuntimeWarnings):

    * zero variance, non-zero mean -> ``inf``
    * zero variance, zero mean     -> ``nan`` (the ratio 0/0 is undefined)
    * zero mean, non-zero variance -> ``-inf``
    """
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    mean_intensity = np.mean(image)
    variance = np.var(image)

    if variance == 0:
        # Constant image: no spread at all, so the ratio is infinite or undefined.
        return float("inf") if mean_intensity != 0 else float("nan")
    if mean_intensity == 0:
        # log10(0): the signal term vanishes.
        return float("-inf")

    return 10 * np.log10(mean_intensity**2 / variance)

In [None]:
# Display the 10*log10(mean^2 / variance) estimate for the loaded input image.
calculate_snr(input_img)

In [None]:
def calculate_features(image, show=True):
    """Detect ORB keypoints in ``image`` and return how many were found.

    Parameters
    ----------
    image : array accepted by ``cv2.ORB.detect`` (the notebook passes BGR images
        loaded with ``cv2.imread``).
    show : bool, default True
        When true, draw the keypoints in green and display the result with
        matplotlib. Pass ``False`` to only count features (e.g. inside a loop).

    Returns
    -------
    int
        Number of keypoints left after descriptor computation.
    """
    # Create ORB detector with OpenCV's default settings.
    orb = cv2.ORB_create()

    # Detect keypoints, then compute descriptors; the keypoint list returned by
    # compute() is the one that gets counted, exactly as before.
    keypoints = orb.detect(image, None)
    keypoints, _descriptors = orb.compute(image, keypoints)

    num_features = len(keypoints)

    if show:
        output_image = cv2.drawKeypoints(
            image,
            keypoints,
            None,
            color=(0, 255, 0),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
        )
        # drawKeypoints yields a BGR image; matplotlib expects RGB, so swap the
        # channels to avoid displaying the picture with red and blue exchanged.
        plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

    return num_features

# Count the ORB keypoints of the input image (the call also displays them).
calculate_features(input_img)



In [None]:
def CLAHE(image):
    """Apply contrast-limited adaptive histogram equalisation to ``image``.

    A 3-dimensional input is converted BGR -> gray, equalised
    (clipLimit=2.0, 8x8 tiles) and converted back gray -> BGR, so the result
    has three identical channels. Any other input is equalised directly and
    returned as a single-channel image.
    """
    is_multichannel = len(image.shape) == 3

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_multichannel else image

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(gray)

    if is_multichannel:
        return cv2.cvtColor(equalized, cv2.COLOR_GRAY2BGR)
    return equalized

In [None]:
def white_balance(image):
    """Equalise the lightness of a BGR image while leaving its colour channels alone.

    The image is converted to LAB, CLAHE (clipLimit=3.0, 8x8 tiles) is applied
    to the L channel only, and the result is converted back to BGR.

    NOTE(review): despite the name, the code adjusts lightness contrast rather
    than the colour balance - the a/b channels pass through unchanged.
    """
    lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))

    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    merged_lab = cv2.merge((equalizer.apply(lightness), chroma_a, chroma_b))

    return cv2.cvtColor(merged_lab, cv2.COLOR_LAB2BGR)



In [None]:
def Contrast_Up(image):
    """Return ``image`` with every value multiplied by 4 via ``cv2.convertScaleAbs``."""
    gain, offset = 4.0, 0
    return cv2.convertScaleAbs(image, alpha=gain, beta=offset)

def Contrast_Down(image):
    """Return ``image`` with every value multiplied by 0.2 via ``cv2.convertScaleAbs``."""
    gain, offset = 0.2, 0
    return cv2.convertScaleAbs(image, alpha=gain, beta=offset)

def Brightness_Up(image):
    """Return ``image`` with 150 added to every value via ``cv2.convertScaleAbs``."""
    gain, offset = 1.0, 150
    return cv2.convertScaleAbs(image, alpha=gain, beta=offset)

def Brightness_Down(image):
    """Return a darkened uint8 copy of ``image``: every value minus 10, clamped to [0, 255].

    The previous implementation passed ``beta=10`` to ``cv2.convertScaleAbs``,
    which *added* 10 and therefore brightened the image. A negative beta is not
    a drop-in fix either, because convertScaleAbs takes the absolute value of
    the result (a pixel of 3 would become 7), so the subtraction is done in
    NumPy with explicit clamping instead.
    """
    # Widen to float first so uint8 inputs cannot wrap around below zero.
    shifted = np.rint(image.astype(np.float64) - 10)
    darkened_image = np.clip(shifted, 0, 255).astype(np.uint8)
    return darkened_image

In [None]:
class Agent:
    """Tabular Q-learning agent that chains image enhancements to raise the ORB feature count.

    States are coarse buckets ('F0'..'F5') of the feature count returned by
    ``calculate_features``; actions are the six enhancement functions defined
    in this notebook. The reward depends on how many buckets an action moved
    the image up or down.

    Note: ``check_state`` delegates to ``calculate_features``, which by default
    displays a figure on every call.
    """

    def __init__(self,image):
        # Action labels, in the index order used by perform_action().
        self.actions = ['WB','C_Up','C_Down','Bs_Up','B_Down','CLAHE']
        # Feature-count buckets, ordered from fewest to most features.
        self.states = ['F0','F1','F2','F3','F4','F5']
        # The set of reward values update_reward() can hand out.
        self.rewards = [-5,-1,1,2,3,4,5]
        self.target_reached = 0
        # Log of [action_label, state, next_state, reward] transitions.
        self.memory = []
        self.image = image
        # Number of enhancement operations applied so far.
        self.steps = 0
        self.cumulative_reward = 0
        self.learning_rate = 0.1
        self.discount_factor = 0.9
        # Probability of picking a random action (epsilon in epsilon-greedy).
        self.exploration_prob = 0.6
        self.num_episodes = 20
        # Most recent image produced by next_state().
        self.next_img = np.zeros_like(self.image)
        # Indices of every action chosen during training.
        self.action_taken = []

        self.Q = np.zeros((len(self.states), len(self.actions)))

    @staticmethod
    def _state_from_feature_count(num_of_features):
        """Map a feature count to its bucket label ('F0'..'F5')."""
        if num_of_features < 0:
            # Kept for completeness; a keypoint count is never negative.
            return 'F0'
        if num_of_features <= 100:
            # Includes exactly 100, which previously matched no bucket at all.
            return 'F1'
        if num_of_features <= 200:
            return 'F2'
        if num_of_features <= 300:
            return 'F3'
        if num_of_features <= 400:
            return 'F4'
        return 'F5'

    def check_state(self,image):
        """Return the state label for ``image`` based on its ORB feature count."""
        num_of_features = calculate_features(image)
        return self._state_from_feature_count(num_of_features)

    def get_feature_difference(self,ft1,ft2):
        """Return ``(index(ft2) - index(ft1)) * 100``, i.e. the bucket gain going from ft1 to ft2."""
        i1 = self.states.index(ft1)
        i2 = self.states.index(ft2)
        return (i2-i1) * 100

    def update_reward(self,st1,st2):
        """Return the reward for moving from state ``st2`` (before) to ``st1`` (after).

        Moving down costs -5, staying put costs -1, and each bucket gained is
        worth one point up to a maximum of 5.
        """
        print(st2,st1)
        feature_difference = self.get_feature_difference(st2,st1)

        if feature_difference < 0:
            return -5
        if feature_difference == 0:
            return -1
        if feature_difference <= 100:
            return 1
        if feature_difference <= 200:
            return 2
        if feature_difference <= 300:
            return 3
        if feature_difference <= 400:
            return 4
        return 5

    def update_memory(self,a,s,sd,r):
        """Append the transition ``[action, state, next_state, reward]`` to the log."""
        self.memory.append([a,s,sd,r])

    def perform_action(self,ind,img_inp):
        """Apply the enhancement with index ``ind`` to ``img_inp`` and return the result.

        Raises
        ------
        ValueError
            If ``ind`` is not a valid action index.
        """
        # Built at call time so the module-level functions are resolved lazily;
        # the order must match self.actions.
        operations = (
            white_balance,
            Contrast_Up,
            Contrast_Down,
            Brightness_Up,
            Brightness_Down,
            CLAHE,
        )
        if not 0 <= ind < len(operations):
            raise ValueError(f"Unknown action index: {ind}")

        self.steps += 1
        # Every action - including CLAHE, which used to read self.image -
        # operates on the image it is handed.
        return operations[ind](img_inp)

    def select_action(self,state):
        """Pick an action index for the state index ``state`` using epsilon-greedy."""
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(len(self.actions))  # Exploration: Choose a random action
        else:
            return np.argmax(self.Q[state, :])  # Exploitation: Choose the action with the highest Q-value

    def next_state(self,image ,action):
        """Apply ``action`` to ``image``; return ``(new_state_label, new_image)``."""
        self.next_img = self.perform_action(action,image)
        return self.check_state(self.next_img), self.next_img

    def Q_train(self):
        """Run ``num_episodes`` episodes of Q-learning, updating ``self.Q`` in place.

        Each episode starts from the original image and applies one action per
        step to the image produced by the previous step. An episode ends when
        the image has gained more than two buckets over its initial state, or
        after step index 50 (whichever comes first).
        """
        for episode in range(self.num_episodes):
            # Restart from the untouched input image.
            curr_image = self.image
            init_state = self.check_state(self.image)
            curr_state = init_state

            for tries in range(100):
                state_idx = self.states.index(curr_state)
                action = self.select_action(state_idx)
                print("Action : ",action)

                # Perform the selected action and observe the next state and reward
                next_state, next_image = self.next_state(curr_image,action)
                print(curr_state,next_state)
                reward = self.update_reward(next_state,curr_state)
                self.update_memory(self.actions[action],curr_state,next_state,reward)
                self.cumulative_reward += reward
                self.action_taken.append(action)

                # Only the newest transition is printed; dumping the whole log
                # every step grows quadratically.
                print(self.memory[-1])
                print('cumulative_reward = ',self.cumulative_reward)

                # Update the Q-value using the Q-learning update rule
                next_idx = self.states.index(next_state)
                td_target = reward + self.discount_factor * np.max(self.Q[next_idx, :])
                self.Q[state_idx, action] += self.learning_rate * (td_target - self.Q[state_idx, action])

                print("After action: ",curr_state,next_state)
                # Gain relative to the start of the episode (positive = more features).
                progress = self.get_feature_difference(init_state,next_state)
                print("Feature_Diff: ",progress)

                # Move to the next state: later actions build on this result.
                curr_image, curr_state = next_image, next_state

                #terminating condition
                if progress > 200:
                    print("Target Reached")
                    break
                if tries == 50:
                    print("Episode Force Stopped")
                    break

        # while self.target_reached == 0 and self.steps < self.step_limit:
        #     print('Training')
        #     init_features = len(fast12.detect(image, None))
        #     select_action = random.randint(0,5)
        #     denoised_img = self.perform_action(select_action,image)
        #     image = denoised_img
        #     temp_keypoints = fast12.detect(denoised_img, None)
        #     num_features = len(temp_keypoints)
        #     feature_diff = num_features - init_features
        #     print(feature_diff)
        #     self.update_memory(self.actions[select_action],self.check_state(init_features),self.check_state(num_features),self.update_reward)
        #     self.cumulative_reward += self.update_reward(feature_diff)
        #     print(self.cumulative_reward)
        #     if feature_diff > 100:
        #         print('Target_Reached',training_steps = self.steps,increase_in_features = feature_diff)
        #         self.target_reached == 1

In [None]:
# Seed NumPy's global RNG so the epsilon-greedy choices (np.random.rand /
# np.random.choice inside Agent.select_action) are repeatable across runs.
np.random.seed(42)

A1 = Agent(input_img)

A1.Q_train()

In [None]:
# Chosen action indices, how many were taken, and the agent's step counter - one per line.
for summary_value in (A1.action_taken, len(A1.action_taken), A1.steps):
    print(summary_value)

In [None]:
# The agent's images stay in OpenCV's BGR order: convert only for display,
# and hand the untouched BGR data to cv2.imwrite.
plt.imshow(cv2.cvtColor(A1.next_img, cv2.COLOR_BGR2RGB))
cv2.imwrite("output_45.jpg", A1.next_img)

In [None]:
# Show the original for comparison; convert BGR -> RGB so matplotlib renders true colours.
plt.imshow(cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB))

In [None]:
# The reward is the last element of every logged transition.
rewards = [transition[-1] for transition in A1.memory]

In [None]:
def Cumulative_sum(lists):
    """Return the running totals of ``lists`` as a new list.

    ``Cumulative_sum([1, 2, 3])`` gives ``[1, 3, 6]``; an empty input gives
    ``[]``. Uses ``itertools.accumulate`` so each element is added once instead
    of re-summing every prefix.
    """
    return list(itertools.accumulate(lists))
   
# Running total of the rewards collected over all logged transitions.
print (Cumulative_sum(rewards)) 

In [None]:
# import numpy as np


# # Define Q-table to store Q-values for each state-action pair
# num_states = 5
# num_actions = 4  # Up, Down, Left, Right
# Q = np.zeros((num_states, num_actions))

# # Define parameters for the Q-learning algorithm
# learning_rate = 0.1
# discount_factor = 0.9
# exploration_prob = 0.2
# num_episodes = 1000

# # Define a function to convert a state from the environment to an index
# # def state_to_index(state):
# #     return np.ravel_multi_index(np.where(env == state), env.shape)

# # Define a function to select an action using epsilon-greedy policy
# def select_action(state):
#     if np.random.rand() < exploration_prob:
#         return np.random.choice(num_actions)  # Exploration: Choose a random action
#     else:
#         return np.argmax(Q[state, :])  # Exploitation: Choose the action with the highest Q-value

# # Q-learning training loop
# for episode in range(num_episodes):
#     state = self.states.index(check_state(self.image))  # Start from the initial state 'S'
    
#     while True:
#         action = select_action(state)
        
#         # Perform the selected action and observe the next state and reward
#         if action == 0:  # Up
#             next_state = state_to_index(env[np.where(env == 'S')])  # Stay at 'S'
#             reward = 0
#         elif action == 1:  # Down
#             next_state = state_to_index(env[np.where(env == 'H')])  # Hit an obstacle, stay there
#             reward = 0
#         elif action == 2:  # Left
#             next_state = state_to_index(env[np.where(env == 'S')])  # Stay at 'S'
#             reward = 0
#         elif action == 3:  # Right
#             next_state = state_to_index(env[np.where(env == ' ')] or env[np.where(env == 'G')])  # Move to an empty cell or the goal
#             reward = 1 if env[np.where(env == 'G')] else 0  # +1 if reached the goal, else 0
        
#         # Update the Q-value using the Q-learning update rule
#         Q[state, action] = Q[state, action] + learning_rate * (reward + discount_factor * np.max(Q[next_state, :]) - Q[state, action])
        
#         state = next_state  # Move to the next state
        
#         if env[np.where(env == 'G')]:  # Reached the goal
#             break

# # Now, the Q-table contains learned Q-values, and the agent can use them to make decisions
# # You can use the Q-table to find the optimal policy or evaluate the agent's performance
