<a href="https://colab.research.google.com/github/SIVAGORAM/DEEPLEARNING/blob/main/RECOMMENDATION_SYSTEM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#9921005015-GORAM SIVA PRASAD
#RECOMMENDATION SYSTEM
import numpy as np

class PricingEnvironment:
    """Toy episodic pricing environment with a shrinking pool of demand.

    An episode starts with a random integer demand in ``[1, max_demand]``.
    Each step sells ``min(price, remaining demand)`` units at ``price`` per
    unit, and the episode is over once the remaining demand hits zero.
    """

    def __init__(self, max_demand, max_price):
        """Store the environment limits.

        Args:
            max_demand: Inclusive upper bound of the demand drawn by ``reset``.
            max_price: Highest price the caller intends to charge. It is only
                stored; ``step`` does not enforce it.
        """
        self.max_demand = max_demand
        self.max_price = max_price
        # No demand until reset() is called, so a premature step() sells nothing.
        self.current_demand = 0

    def reset(self):
        """Draw a new starting demand in ``[1, max_demand]`` and return it."""
        # randint's upper bound is exclusive, hence the + 1.
        self.current_demand = np.random.randint(1, self.max_demand + 1)
        return self.current_demand

    def step(self, price):
        """Sell at ``price`` for one step.

        Args:
            price: Price charged this step; must be positive.

        Returns:
            A ``(revenue, done)`` pair: the revenue earned in this step and
            whether the remaining demand is now zero.

        Raises:
            ValueError: If ``price`` is zero or negative. A zero price would
                sell nothing and the episode would never finish; a negative
                price would increase the remaining demand.
        """
        if price <= 0:
            raise ValueError(f"price must be positive, got {price!r}")

        # Units sold are capped both by the price and by what is left to sell.
        units_sold = min(price, self.current_demand)
        revenue = units_sold * price
        self.current_demand -= units_sold
        done = self.current_demand == 0
        return revenue, done

class QLearningAgent:
    """Epsilon-greedy agent backed by a single-state Q-table over prices.

    Prices are the integers ``1, 2, ...``; ``q_table[i]`` stores the value
    estimate for price ``i + 1``. The table is not indexed by demand, so the
    ``demand`` arguments accepted by the methods do not influence the result.
    """

    def __init__(self, num_prices, max_price, learning_rate=0.1, gamma=0.9, epsilon=0.1):
        """Create an agent with an all-zero Q-table.

        Args:
            num_prices: Number of entries in the Q-table.
            max_price: Highest price the agent may choose.
            learning_rate: Step size used when blending in a new estimate.
            gamma: Discount factor applied to the bootstrap term.
            epsilon: Probability of picking a random price instead of the
                greedy one.
        """
        self.num_prices = num_prices
        self.max_price = max_price
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = np.zeros((num_prices,))

    def _num_actions(self):
        """Return how many prices are both in the Q-table and <= max_price."""
        return min(self.num_prices, self.max_price)

    def choose_price(self, demand):
        """Pick a price with an epsilon-greedy policy.

        Args:
            demand: Accepted for interface compatibility; currently unused.

        Returns:
            An integer price in ``[1, min(num_prices, max_price)]``.
        """
        num_actions = self._num_actions()
        if np.random.rand() < self.epsilon:
            # Explore only prices that have a Q-table slot, so the following
            # update_q_table call can never index past the end of the table.
            return np.random.randint(1, num_actions + 1)
        # Exploit: best known price among the valid ones (index -> price is +1).
        return int(np.argmax(self.q_table[:num_actions])) + 1

    def update_q_table(self, price, demand, revenue):
        """Blend the observed revenue into the estimate for ``price``.

        The target is ``revenue + gamma * max(q_table)``; the bootstrap term
        is applied on every call, including the step that ends an episode.

        Args:
            price: Price that was charged; must lie in ``[1, num_prices]``.
            demand: Accepted for interface compatibility; currently unused.
            revenue: Reward observed for charging ``price``.

        Raises:
            IndexError: If ``price`` has no entry in the Q-table. Without this
                check a price of 0 would wrap around to the last entry.
        """
        if not 1 <= price <= len(self.q_table):
            raise IndexError(
                f"price {price!r} is outside the Q-table range 1..{len(self.q_table)}"
            )

        slot = price - 1
        old_q_value = self.q_table[slot]
        max_future_q_value = np.max(self.q_table)
        target = revenue + self.gamma * max_future_q_value
        self.q_table[slot] = (
            (1 - self.learning_rate) * old_q_value + self.learning_rate * target
        )

# Parameters
max_demand = 10      # inclusive upper bound of the demand drawn per episode
max_price = 10       # prices are the integers 1..max_price
num_episodes = 1000
learning_rate = 0.1
gamma = 0.9
epsilon = 0.1        # exploration probability during training

# Initialize environment and agent (one Q-table entry per integer price)
env = PricingEnvironment(max_demand, max_price)
agent = QLearningAgent(num_prices=max_price, max_price=max_price, learning_rate=learning_rate, gamma=gamma, epsilon=epsilon)

# Training loop: each episode runs until the environment reports the demand is exhausted
for episode in range(num_episodes):
    demand = env.reset()
    done = False
    total_revenue = 0
    while not done:
        price = agent.choose_price(demand)
        revenue, done = env.step(price)
        total_revenue += revenue
        # Credit the price against the demand it was chosen for ...
        agent.update_q_table(price, demand, revenue)
        # ... then refresh the observation so the next choice sees what is left.
        demand = env.current_demand
    print(f"Episode {episode + 1}, Total Revenue: ${total_revenue:.2f}")

# Once trained, use the agent to choose prices.
# Exploration is switched off first: with epsilon > 0 the reported "optimal"
# price would be a random exploratory pick a fraction of the time.
agent.epsilon = 0.0
demand = env.reset()
optimal_price = agent.choose_price(demand)
print(f"Optimal Price for current demand ({demand}): ${optimal_price}")


Episode 1, Total Revenue: $14.00
Episode 2, Total Revenue: $21.00
Episode 3, Total Revenue: $14.00
Episode 4, Total Revenue: $21.00
Episode 5, Total Revenue: $7.00
Episode 6, Total Revenue: $56.00
Episode 7, Total Revenue: $64.00
Episode 8, Total Revenue: $70.00
Episode 9, Total Revenue: $18.00
Episode 10, Total Revenue: $21.00
Episode 11, Total Revenue: $28.00
Episode 12, Total Revenue: $35.00
Episode 13, Total Revenue: $28.00
Episode 14, Total Revenue: $54.00
Episode 15, Total Revenue: $28.00
Episode 16, Total Revenue: $35.00
Episode 17, Total Revenue: $18.00
Episode 18, Total Revenue: $42.00
Episode 19, Total Revenue: $7.00
Episode 20, Total Revenue: $53.00
Episode 21, Total Revenue: $73.00
Episode 22, Total Revenue: $70.00
Episode 23, Total Revenue: $35.00
Episode 24, Total Revenue: $14.00
Episode 25, Total Revenue: $7.00
Episode 26, Total Revenue: $70.00
Episode 27, Total Revenue: $28.00
Episode 28, Total Revenue: $14.00
Episode 29, Total Revenue: $56.00
Episode 30, Total Revenue: