# Intelligent Agents

# The Turing Test and the Chinese Room Concept
* Turing Test: A test proposed by Alan Turing to determine a machine's ability to exhibit intelligent behavior equivalent to, or indistinguishable from, that of a human. If a machine can pass the Turing Test, it can be considered intelligent.
* Chinese Room: A thought experiment by philosopher John Searle arguing against the possibility of strong artificial intelligence. It suggests that even if a computer could convincingly simulate human intelligence by responding to questions in Chinese, it would not truly understand the language—it would be like a person in a room who produces correct answers by following symbol-manipulation instructions without comprehending them.

# Characteristics of a Problem
* Well-defined goal: The problem should have a clear objective.
* Initial state: The starting point from which the problem-solving process begins.
* Actions: Possible moves or operations that can be taken to transform states.
* Transition model: Rules that define the result of taking an action from a given state.
* Path cost: The cumulative cost associated with a sequence of actions to achieve the goal.

# Definition of Agents
* Agents: Entities that perceive their environment through sensors and act upon that environment through actuators to achieve their goals.
* Perception: Obtaining information from sensors about the environment.
* Action: Executing actions that affect the environment based on perception and goals.

# Nature of Agents
* Autonomous vs. controlled: Agents can operate autonomously or under human control.
* Reactive vs. deliberative: Reactive agents respond immediately to stimuli, while deliberative agents plan and reason before acting.
* Single vs. multi-agent: Agents can operate individually or collaborate with other agents.

# Philosophical and Ethical Issues
* Philosophical questions: What constitutes intelligence? Can machines have consciousness or emotions?
* Ethical concerns: How should intelligent agents be designed to ensure fairness, transparency, and accountability? What are the implications of automation on employment and society?

In [None]:
class SimpleAgent:
    """A minimal reactive agent that walks on an integer 2-D grid.

    The agent's position is kept in ``current_location`` as an ``(x, y)``
    tuple and starts at ``(0, 0)``.
    """

    def __init__(self):
        self.current_location = (0, 0)

    def move(self, direction):
        """Shift the agent one cell in ``direction``.

        ``"up"`` / ``"down"`` change the y coordinate by +1 / -1 and
        ``"right"`` / ``"left"`` change the x coordinate by +1 / -1.
        Any other value leaves the position untouched.
        """
        x, y = self.current_location[0], self.current_location[1]
        if direction == "up":
            y += 1
        elif direction == "down":
            y -= 1
        elif direction == "left":
            x -= 1
        elif direction == "right":
            x += 1
        else:
            # Unknown directions are silently ignored.
            return
        self.current_location = (x, y)

    def perceive_environment(self, environment):
        """Placeholder sensing hook; currently it only prints a status line.

        ``environment`` is not inspected yet (the intent is for it to be a
        grid that is sensed relative to the agent's current location).
        """
        print("Perceiving environment...")

    def act(self):
        """Reactive policy: climb while y is below 10, otherwise step right."""
        heading = "up" if self.current_location[1] < 10 else "right"
        self.move(heading)


# Example usage:
# Drive a fresh agent for 20 decisions, reporting its position after each one.
agent = SimpleAgent()
steps_remaining = 20
while steps_remaining > 0:
    agent.act()
    print("Current location:", agent.current_location)
    steps_remaining -= 1

Current location: (0, 1)
Current location: (0, 2)
Current location: (0, 3)
Current location: (0, 4)
Current location: (0, 5)
Current location: (0, 6)
Current location: (0, 7)
Current location: (0, 8)
Current location: (0, 9)
Current location: (0, 10)
Current location: (1, 10)
Current location: (2, 10)
Current location: (3, 10)
Current location: (4, 10)
Current location: (5, 10)
Current location: (6, 10)
Current location: (7, 10)
Current location: (8, 10)
Current location: (9, 10)
Current location: (10, 10)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
from tensorflow.keras.utils import to_categorical

# --- Data -------------------------------------------------------------------
# Load MNIST, add a trailing channel axis, convert to float32 and divide by
# 255, then one-hot encode the integer class labels.
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype('float32') / 255

train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# --- Model ------------------------------------------------------------------
# Three 3x3 convolution stages (max-pooling after the first two) followed by
# a dense classifier head that ends in a 10-way softmax.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# categorical_crossentropy matches the one-hot labels produced above.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# --- Training ---------------------------------------------------------------
# 20% of the training data is held out for per-epoch validation.
history = model.fit(
    train_images,
    train_labels,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

# --- Evaluation -------------------------------------------------------------
test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Test accuracy:", test_acc)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.9883999824523926


# Q-Learning Agent in an MDP

In [None]:
import numpy as np

class QLearningAgent:
    """Tabular Q-learning agent for environments with discrete states and actions.

    Q-values live in ``q_table``, a ``(num_states, num_actions)`` array that
    starts at zero.  Actions are chosen epsilon-greedily and the table is
    updated with the one-step Q-learning rule.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1):
        """Store the hyper-parameters and allocate an all-zero Q-table.

        Args:
            num_states: Number of rows of the Q-table.
            num_actions: Number of columns of the Q-table.
            learning_rate: Step size applied to the TD error.
            discount_factor: Weight given to the best next-state value.
            exploration_rate: Probability of picking a random action.
        """
        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.q_table = np.zeros((num_states, num_actions))

    def choose_action(self, state):
        """Return an epsilon-greedy action index for ``state``."""
        if np.random.uniform(0, 1) < self.exploration_rate:
            return np.random.choice(self.num_actions)
        else:
            return np.argmax(self.q_table[state, :])

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the given transition.

        The entry ``q_table[state, action]`` is moved towards
        ``reward + discount_factor * max(q_table[next_state])`` by a fraction
        ``learning_rate`` of the difference.
        """
        best_next_action = np.argmax(self.q_table[next_state, :])
        td_target = reward + self.discount_factor * self.q_table[next_state, best_next_action]
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error

    @staticmethod
    def _reset_env(env):
        """Reset ``env`` and return only the initial observation.

        Accepts both reset conventions: a bare observation, or an
        ``(observation, info_dict)`` pair as returned by newer Gym releases.
        """
        result = env.reset()
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            return result[0]
        return result

    @staticmethod
    def _step_env(env, action):
        """Step ``env`` and return ``(next_state, reward, done)``.

        Accepts both step conventions: the 4-tuple
        ``(obs, reward, done, info)`` and the 5-tuple
        ``(obs, reward, terminated, truncated, info)``.  For the 5-tuple the
        episode is considered done when it is terminated or truncated.
        """
        result = env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, bool(terminated or truncated)
        next_state, reward, done, _ = result
        return next_state, reward, done

    def train(self, env, num_episodes):
        """Run ``num_episodes`` episodes on ``env``, learning after every step.

        Prints the episode number, total reward and step count at the end of
        each episode.  ``env`` must expose ``reset()`` and ``step(action)``.
        """
        for episode in range(1, num_episodes+1):
            state = self._reset_env(env)
            done = False
            total_reward = 0
            num_steps = 0
            while not done:
                action = self.choose_action(state)
                next_state, reward, done = self._step_env(env, action)
                self.update_q_table(state, action, reward, next_state)
                state = next_state
                total_reward += reward
                num_steps += 1
            print(f"Episode: {episode}, Total Reward: {total_reward}, Num Steps: {num_steps}")


In [None]:
import gym

# Train a tabular Q-learning agent on Taxi-v3; the Q-table is sized from the
# environment's discrete observation and action spaces.
env = gym.make('Taxi-v3')
agent = QLearningAgent(
    num_states=env.observation_space.n,
    num_actions=env.action_space.n,
)
agent.train(env=env, num_episodes=1000)

Episode: 1, Total Reward: -614, Num Steps: 200
Episode: 2, Total Reward: -452, Num Steps: 200
Episode: 3, Total Reward: -569, Num Steps: 200
Episode: 4, Total Reward: -533, Num Steps: 200
Episode: 5, Total Reward: -488, Num Steps: 200
Episode: 6, Total Reward: -587, Num Steps: 200
Episode: 7, Total Reward: -560, Num Steps: 200
Episode: 8, Total Reward: -533, Num Steps: 200
Episode: 9, Total Reward: -506, Num Steps: 200
Episode: 10, Total Reward: -407, Num Steps: 200
Episode: 11, Total Reward: -515, Num Steps: 200
Episode: 12, Total Reward: -371, Num Steps: 200
Episode: 13, Total Reward: -614, Num Steps: 200
Episode: 14, Total Reward: -324, Num Steps: 156
Episode: 15, Total Reward: -524, Num Steps: 200
Episode: 16, Total Reward: -542, Num Steps: 200
Episode: 17, Total Reward: -497, Num Steps: 200
Episode: 18, Total Reward: -416, Num Steps: 200
Episode: 19, Total Reward: -227, Num Steps: 200
Episode: 20, Total Reward: -254, Num Steps: 200
Episode: 21, Total Reward: -309, Num Steps: 168
E

# Deep Q-Learning Agent (uses a neural network to approximate the Q-function)

In [None]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

class DQNAgent:
    """Deep Q-learning agent that approximates Q-values with a small dense network.

    Transitions are stored in a bounded replay memory.  After every
    environment step a random minibatch is drawn from that memory and the
    network is fitted towards the one-step TD target of each sampled
    transition.  Exploration is epsilon-greedy with multiplicative decay.
    """

    def __init__(self, state_size, action_size, learning_rate=0.001, discount_factor=0.99, exploration_rate=1.0,
                 exploration_decay=0.995, exploration_min=0.01, batch_size=64, memory_size=10000):
        """Store the hyper-parameters, create the replay memory and build the network.

        Args:
            state_size: Length of the flat observation vector fed to the network.
            action_size: Number of discrete actions (network outputs).
            learning_rate: Learning rate handed to the Adam optimizer.
            discount_factor: Weight given to the best predicted next-state value.
            exploration_rate: Initial probability of picking a random action.
            exploration_decay: Factor applied to ``exploration_rate`` after each replay.
            exploration_min: Decay stops once ``exploration_rate`` is no longer above this.
            batch_size: Number of transitions sampled per replay.
            memory_size: Maximum number of transitions kept in the replay memory.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.exploration_min = exploration_min
        self.batch_size = batch_size
        self.memory = deque(maxlen=memory_size)
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network.

        Two hidden ReLU layers of 24 units each, followed by a linear output
        layer with one unit per action; trained with MSE loss and Adam.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by current Keras releases.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate), metrics=["mse"])
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def choose_action(self, state):
        """Return an epsilon-greedy action index for ``state``.

        ``state`` is passed straight to ``model.predict``; ``train`` supplies
        it reshaped to ``(1, state_size)``.
        """
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        # verbose=0 keeps Keras from printing a progress bar on every call.
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def replay(self):
        """Fit the network on a random minibatch, then decay exploration.

        Does nothing until the memory holds at least ``batch_size``
        transitions.  Each sampled transition is fitted individually: the
        network's current prediction for the state is used as the target,
        with only the taken action's entry replaced by the TD target.
        """
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                # Bootstrap from the best predicted value of the next state.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.discount_factor * np.amax(next_q)
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay

    @staticmethod
    def _reset_env(env):
        """Reset ``env`` and return only the initial observation.

        Accepts both reset conventions: a bare observation, or an
        ``(observation, info_dict)`` pair as returned by newer Gym releases.
        """
        result = env.reset()
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            return result[0]
        return result

    @staticmethod
    def _step_env(env, action):
        """Step ``env`` and return ``(next_state, reward, done)``.

        Accepts both step conventions: the 4-tuple
        ``(obs, reward, done, info)`` and the 5-tuple
        ``(obs, reward, terminated, truncated, info)``.  For the 5-tuple the
        episode is considered done when it is terminated or truncated.
        """
        result = env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, bool(terminated or truncated)
        next_state, reward, done, _ = result
        return next_state, reward, done

    def train(self, env, num_episodes):
        """Run ``num_episodes`` episodes on ``env``, replaying after every step.

        Observations are reshaped to ``(1, state_size)`` before being stored
        or fed to the network.  Prints the total reward of each episode.
        """
        for episode in range(num_episodes):
            state = self._reset_env(env)
            state = np.reshape(state, [1, self.state_size])
            done = False
            total_reward = 0
            while not done:
                action = self.choose_action(state)
                next_state, reward, done = self._step_env(env, action)
                next_state = np.reshape(next_state, [1, self.state_size])
                total_reward += reward
                self.remember(state, action, reward, next_state, done)
                state = next_state
                self.replay()
            print(f"Episode: {episode + 1}, Total Reward: {total_reward}")

  and should_run_async(code)


In [None]:
import gym

# Train a DQN agent on CartPole-v1 for a few episodes.  The network's input
# width comes from the first dimension of the observation space's shape and
# its output width from the number of discrete actions.
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size=state_size, action_size=action_size)
agent.train(env=env, num_episodes=3)



Episode: 1, Total Reward: 27.0
Episode: 2, Total Reward: 10.0
Episode: 3, Total Reward: 9.0
