# MountainCar


### Observation Space
The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
| Num | Observation                          | Min  | Max | Unit         |
|-----|--------------------------------------|------|-----|--------------|
| 0   | position of the car along the x-axis | -1.2  | 0.6  | position (m) |
| 1   | velocity of the car                  | -0.07 | 0.07 | velocity (v) |
### Action Space
There are 3 discrete deterministic actions:
| Num | Action                  | Force term `(action - 1) * force` |
|-----|-------------------------|-----------------------------------|
| 0   | Accelerate to the left  | -0.001                            |
| 1   | Don't accelerate        | 0                                 |
| 2   | Accelerate to the right | +0.001                            |
### Transition Dynamics:
Given an action, the mountain car follows the following transition dynamics:
*velocity<sub>t+1</sub> = velocity<sub>t</sub> + (action - 1) * force - cos(3 * position<sub>t</sub>) * gravity*
*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
where force = 0.001 and gravity = 0.0025. The collisions at either end are inelastic with the velocity set to 0
upon collision with the wall. The position is clipped to the range `[-1.2, 0.6]` and
velocity is clipped to the range `[-0.07, 0.07]`.
### Reward:
The goal is to reach the flag placed on top of the right hill as quickly as possible, as such the agent is
penalised with a reward of -1 for each timestep.
### Starting State
The position of the car is assigned a uniform random value in *[-0.6 , -0.4]*.
The starting velocity of the car is always assigned to 0.
### Episode End
The episode ends if either of the following happens:
1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill)
2. Truncation: The length of the episode is 200 (the default limit; the code cells below raise it to 1000 via `env._max_episode_steps`).

# Human keyboard

In [None]:
import gym
import keyboard
env = gym.make("MountainCar-v0", new_step_api=True, render_mode='human')
# Raise the episode step limit from the default of 200 to 1000.
# NOTE(review): this writes a private attribute of the wrapper returned by
# gym.make -- confirm it still exists when upgrading gym.
env._max_episode_steps = 1000

# Play
state = env.reset()  # [position, velocity]
terminated = False  # True once the car reaches the goal position
truncated = False  # True once the step limit runs out
score = 0

try:
    while not terminated and not truncated:
        # Stop playing when ESC is pressed. A plain `break` is used instead of
        # `assert False`, which is stripped under `python -O` and would leave
        # the loop stepping an already-closed environment.
        if keyboard.is_pressed('esc'):
            break

        # Read keyboard input
        int_action = 1  # No push
        if keyboard.is_pressed('right'):
            int_action = 2  # right push
        elif keyboard.is_pressed('left'):
            int_action = 0  # left push

        # Send input to game
        state, reward, terminated, truncated, info = env.step(int_action)

        score += reward
    print(score)
finally:
    # Always release the render window, even if stepping the env raised.
    env.close()

# AI environment setup
Run this block once before running the "Train AI" and "Test AI" blocks.

In [None]:
import gym
import neat
import os
import math
import pickle

def convert_to_int_action(action):
    """Map the network's output activations to a discrete MountainCar action.

    The first output votes for a push to the right and the second for a push
    to the left; an output counts as "on" only when it is strictly above 0.5.
    The right push wins when both are on.

    Args:
        action: indexable sequence of activations (index 0 = right,
            index 1 = left).

    Returns:
        2 for a right push, 0 for a left push, 1 for no push.
    """
    threshold = 0.5

    if action[0] > threshold:
        return 2  # Right push
    if action[1] > threshold:
        return 0  # Left push
    return 1  # No push

# Load the NEAT settings from the config file, resolved to an absolute path
# relative to the current working directory.
config_path = os.path.abspath("config-feedforward.txt")
config = neat.Config(
    neat.DefaultGenome,
    neat.DefaultReproduction,
    neat.DefaultSpeciesSet,
    neat.DefaultStagnation,
    config_path,
)


# Train AI
Remember to run the "AI environment setup" block before this one.

In [None]:
def ai_train(genomes, config):
    """Evaluate every genome of one NEAT generation on MountainCar.

    Each genome is turned into a feed-forward network and plays one episode
    in the module-level ``env``. Its fitness is then stored on the genome so
    that NEAT can rank the population.

    Fitness is the episode return (the environment gives -1 per step, so
    reaching the goal sooner scores higher) plus the furthest-right position
    reached. The position term separates genomes that never reach the goal;
    without it they would all tie at the same return and selection would have
    nothing to work with.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs supplied by
            ``neat.Population.run``.
        config: the ``neat.Config`` used to build each network.
    """
    for _, g in genomes:
        net = neat.nn.FeedForwardNetwork.create(g, config)

        # Start game
        state = env.reset()
        terminated = False
        truncated = False

        total_reward = 0
        high_pos = -math.inf

        while not terminated and not truncated:
            action = net.activate(state)  # send state to AI

            int_action = convert_to_int_action(action)
            state, reward, terminated, truncated, info = env.step(int_action)

            pos, _vel = state
            total_reward += reward
            high_pos = max(high_pos, pos)

        # Cast to a plain float so the stored fitness does not depend on the
        # numeric type of the environment's observations.
        g.fitness = float(total_reward + high_pos)

# Environment setup (no render_mode: training runs without a window)
env = gym.make("MountainCar-v0", new_step_api=True)
env._max_episode_steps = 1000

p = neat.Population(config)
p.add_reporter(neat.StdOutReporter(True))  # printing stats

num_training_generations = 10
winner = p.run(ai_train, num_training_generations)
print(winner)

# Save best gene in Pickle file. The context manager closes the file even if
# pickling fails part-way through.
with open('winner_neat.p', 'wb') as file:
    pickle.dump(winner, file)

# Test AI
Remember to run the "AI environment setup" block before this one.

In [None]:
import keyboard

def ai_test():
    """Replay the saved winner genome for one episode and print its score.

    Loads the genome pickled in ``winner_neat.p``, builds its feed-forward
    network from the module-level ``config`` and drives the module-level
    ``env`` until the episode terminates, is truncated, or ESC is pressed.
    The environment is left open; the caller is responsible for closing it.
    """
    # NOTE: unpickling can execute arbitrary code -- only load a file that was
    # produced by the training block, never one from an untrusted source.
    with open('winner_neat.p', 'rb') as file:
        winner = pickle.load(file)
    print(winner)

    # Create Neural Network
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

    state = env.reset()
    terminated = False
    truncated = False
    score = 0

    while not terminated and not truncated:
        # Stop the replay when ESC is pressed. `break` is used instead of
        # `assert False`, which is stripped under `python -O` and would leave
        # the loop stepping an already-closed environment.
        if keyboard.is_pressed('esc'):
            break

        action = winner_net.activate(state)

        int_action = convert_to_int_action(action)
        state, reward, terminated, truncated, info = env.step(int_action)
        score += reward
    print(score)

# Environment setup (rendered in a window so the replay can be watched)
env = gym.make("MountainCar-v0", new_step_api=True, render_mode='human')
env._max_episode_steps = 1000

# NOTE(review): `p` does not appear to be used by ai_test(); kept so the
# module-level name stays defined exactly as before.
p = neat.Population(config)
p.add_reporter(neat.StdOutReporter(True))  # printing stats

try:
    ai_test()
finally:
    # Close the render window even when ai_test() raises (for example when
    # the pickle file is missing).
    env.close()