# Web Scraping - OpenAI Gym Environment Table
This notebook downloads the table of OpenAI Gym environments from [this wiki page](https://github.com/openai/gym/wiki/Table-of-environments) and saves it as a CSV file.

> We use `beautifulsoup4` for the web scraping. Therefore, install it if required. 

In [None]:
!pip install requests beautifulsoup4 

## Imports

In [1]:
import os
import urllib
import urllib.request

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

## Global variables

In [2]:
# Wiki page that hosts the table of Gym environments.
url = "https://github.com/openai/gym/wiki/Table-of-environments"

# The CSV is written to an "assets" folder located in the parent of the
# current working directory.
current_dir = os.getcwd()
dump_dir = os.path.join(os.path.split(current_dir)[0], "assets")
save_file = os.path.join(dump_dir, "env_info.csv")

## Load the contents from the website

In [3]:
# Download the page and parse it while the connection is open. The `with`
# block closes the HTTP response as soon as parsing is done, even if the
# parser raises.
with urllib.request.urlopen(url) as client:
    soup = BeautifulSoup(client, 'html.parser')

Extract the table from the web contents.

In [4]:
# Locate the first <table> element carrying role="table" in the parsed page.
table = soup.find(name="table", attrs={"role": "table"})

# `find` returns None when nothing matches; fail here with a clear message
# instead of hitting an AttributeError in a later cell.
if table is None:
    raise RuntimeError(f'No <table role="table"> element found at {url}')

## Preprocessing table contents
1. Load all table row contents.
2. Split the table into column names and data.
3. ~~Determine the data type of each column as one of the following: string, integer, or float.~~

In [5]:
def isNum(string):
    """Return True if ``string`` is a plain decimal number literal.

    A string is accepted when it consists of an optional leading ``"-"``,
    ASCII digits, and at most one ``"."``, with at least one digit overall
    (e.g. ``"200"``, ``"-110.0"``, ``"0.99"``).

    Strings without any digit (``""``, ``"-"``, ``"."``), with a misplaced
    sign (``"1-2"``), with several decimal points (``"1.2.3"``) or with
    non-ASCII numeric characters are rejected.

    Parameters
    ----------
    string : str
        Text to check.

    Returns
    -------
    bool
        Whether ``string`` matches the format described above.
    """
    # A minus sign is only meaningful as the very first character.
    unsigned = string[1:] if string.startswith("-") else string
    # Remove the first decimal point only; a second one stays in `fraction`
    # and makes the digit check below fail.
    whole, _, fraction = unsigned.partition(".")
    digits = whole + fraction
    # `isdigit` is False for the empty string; `isascii` filters out
    # characters such as superscript digits that `isdigit` would accept.
    return digits.isascii() and digits.isdigit()

In [6]:
# Read each row cell by cell (<th>/<td>) instead of splitting the row's text
# on newlines: an empty cell at the end of a row, or a cell whose text itself
# contains a newline, would otherwise change the number of fields in that row.
rows = [
    [cell.get_text().strip() for cell in row.find_all(["th", "td"])]
    for row in table.find_all("tr")
]

# The first row holds the column names; the remaining rows are the data.
columns = rows.pop(0)

# Pad short rows with empty strings so every row has one entry per column
# and the result is a regular 2-D array. All values are kept as strings.
n_columns = len(columns)
raw_data = np.array([row + [""] * (n_columns - len(row)) for row in rows])

## Create pandas.DataFrame from the table data

In [7]:
# Map each column name to the matching column of the 2-D `raw_data` array.
data_dict = {}
for position, name in enumerate(columns):
    data_dict[name] = raw_data[:, position]

# Build the frame from the mapping and display it as the cell output.
df = pd.DataFrame(data_dict)
df

Unnamed: 0,Environment Id,Observation Space,Action Space,Reward Range,tStepL,Trials,rThresh
0,MountainCar-v0,"Box(2,)",Discrete(3),"(-inf, inf)",200,100,-110.0
1,MountainCarContinuous-v0,"Box(2,)","Box(1,)","(-inf, inf)",999,100,90.0
2,Pendulum-v0,"Box(3,)","Box(1,)","(-inf, inf)",200,100,
3,CartPole-v0,"Box(4,)",Discrete(2),"(-inf, inf)",200,100,195.0
4,CartPole-v1,"Box(4,)",Discrete(2),"(-inf, inf)",500,100,475.0
...,...,...,...,...,...,...,...
771,FrozenLake8x8-v0,Discrete(64),Discrete(4),"(0, 1)",200,100,0.99
772,Taxi-v2,Discrete(500),Discrete(6),"(-inf, inf)",200,100,8
773,KellyCoinflipGeneralized-v0,"Tuple(Box(1,), Discrete(283), Discrete(283), D...",Discrete(28800),"(0, 288.0)",,100,
774,KellyCoinflip-v0,"Tuple(Box(1,), Discrete(301))",Discrete(25000),"(0, 250.0)",,100,246.61


## Save pandas.DataFrame as a CSV file

In [None]:
# `to_csv` does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs(os.path.dirname(save_file), exist_ok=True)

# Write the table without the integer row index.
df.to_csv(save_file, index=False)