# Goal
Strip all `<script>` tags from both well-formed and malformed HTML.

## Well formed example

In [7]:

# Well-formed sample: a complete HTML document containing one <script> element.
a = """
<!DOCTYPE html>
<html>
<body>

<h1>My First Heading</h1>

<p>My first paragraph.</p>
<script thing='thingthing'>

</script>
</body>
</html>

"""

In [8]:
from bs4 import BeautifulSoup

# Parse the well-formed sample using the 'html.parser' backend.
soup = BeautifulSoup(markup=a, features='html.parser')

In [9]:
# Show the parsed tree; prettify() renders it as indented markup.
print(soup.prettify())

<!DOCTYPE html>
<html>
 <body>
  <h1>
   My First Heading
  </h1>
  <p>
   My first paragraph.
  </p>
  <script thing="thingthing">
  </script>
 </body>
</html>



## Remove script tags

In [11]:
# Drop every <script> element from the parsed tree. find_all() is the
# supported spelling (findAll is a deprecated alias), and decompose() is
# used because the removed tags are not needed afterwards.
for script_tag in soup.find_all('script'):
    script_tag.decompose()

In [12]:
# Print the tree again to confirm the <script> element is gone.
print(soup.prettify())

<!DOCTYPE html>
<html>
 <body>
  <h1>
   My First Heading
  </h1>
  <p>
   My first paragraph.
  </p>
 </body>
</html>



## Malformed example

In [19]:
# Malformed sample: no <body> opener, an unclosed <p>, and one <script> element.
b = """
<!DOCTYPE html>
<html>


<h1>My First Heading</h1>
Opening p tag but no closer.
<p>My first paragraph
<script thing='thingthing'>

</script>
Closing body tag but no opener.
</body>
</html>

"""

In [20]:
# Parse the malformed sample; `soup` is rebound to this new document.
soup = BeautifulSoup(markup=b, features='html.parser')

In [21]:
# Show how the parser interpreted the malformed markup.
print(soup.prettify())

<!DOCTYPE html>
<html>
 <h1>
  My First Heading
 </h1>
 Opening p tag but no closer.
 <p>
  My first paragraph
  <script thing="thingthing">
  </script>
  Closing body tag but no opener.
 </p>
</html>



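Notice that the parser repaired the dangling tags: the unclosed `<p>` was closed just before `</html>`, and the stray `</body>` was dropped.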

## Remove scripts and print again

In [22]:
# Drop every <script> element from the repaired tree, then print the result.
# find_all() is the supported spelling (findAll is a deprecated alias), and
# decompose() is used because the removed tags are not needed afterwards.
for script_tag in soup.find_all('script'):
    script_tag.decompose()
print(soup.prettify())

<!DOCTYPE html>
<html>
 <h1>
  My First Heading
 </h1>
 Opening p tag but no closer.
 <p>
  My first paragraph
  Closing body tag but no opener.
 </p>
</html>



Success: the `<script>` tag is removed from the malformed document as well.