In [1]:
import pandas as pd
import os
from sqlalchemy import create_engine


# Scraping with Pandas

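We can use the `read_html` function in pandas to automatically scrape tabular data from a page. It returns a list of DataFrames, one for each HTML table it finds, so the table we want is selected by its position in that list.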

In [2]:
# Wiki page listing the deep-sea creatures; the tables on it are scraped below
url = "https://animalcrossing.fandom.com/wiki/Deep-sea_creatures_(New_Horizons)"

In [3]:
# Parse the HTML tables found at `url` into a list of DataFrames (one per table)
tables = pd.read_html(url)
# Preview the table at index 4, which holds the sea-creature listing
tables[4]

Unnamed: 0,Name,Image,Price,Shadow size [1],Swimming pattern,Time,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,Seaweed,,600,Large,Stationary,All day,✓,✓,✓,✓,✓,✓,✓,-,-,✓,✓,✓
1,Sea grapes,,900,Small,Stationary,All day,-,-,-,-,-,✓,✓,✓,✓,-,-,-
2,Sea cucumber,,500,Medium,Slow consistent movement,All day,✓,✓,✓,✓,-,-,-,-,-,-,✓,✓
3,Sea pig,,10000,Small,Quick long lunges,4pm - 9am,✓,✓,-,-,-,-,-,-,-,-,✓,✓
4,Sea star,,500,Small,Slow short lunges,All day,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓
5,Sea urchin,,1700,Small,Slow consistent movement,All day,-,-,-,-,✓,✓,✓,✓,✓,-,-,-
6,Slate pencil urchin,,2000,Medium,Moderate consistent movement,4pm - 9am,-,-,-,-,✓,✓,✓,✓,✓,-,-,-
7,Sea anemone,,500,Large,Stationary,All day,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓
8,Moon jellyfish,,600,Small,Slow consistent movement,All day,-,-,-,-,-,-,✓,✓,✓,-,-,-
9,Sea slug,,600,Tiny,Slow consistent movement,All day,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓


# Data Transformation

In [4]:
# Take the table at index 4 (the sea-creature listing) and drop its "Image"
# column. `drop` returns a new DataFrame, so `tables[4]` is left untouched and
# this cell can be re-run without raising KeyError on an already-deleted column.
df = tables[4].drop(columns=["Image"])
# displaying the df
df

Unnamed: 0,Name,Price,Shadow size [1],Swimming pattern,Time,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,Seaweed,600,Large,Stationary,All day,✓,✓,✓,✓,✓,✓,✓,-,-,✓,✓,✓
1,Sea grapes,900,Small,Stationary,All day,-,-,-,-,-,✓,✓,✓,✓,-,-,-
2,Sea cucumber,500,Medium,Slow consistent movement,All day,✓,✓,✓,✓,-,-,-,-,-,-,✓,✓
3,Sea pig,10000,Small,Quick long lunges,4pm - 9am,✓,✓,-,-,-,-,-,-,-,-,✓,✓
4,Sea star,500,Small,Slow short lunges,All day,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓
5,Sea urchin,1700,Small,Slow consistent movement,All day,-,-,-,-,✓,✓,✓,✓,✓,-,-,-
6,Slate pencil urchin,2000,Medium,Moderate consistent movement,4pm - 9am,-,-,-,-,✓,✓,✓,✓,✓,-,-,-
7,Sea anemone,500,Large,Stationary,All day,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓
8,Moon jellyfish,600,Small,Slow consistent movement,All day,-,-,-,-,-,-,✓,✓,✓,-,-,-
9,Sea slug,600,Tiny,Slow consistent movement,All day,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓,✓


In [5]:
# Month columns holding the availability markers scraped from the wiki table
month_columns = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Check mark -> True, minus sign -> False; any other value is left as-is
availability_flags = {"✓": True, "-": False}

# Apply the same marker-to-boolean mapping to every month column
for month in month_columns:
    df[month] = df[month].replace(availability_flags)

# Title-casing the name column (e.g. "Sea grapes" -> "Sea Grapes")
df["Name"] = df["Name"].str.title()

# Displaying df
df

Unnamed: 0,Name,Price,Shadow size [1],Swimming pattern,Time,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,Seaweed,600,Large,Stationary,All day,True,True,True,True,True,True,True,False,False,True,True,True
1,Sea Grapes,900,Small,Stationary,All day,False,False,False,False,False,True,True,True,True,False,False,False
2,Sea Cucumber,500,Medium,Slow consistent movement,All day,True,True,True,True,False,False,False,False,False,False,True,True
3,Sea Pig,10000,Small,Quick long lunges,4pm - 9am,True,True,False,False,False,False,False,False,False,False,True,True
4,Sea Star,500,Small,Slow short lunges,All day,True,True,True,True,True,True,True,True,True,True,True,True
5,Sea Urchin,1700,Small,Slow consistent movement,All day,False,False,False,False,True,True,True,True,True,False,False,False
6,Slate Pencil Urchin,2000,Medium,Moderate consistent movement,4pm - 9am,False,False,False,False,True,True,True,True,True,False,False,False
7,Sea Anemone,500,Large,Stationary,All day,True,True,True,True,True,True,True,True,True,True,True,True
8,Moon Jellyfish,600,Small,Slow consistent movement,All day,False,False,False,False,False,False,True,True,True,False,False,False
9,Sea Slug,600,Tiny,Slow consistent movement,All day,True,True,True,True,True,True,True,True,True,True,True,True


## Create .csv File from DataFrame

In [6]:
# Destination of the cleaned dataset, relative to the notebook's working directory
output_path = "../Resources/seacreatures.csv"
# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Write the frame without the positional index column
df.to_csv(output_path, index=False)