In [1]:
import pandas as pd
import os
from sqlalchemy import create_engine

# Scraping with Pandas

We can use the pandas `read_html` function to scrape tabular data from a web page automatically: it parses every HTML `<table>` it finds on the page and returns them as a list of DataFrames.

In [2]:
# Every DIY recipe page lives under the same wiki path, so the shared prefix
# is written once and each category page is derived from it.
DIY_RECIPES_URL = 'https://animalcrossing.fandom.com/wiki/DIY_recipes'

# The main DIY page is the one that gets read into `villager_tables`.
villagers_url = DIY_RECIPES_URL
tools_url = DIY_RECIPES_URL + '/Tools'
house_url = DIY_RECIPES_URL + '/Housewares'
misc_url = DIY_RECIPES_URL + '/Miscellaneous'
wall_url = DIY_RECIPES_URL + '/Wall-mounted'
flooring_url = DIY_RECIPES_URL + '/Wallpaper,_rugs_and_flooring'
equipment_url = DIY_RECIPES_URL + '/Equipment'
other_url = DIY_RECIPES_URL + '/Other'

In [3]:
# Fetch each wiki page and parse the HTML tables on it. Every name on the
# left is bound to the list of DataFrames pd.read_html returns for the page
# in the same position on the right.
(
    villager_tables,
    tools_tables,
    house_tables,
    misc_tables,
    wall_tables,
    flooring_tables,
    equipment_tables,
    others_tables,
) = [
    pd.read_html(page_url)
    for page_url in (
        villagers_url,
        tools_url,
        house_url,
        misc_url,
        wall_url,
        flooring_url,
        equipment_url,
        other_url,
    )
]

# Data Transformation

In [4]:
# Tables 1 through 9 of the main DIY page are bound, in order, to the
# villager recipe frames.
(
    cranky_df,
    jock_df,
    lazy_df,
    smug_df,
    normal_df,
    peppy_df,
    sisterly_df,
    snooty_df,
    any_df,
) = [villager_tables[position] for position in range(1, 10)]

# Each category page contributes the table found at index 2 of its list.
(
    tools_df,
    house_df,
    misc_df,
    wall_df,
    flooring_df,
    equipment_df,
    others_df,
) = [
    page_tables[2]
    for page_tables in (
        tools_tables,
        house_tables,
        misc_tables,
        wall_tables,
        flooring_tables,
        equipment_tables,
        others_tables,
    )
]

# Show the column labels of one of the frames
others_df.columns

Index(['Recipe Name', 'Image', 'Materials Needed [1]', 'Size',
       'Obtained From [1]', 'Sell Price [1]'],
      dtype='object')

In [5]:
# Remove the image and size columns from every recipe table.
#
# DataFrame.drop(..., errors='ignore') is used instead of `del` so that
# re-running this cell is a no-op rather than a KeyError on columns that are
# already gone. The drop happens in place so each *_df name keeps referring
# to the trimmed frame.

# Villager tables use two-part column labels: (table caption, column name).
villager_frames = {
    'cranky': cranky_df,
    'jock': jock_df,
    'lazy': lazy_df,
    'smug': smug_df,
    'normal': normal_df,
    'peppy': peppy_df,
    'sisterly': sisterly_df,
    'snooty': snooty_df,
    'any': any_df,
}
for villager_type, frame in villager_frames.items():
    caption = 'Recipes from ' + villager_type + ' villagers'
    frame.drop(
        columns=[(caption, 'Image'), (caption, 'Size [1]')],
        inplace=True,
        errors='ignore',
    )

# Category tables use flat column labels.
for frame in (tools_df, house_df, misc_df, wall_df, flooring_df, equipment_df):
    frame.drop(columns=['Image', 'Size [1]'], inplace=True, errors='ignore')

# This table's size column is labelled "Size" rather than "Size [1]".
others_df.drop(columns=['Image', 'Size'], inplace=True, errors='ignore')

In [6]:
# Relabel the tools table's 'Recipe name' column as 'Recipe Name', the label
# the later cells use when they index this frame.
header_fixes = {'Recipe name': 'Recipe Name'}
tools_df = tools_df.rename(columns=header_fixes)

In [7]:
# List the current column labels of the tools table.
tools_df.columns

Index(['Recipe Name', 'Materials needed [1]', 'Obtained from [1]',
       'Item sell price [1]'],
      dtype='object')

In [8]:
# Title-case the recipe-name column of every table: str.title upper-cases the
# first letter of each word (e.g. "flimsy axe" -> "Flimsy Axe").
# NOTE: str.title also upper-cases a letter that follows an apostrophe.

# Villager tables use two-part column labels: (table caption, column name).
villager_frames = {
    'cranky': cranky_df,
    'jock': jock_df,
    'lazy': lazy_df,
    'smug': smug_df,
    'normal': normal_df,
    'peppy': peppy_df,
    'sisterly': sisterly_df,
    'snooty': snooty_df,
    'any': any_df,
}
for villager_type, frame in villager_frames.items():
    name_column = ('Recipes from ' + villager_type + ' villagers', 'Recipe Name')
    frame[name_column] = frame[name_column].str.title()

# Category tables use a flat "Recipe Name" label.
for frame in (
    tools_df,
    house_df,
    misc_df,
    wall_df,
    flooring_df,
    equipment_df,
    others_df,
):
    frame["Recipe Name"] = frame["Recipe Name"].str.title()

# Preview the first rows instead of rendering the whole frame
tools_df.head(10)

Unnamed: 0,Recipe Name,Materials needed [1],Obtained from [1],Item sell price [1]
0,Flimsy Axe,x5 tree branchx1 stone,Tom Nook (first DIY workshop) Nook's Cranny (2...,200
1,Stone Axe,x1 flimsy axex3 wood,"Pretty Good Tools Recipes from Nook Stop (3,000 )",560
2,Axe,x1 flimsy axex3 woodx1 iron nugget,"Pretty Good Tools Recipes from Nook Stop (3,000 )",625
3,Golden Axe,x1 axex1 gold nugget,Breaking axes 100 times,10655
4,Flimsy Shovel,x5 hardwood,Blathers Nook's Cranny (280 ),200
5,Shovel,x1 flimsy shovelx1 iron nugget,"Pretty Good Tools Recipes from Nook Stop (3,000 )",600
6,Golden Shovel,x1 shovelx1 gold nugget,Assisting Gulliver 30 times,10675
7,Flimsy Fishing Rod,x5 tree branch,Tom Nook (first DIY workshop),100
8,Fishing Rod,x1 flimsy fishing rodx1 iron nugget,"Pretty Good Tools Recipes from Nook Stop (3,000 )",600
9,Golden Rod,x1 fishing rodx1 gold nugget,Completing the fish section of the Critterpedia,10400


## Create .csv Files from DataFrames

In [10]:
# Write every recipe table to ../Resources/<name>.csv without the row index.
output_dir = "../Resources"

# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before the first write.
os.makedirs(output_dir, exist_ok=True)

frames_by_file_stem = {
    "cranky": cranky_df,
    "jock": jock_df,
    "lazy": lazy_df,
    "smug": smug_df,
    "normal": normal_df,
    "peppy": peppy_df,
    "sisterly": sisterly_df,
    "snooty": snooty_df,
    "any": any_df,
    "tools": tools_df,
    "house": house_df,
    "misc": misc_df,
    "wall": wall_df,
    "flooring": flooring_df,
    "equipment": equipment_df,
    "others": others_df,
}
for file_stem, frame in frames_by_file_stem.items():
    frame.to_csv(output_dir + "/" + file_stem + ".csv", index=False)