## Getting data
https://www.nintendolife.com/

In [53]:
import requests
from bs4 import BeautifulSoup

In [59]:
# Characters that would break the one-line-per-game CSV layout: commas and
# newlines become spaces, quote characters are dropped.
_CSV_UNSAFE = str.maketrans({',': ' ', '"': None, "'": None, '\n': ' '})


def _clean_field(text: str) -> str:
    """Return *text* with commas/newlines turned into spaces and quotes removed."""
    return text.translate(_CSV_UNSAFE)


def _overview_field(overview_section, label: str) -> str:
    """Return the cleaned ``definition`` paragraph listed next to *label*.

    Raises:
        AttributeError: if the label or its definition paragraph is missing.
    """
    heading = overview_section.find('strong', string=label)
    definition = heading.parent.parent.find('p', {'class': 'definition'})
    return _clean_field(definition.text)


def _optional_overview_field(overview_section, label: str) -> str:
    """Like ``_overview_field`` but return ``''`` when the entry is absent."""
    try:
        return _overview_field(overview_section, label)
    except AttributeError:
        # A missing tag surfaces as ``None.<attr>``; anything else is a real bug.
        return ''


def get_game_info(game_url: str, image_url: str):
    """Scrape one review page and append its data as a row to the games CSV.

    Prints ``game_url``, downloads the page, extracts the overview fields,
    the review text, the conclusion and the user rating, and appends a single
    row to ``../dataset/switch-games.csv``.

    Args:
        game_url: URL of the review page to download.
        image_url: URL of the game's image; written to the row unchanged.

    Raises:
        AttributeError: if the overview section, the ``Title``/``System``
            entries or the review body cannot be found in the page.
    """
    import csv  # local import: the file's import cell does not provide it

    print(game_url)
    # Without a timeout a stalled connection would block the scrape forever.
    response = requests.get(game_url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    overview_section = soup.find('section', {'id': 'game-overview'})

    # Title and system are mandatory: let a missing one fail loudly.
    title = _overview_field(overview_section, 'Title')
    system = _overview_field(overview_section, 'System')

    # The remaining overview entries are optional and default to ''.
    publisher = _optional_overview_field(overview_section, 'Publisher')
    developer = _optional_overview_field(overview_section, 'Developer')
    genre = _optional_overview_field(overview_section, 'Genre')
    num_players = _optional_overview_field(overview_section, 'Players')

    try:
        release_entry = overview_section.find('strong', string='Release Date').parent.parent
        first_release = release_entry.find('li', {'class': 'first'}).text
        # Keep only the part of the first release entry before its first comma.
        release_date = _clean_field(first_release.lstrip().split(',')[0])
    except AttributeError:
        release_date = ''

    review_body = soup.find('div', {'class': 'body body-text article-text'})
    review = _clean_field(' '.join(p.text for p in review_body.find_all('p')))

    try:
        review_conclusion = _clean_field(
            soup.find('section', {'id': 'conclusion'}).find('p').text
        )
    except AttributeError:
        review_conclusion = ''

    try:
        rating_text = soup.find('p', {'class': 'user-ratings'}).text
        rating = _clean_field(rating_text.replace('User Ratings: ', ''))
        votes_text = soup.find('span', {'class': 'score accent'}).text
        # Order matters: surrounding newlines are stripped before the
        # remaining inner ones are turned into spaces.
        votes_text = votes_text.replace(',', ' ').replace('"', '').replace("'", '').strip('\n')
        num_votes = votes_text.replace('N/A', '0').replace('\n', ' ')
    except AttributeError:
        # Rating and vote count are reset together if either one is missing.
        rating = '0'
        num_votes = '0'

    row = [
        title, game_url, image_url, system, publisher, developer, genre,
        num_players, release_date, review, review_conclusion, rating, num_votes,
    ]
    # csv.writer quotes any field (e.g. a URL) that still contains a delimiter;
    # UTF-8 is stated explicitly instead of relying on the platform default.
    with open('../dataset/switch-games.csv', 'a', newline='', encoding='utf-8') as file:
        csv.writer(file, lineterminator='\n').writerow(row)



def get_games(num_pages: int = 22):
    """Scrape the Nintendo Switch review listing and process every game found.

    Downloads listing pages ``1`` through ``num_pages`` and calls
    ``get_game_info`` for each review linked from them.

    Args:
        num_pages: Number of listing pages to walk, starting at page 1.
            Values below 1 result in no requests at all.
    """
    site_root = 'https://www.nintendolife.com/'
    listing_url = 'https://www.nintendolife.com/reviews?system=nintendo-switch&page={0}'
    item_class = 'item item-content item-review item-medium'

    # Honour ``num_pages`` instead of a hard-coded upper bound.
    for page_number in range(1, num_pages + 1):
        # Without a timeout a stalled connection would block the scrape forever.
        response = requests.get(listing_url.format(page_number), timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Collect every (review URL, image URL) pair before scraping, so a
        # malformed listing entry fails before any game of the page is written.
        games_links = [
            (
                site_root + item.find('a', {'class': 'title accent-hover'})['href'],
                item.find('img')['src'],
            )
            for item in soup.find_all('li', {'class': item_class})
        ]
        for game_url, image_url in games_links:
            get_game_info(game_url, image_url)

# Run the scraper with its default page count; each review found is appended
# to the CSV by get_game_info.
get_games()

https://www.nintendolife.com/reviews/nintendo-switch/the-diofield-chronicle
https://www.nintendolife.com/reviews/nintendo-switch/the-legend-of-heroes-trails-from-zero
https://www.nintendolife.com/reviews/nintendo-switch/nba-2k23
https://www.nintendolife.com/reviews/nintendo-switch/temtem
https://www.nintendolife.com/reviews/nintendo-switch/made-in-abyss-binary-star-falling-into-darkness
https://www.nintendolife.com/reviews/nintendo-switch/splatoon-3
https://www.nintendolife.com/reviews/nintendo-switch/jojos-bizarre-adventure-all-star-battle-r
https://www.nintendolife.com/reviews/nintendo-switch/lego-brawls
https://www.nintendolife.com/reviews/nintendo-switch/pac-man-world-re-pac
https://www.nintendolife.com/reviews/nintendo-switch/teenage-mutant-ninja-turtles-the-cowabunga-collection
https://www.nintendolife.com/reviews/nintendo-switch/prinny-presents-nis-classics-volume-3
https://www.nintendolife.com/reviews/nintendo-switch/dusk-diver-2
https://www.nintendolife.com/reviews/nintendo-sw

In [23]:
import hashlib
import pandas as pd

In [28]:
# Create id: the first 8 hex digits of the MD5 digest of the value in the
# first CSV column (presumably the title -- confirm against the CSV header).
df = pd.read_csv('../dataset/switch-games.csv', sep=',')
# Pick the first column explicitly by position; an integer key on a
# label-indexed row Series is a deprecated positional lookup.
df['id'] = df.iloc[:, 0].map(lambda value: hashlib.md5(value.encode()).hexdigest()[:8])
df.to_csv('../dataset/switch-games-id.csv', index=False)

## MariaDB

In [31]:
# Reload the dataset that already carries the generated ``id`` column.
df = pd.read_csv('../dataset/switch-games-id.csv', sep=',')
# Keep only the id, title and release date columns
# (presumably the ones destined for the MariaDB table -- confirm).
mariadb = df[['id', 'title', 'release_date']]
# Bare expression: shows the selection as the notebook cell output.
mariadb

Unnamed: 0,id,title
0,0f0191a3,The DioField Chronicle
1,f2af5f5f,The Legend of Heroes: Trails from Zero
2,a8996117,NBA 2K23
3,e2f10932,Temtem
4,51ff4f64,Made in Abyss: Binary Star Falling into Darkness
...,...,...
632,0f933f58,Skylanders Imaginators
633,44b24a40,Just Dance 2017
634,1e8edaf2,Super Bomberman R
635,92d53aa6,1-2-Switch
