## Let's fetch and format the data

In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [43]:
def _clean_year(raw_year):
    """Strip square brackets and keep only the part before the first '-'.

    For example, '[2019-2021]' becomes '2019' and '2018' stays '2018'.
    """
    return str(raw_year).replace('[', '').replace(']', '').split('-')[0]


def _table_records(table, year_attr, kind, boxoffice_attr=None):
    """Convert the data rows of one table into lists of column values.

    Param table:
        BeautifulSoup tag of the table to read
    Param year_attr:
        Name of the row attribute that holds the year
    Param kind:
        Label stored in the 'type' column for every row of this table
    Param boxoffice_attr:
        Name of the row attribute that holds the box office value, or None
        to store an empty string instead

    Result:
        list of rows ordered as
        [audiencescore, boxoffice, title, tomatometer, year, type]
    """
    # Not every parser inserts a <tbody> the markup omits, so fall back to
    # the table itself instead of failing on None.
    body = table.find('tbody')
    if body is None:
        body = table

    # Only rows carrying the data-* attributes are real entries; this skips
    # header or decorative rows that would otherwise raise a KeyError.
    return [
        [
            row['data-audiencescore'],
            row[boxoffice_attr] if boxoffice_attr else '',
            row['data-title'],
            row['data-tomatometer'],
            row[year_attr],
            kind,
        ]
        for row in body.find_all('tr', attrs={'data-title': True})
    ]


def get_and_transform(url, timeout=30):
    """
    This function sends a get request to the rotten tomatoes server
    to get the page of an actor and extracts the movie and tv show
    tables found on it.

    Param url:
        Url to the page of the actor
    Param timeout:
        Seconds to wait for the server before giving up (default 30)

    Result df:
        pandas dataframe containing data about movies and tv shows related
        to the actor, with the columns audiencescore, boxoffice, title,
        tomatometer, year and type ('movie' or 'tv')

    Raises:
        requests.HTTPError if the server answers with an error status
        IndexError if the page holds fewer than two tables
    """
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.content, 'html.parser')
    tables = soup.find_all('table')
    if len(tables) < 2:
        raise IndexError(
            'expected a movies table and a tv table at %s, found %d table(s)'
            % (url, len(tables))
        )

    # getting movies (first table) and tv shows (second table)
    movies = _table_records(
        tables[0], 'data-year', 'movie', boxoffice_attr='data-boxoffice'
    )
    tv_shows = _table_records(tables[1], 'data-appearance-year', 'tv')

    # combining the two datasets and creating a pandas dataframe
    columns = ['audiencescore', 'boxoffice', 'title', 'tomatometer', 'year', 'type']
    df = pd.DataFrame(movies + tv_shows, columns=columns)

    # some data cleaning: keep a single plain year per row
    df['year'] = df['year'].apply(_clean_year)
    return df

In [45]:
# Scrape both celebrity pages in one pass; the order of the URLs matches the
# order of the names being assigned.
jonny_df, amber_df = (
    get_and_transform(page_url)
    for page_url in (
        'https://www.rottentomatoes.com/celebrity/oprah_noodlemantra',
        'https://www.rottentomatoes.com/celebrity/amber_heard',
    )
)

In [46]:
# Preview the first five rows of the first scraped dataframe.
jonny_df.head(n=5)

Unnamed: 0,audiencescore,boxoffice,title,tomatometer,year,type
0,90,,Minamata,78,2020,movie
1,94,2202.0,Crock of Gold: A Few Rounds With Shane MacGowan,96,2020,movie
2,63,,Waiting for the Barbarians,53,2019,movie
3,31,,"London Fields, The Director's Cut",0,2019,movie
4,80,565661.0,City of Lies,51,2018,movie


In [47]:
# Preview the first five rows of the second scraped dataframe.
amber_df.head(n=5)

Unnamed: 0,audiencescore,boxoffice,title,tomatometer,year,type
0,0,,Aquaman and the Lost Kingdom,0,2023,movie
1,80,,Gully,26,2019,movie
2,31,,"London Fields, The Director's Cut",0,2019,movie
3,44,257686.0,Her Smell,84,2018,movie
4,73,335012133.0,Aquaman,65,2018,movie
