In [None]:
import json
from urllib.parse import urlencode

import pandas as pd
import requests

from connect_db import connect_to_grad_db

In [None]:
# One-off probe of the raw endpoint so the payload can be inspected offline.
# NOTE(review): the path segment after `_next/data/` looks like a
# deployment-specific build id; presumably the URL breaks on redeploy — confirm.
resp = requests.get(
    "https://www.trustpilot.com/_next/data/businessunitprofile-consumersite-7193/review/www.teslamotors.com.json",
    timeout=30,  # without a timeout a stalled connection blocks the kernel indefinitely
)
# Fail here, with the HTTP status in the message, rather than later when the
# body of an error page is decoded as JSON.
resp.raise_for_status()

In [None]:
# Decode the body before opening the file: mode "w" truncates tp.json as soon
# as it is opened, so a decoding error raised inside the `with` block would
# leave an empty file behind.
tp_payload = resp.json()
with open("tp.json", "w") as file:
    json.dump(tp_payload, file)

In [None]:
def get_review_data(
    company_website: str,
    page: int = 1,
    sort: str = "recency",
    date="",
    stars="",
    timeout: float = 30,
):
    """Fetch one page of raw review JSON for a company from Trustpilot.

    Args:
        company_website: Site identifier inserted into the URL path,
            e.g. ``"www.norwegian.com"``.
        page: Page number; only sent as a query parameter when greater than 1.
        sort: Value of the ``sort`` query parameter.
        date: Optional value of the ``date`` query parameter; omitted when falsy.
        stars: Optional value of the ``stars`` query parameter; omitted when
            falsy. A list or tuple is sent as repeated ``stars=`` parameters.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The decoded JSON body, or ``None`` (after printing a warning) when the
        response status is not OK.
    """
    # NOTE(review): this path embeds what looks like a deployment-specific
    # build id ("...-7193"); presumably it stops working after a redeploy.
    BASE_URL = "https://www.trustpilot.com/_next/data/businessunitprofile-consumersite-7193/review/"

    # Collect the query parameters in the same order the URL has always used.
    query = {"sort": sort}
    if page > 1:
        query["page"] = page
    if date:
        query["date"] = date
    if stars:
        query["stars"] = stars

    # urlencode escapes reserved characters in the values, which plain string
    # concatenation did not do.
    target_url = f"{BASE_URL}{company_website}.json?{urlencode(query, doseq=True)}"

    print(target_url)
    resp = requests.get(target_url, timeout=timeout)

    if not resp.ok:
        print(f"WARNING: something went wrong making requests, status code: {resp.status_code}")
        return None

    return resp.json()

def extract_reviews(raw_reviews):
    """Flatten a raw review payload into a list of simple review dicts.

    Args:
        raw_reviews: Decoded JSON payload expected to hold the reviews under
            ``raw_reviews["pageProps"]["reviews"]``. ``None`` or any payload
            without that list yields an empty result.

    Returns:
        A list with one dict per review, with the keys ``"text"``,
        ``"rating"``, ``"title"`` and ``"creation_date"``. Missing or falsy
        text/title become ``""``; missing or falsy rating/publication date
        become ``None``.
    """
    page_props = raw_reviews.get("pageProps") if isinstance(raw_reviews, dict) else None
    reviews = page_props.get("reviews") if isinstance(page_props, dict) else None

    if not isinstance(reviews, list):
        # Covers a failed request (payload is None) as well as an unexpected
        # payload shape; say so instead of surfacing an opaque exception text.
        print("WARNING: no reviews found in response payload")
        return []

    extracted = []
    for review in reviews:
        if not isinstance(review, dict):
            continue

        # Each field is looked up defensively so that one incomplete review
        # does not discard the remaining reviews of the page.
        dates = review.get("dates")
        published = dates.get("publishedDate") if isinstance(dates, dict) else None

        extracted.append(
            {
                "text": review.get("text") or "",
                "rating": review.get("rating") or None,
                "title": review.get("title") or "",
                "creation_date": published or None,
            }
        )

    return extracted
        
def get_reviews(company_website, num_pages=1, start_page=1):
    """Collect the extracted reviews of several consecutive result pages.

    Args:
        company_website: Site identifier forwarded to ``get_review_data``.
        num_pages: How many consecutive pages to fetch.
        start_page: Number of the first page to fetch.

    Returns:
        A single flat list holding the extracted reviews of every fetched
        page, in page order.
    """
    collected = []
    stop_page = start_page + num_pages

    for page_number in range(start_page, stop_page):
        page_payload = get_review_data(company_website, page=page_number)
        collected.extend(extract_reviews(page_payload))

    return collected



In [None]:
# Spot-check the fetch + extract pipeline on a single results page.
extract_reviews(
    get_review_data(company_website="www.norwegian.com", page=3)
)

In [None]:
# Fetch 43 consecutive result pages, starting from the first one.
reviews = get_reviews(
    company_website="www.norwegian.com",
    num_pages=43,
)

In [None]:
# One row per extracted review; the columns come from the review dict keys.
df = pd.DataFrame(data=reviews)

In [None]:
# Parse the publication timestamps stored under "creation_date" into pandas datetimes.
# NOTE(review): raises KeyError when `reviews` was empty, because the frame then has no columns.
df["creation_date"] = pd.to_datetime(df["creation_date"])

In [None]:
# Record which site these rows were scraped for; the value is constant for the whole frame.
# NOTE(review): this literal has to be kept in sync by hand with the site passed to get_reviews.
df["reviews_for_site"] = "www.norwegian.com"

In [None]:
# Append the scraped rows to the "trustpilot_api" table; the DataFrame index is not written.
# NOTE(review): with if_exists="append", re-running this cell presumably inserts
# the same rows again — confirm whether the table guards against duplicates.
engine = connect_to_grad_db()
df.to_sql(name="trustpilot_api", con=engine, if_exists="append", index=False)