## imports

In [1]:
import polars as pl
from polars import col as c
import os, sys, requests, re
from dotenv import load_dotenv, find_dotenv
from tqdm import tqdm
from datetime import datetime

# Locate a .env file and load its variables into the process environment
# (presumably where OMDB_API_KEY is defined — confirm against the project's .env).
load_dotenv(find_dotenv())

True

## constants

In [2]:
# OMDb API key taken from the environment; None when the variable is unset.
OMDB_API_KEY = os.environ.get("OMDB_API_KEY")
# Directory holding the notebook's data files, relative to the working directory.
FILES_DIR = "../data"

## load df

In [3]:
# Read the persisted movie log from FILES_DIR into a polars DataFrame.
file_name = "movies_df.parquet"
file_path = os.path.join(FILES_DIR, file_name)
movies_df = pl.read_parquet(file_path)

In [4]:
# file_name = "movies_df.xlsx"
# file_path = os.path.join(FILES_DIR, file_name)
# movies_df = pl.read_excel(file_path)

## custom funcs

In [6]:
def fetch_english_title(title):
    """
    Look up a film in the OMDb API and return its full metadata record.

    Despite the parameter name, the value is sent as OMDb's ``i`` query
    parameter, i.e. it is treated as an IMDb id (e.g. "tt0038355") rather
    than a free-text title. Switch the ``"i"`` key to ``"t"`` to search by
    title instead.

    Args:
        title (str): Identifier to look up (an IMDb id with the current query).

    Returns:
        dict | str: The decoded JSON payload when OMDb reports
        ``Response == "True"`` and the payload contains a "Title" key;
        otherwise the sentinel string "Not found". The sentinel is also
        returned, after printing a diagnostic, when the request fails or the
        response body is not valid JSON.
    """
    try:
        # HTTPS keeps the API key out of plaintext traffic, `params` lets
        # requests handle the URL-encoding, and the timeout prevents a
        # stalled connection from hanging the kernel indefinitely.
        response = requests.get(
            "https://www.omdbapi.com/",
            params={"apikey": OMDB_API_KEY, "i": title},
            timeout=10,
        )
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failures and undecodable bodies are reported, not raised.
        print(f"OMDb error for {title}: {e}")
        return "Not found"

    if isinstance(data, dict) and data.get("Response") == "True" and "Title" in data:
        return data

    print(f"Not found in OMDb: {title}")
    return "Not found"

In [None]:
def add_row_by_id(
    df: pl.DataFrame,
    omdb_id: str,
    viewed_date=None,
    default_values=None,
    liked: bool = False,
) -> pl.DataFrame:
    """
    Append one film to the dataframe, filling its metadata from OMDb.

    The row is built from ``default_values`` (missing columns become None),
    then ``omdb_id``, ``viewed``, ``index`` and ``liked`` are set, and finally
    the descriptive columns are overwritten with the values returned by
    ``fetch_english_title(omdb_id)``.

    Args:
        df (pl.DataFrame): The dataframe to add the row to. Must contain an
            "index" column.
        omdb_id (str): Identifier passed to ``fetch_english_title`` and stored
            in the "omdb_id" column.
        viewed_date (optional): Value for the "viewed" column. When None,
            today's date is used, computed at call time.
        default_values (dict, optional): Dictionary of column: value pairs
            used to pre-fill the row.
        liked (bool): Value for the "liked" column.

    Returns:
        pl.DataFrame: A new DataFrame with the row appended; ``df`` itself is
        not modified.

    Raises:
        ValueError: If the OMDb lookup returns the "Not found" sentinel.
    """
    if default_values is None:
        default_values = {}
    if viewed_date is None:
        # Resolved per call: a default evaluated in the signature would be
        # frozen at the moment the cell defining this function was run.
        viewed_date = datetime.today().date()

    # Prepare new row as dict
    new_row = {col: default_values.get(col) for col in df.columns}
    new_row["omdb_id"] = omdb_id
    new_row["viewed"] = viewed_date
    last_index = df["index"].max()
    # max() is None for an empty frame; start at 1 in that case
    # (assumes the index column is 1-based — TODO confirm).
    new_row["index"] = 1 if last_index is None else last_index + 1
    new_row["liked"] = liked

    creds = fetch_english_title(omdb_id)
    if creds == "Not found":
        print("ERROR: cannot upload creds!")
        raise ValueError("CredsNotFoundError")

    # Dataframe column -> key in the OMDb JSON payload.
    omdb_keys = {
        "title": "Title",
        "year": "Year",
        "genre": "Genre",
        "director": "Director",
        "country": "Country",
        "actors": "Actors",
        "box_office": "BoxOffice",
        "writer": "Writer",
        "language": "Language",
        "imdb_rating": "imdbRating",
    }
    for col_name, omdb_key in omdb_keys.items():
        value = creds.get(omdb_key)
        if col_name == "box_office":
            # Keep only the digits (drops "$" and thousands separators); a
            # missing key or a value without digits such as "N/A" becomes None.
            digits = re.sub(r"[^\d]", "", value) if isinstance(value, str) else ""
            value = int(digits) if digits else None
        elif col_name in ("year", "imdb_rating") and value == "N/A":
            # "N/A" cannot be cast to a numeric dtype; store a null instead.
            value = None
        new_row[col_name] = value

    # Create single-row DataFrame
    new_row_df = pl.DataFrame([new_row])
    # Convert dtypes to match df
    new_row_df = new_row_df.cast(df.schema)
    # Concatenate
    return pl.concat([df, new_row_df], how="vertical")


# Smoke test: build a row for a known id. The returned frame is discarded,
# so movies_df is left untouched.
_ = add_row_by_id(df=movies_df, omdb_id="tt0038355", default_values=None)

## add a new movie

In [None]:
# Dry run: build the frame with the new row and inspect its last rows.
# The result is not assigned back to movies_df, so nothing is committed yet.
preview_df = add_row_by_id(df=movies_df, omdb_id="tt0038355", liked=False)
preview_df.tail()

index,title,year,viewed,liked,omdb_id,genre,director,country,actors,box_office,writer,language,imdb_rating
u32,str,i64,date,bool,str,str,str,str,str,i32,str,str,f32
615,"""The Princess Bride""",1987,2025-06-12,False,"""tt0093779""","""Adventure, Comedy, Family""","""Rob Reiner""","""United States""","""Cary Elwes, Mandy Patinkin, Ro…",30857814,"""William Goldman""","""English""",8.0
616,"""Grave of the Fireflies""",1988,2025-07-10,False,"""tt0095327""","""Animation, Drama, War""","""Isao Takahata""","""Japan""","""Tsutomu Tatsumi, Ayano Shirais…",516962,"""Akiyuki Nosaka, Isao Takahata""","""Japanese""",8.5
617,"""The Usual Suspects""",1995,2025-07-14,False,"""tt0114814""","""Crime, Drama, Mystery""","""Bryan Singer""","""United States, Germany""","""Kevin Spacey, Gabriel Byrne, C…",23341568,"""Christopher McQuarrie""","""English, Hungarian, Spanish, F…",8.5
618,"""Lone Star""",1996,2025-07-26,False,"""tt0116905""","""Drama, Mystery, Western""","""John Sayles""","""United States""","""Chris Cooper, Elizabeth Peña, …",12408986,"""John Sayles""","""English, Spanish""",7.4
619,"""The Big Sleep""",1946,2025-08-02,False,"""tt0038355""","""Crime, Film-Noir, Mystery""","""Howard Hawks""","""United States""","""Humphrey Bogart, Lauren Bacall…",25556,"""William Faulkner, Leigh Bracke…","""English""",7.9


In [11]:
# Commit the new row to movies_df and report the row count before and after.
# NOTE: this cell is not idempotent — add_row_by_id always appends a row,
# so re-running it adds the same film again.
rows_before = movies_df.shape[0]
print("init shape:\t", rows_before)
movies_df = add_row_by_id(df=movies_df, omdb_id="tt0038355", liked=False)
print("result shape:\t", movies_df.shape[0])

init shape:	 618
result shape:	 619


## save df

In [12]:
# Persist the updated frame. This writes to FILES_DIR/movies_df.parquet,
# the same path the load cell reads from, so the previous file is overwritten.
file_name = "movies_df.parquet"
file_path = os.path.join(FILES_DIR, file_name)
movies_df.write_parquet(file_path)

In [13]:
# Also export the frame as an Excel workbook in FILES_DIR.
file_name = "movies_df.xlsx"
file_path = os.path.join(FILES_DIR, file_name)
# Last expression of the cell: its return value is what the cell displays.
movies_df.write_excel(file_path)

<xlsxwriter.workbook.Workbook at 0x10618da60>