## imports

In [77]:
import polars as pl
from polars import col as c
import os, sys, requests, re
from dotenv import load_dotenv, find_dotenv
from tqdm import tqdm
from datetime import datetime
import subprocess

# Load variables from the .env file located by find_dotenv(); the constants
# cell below reads OMDB_API_KEY from the environment.
load_dotenv(find_dotenv())

True

## constants

In [78]:
# OMDb API key, taken from the environment; None when the variable is unset.
OMDB_API_KEY = os.environ.get("OMDB_API_KEY")
# Directory (relative to the working directory) that holds the data files.
FILES_DIR = "../data"

## load df

In [79]:
# Read the movie table from the parquet file stored under FILES_DIR.
file_name = "movies_df.parquet"
file_path = os.path.join(FILES_DIR, file_name)
movies_df = pl.read_parquet(file_path)

In [80]:
# file_name = "movies_df.xlsx"
# file_path = os.path.join(FILES_DIR, file_name)
# movies_df = pl.read_excel(file_path)

## custom funcs

In [81]:
def fetch_english_title(title):
    """
    Look up a movie in OMDb and return its full metadata record.

    Despite the name, the whole decoded JSON payload is returned (not only the
    title), and the lookup is made through OMDb's ``i`` (id) parameter.

    Args:
        title (str): Identifier to look up, e.g. "tt0038355".

    Returns:
        dict | str: The decoded OMDb response when it reports
        ``"Response": "True"`` and contains a "Title" key; otherwise the
        sentinel string "Not found". The sentinel is also returned when the
        request or the JSON decoding fails, so this function never raises.
    """
    try:
        # HTTPS keeps the API key out of cleartext traffic, and `params` lets
        # requests URL-encode the values instead of hand-building the query.
        # To search by title instead of id, send "t" in place of "i".
        response = requests.get(
            "https://www.omdbapi.com/",
            params={"apikey": OMDB_API_KEY, "i": title},
            timeout=10,  # without a timeout a stalled connection hangs the cell
        )
        data = response.json()
        if data.get("Response") == "True" and "Title" in data:
            return data
        else:
            print(f"Not found in OMDb: {title}")
            return "Not found"
    except Exception as e:
        # Deliberately broad: callers rely on the "Not found" sentinel rather
        # than on exceptions.
        print(f"OMDb error for {title}: {e}")
        return "Not found"

In [82]:
def _omdb_key(col_name: str) -> str:
    """Map a snake_case column name to its OMDb JSON key (box_office -> BoxOffice, imdb_rating -> imdbRating)."""
    return "".join(
        word if word == "imdb" else word.capitalize()
        for word in col_name.split("_")
    )


def add_row_by_id(
    df: pl.DataFrame,
    omdb_id: str,
    viewed_date=None,
    default_values=None,
    liked: bool = False,
) -> pl.DataFrame:
    """
    Return a copy of ``df`` with one extra row for the movie ``omdb_id``.

    The movie's metadata is fetched with ``fetch_english_title``; the input
    frame itself is not modified.

    Args:
        df (pl.DataFrame): The dataframe to add the row to. Must contain an
            "index" column; the new row gets ``max(index) + 1``.
        omdb_id (str): The value for the 'omdb_id' column, also used for the
            OMDb lookup.
        viewed_date (optional): Value for the 'viewed' column. When omitted
            (None), today's date is used, resolved at call time.
        default_values (dict, optional): Dictionary of column: value pairs used
            for columns that are not filled from OMDb or from the other
            arguments. Columns without an entry are set to None.
        liked (bool): Value for the 'liked' column.

    Returns:
        pl.DataFrame: DataFrame with the new row appended, cast to ``df``'s schema.

    Raises:
        ValueError: If the OMDb lookup returned "Not found".
    """
    if default_values is None:
        default_values = {}
    if viewed_date is None:
        # Resolved per call: a default written in the signature is evaluated
        # only once, when the defining cell runs, and then goes stale in a
        # long-lived kernel.
        viewed_date = datetime.today().date()

    # Start with every column present so the schema cast below lines up.
    new_row = {col: default_values.get(col, None) for col in df.columns}
    new_row["omdb_id"] = omdb_id
    new_row["viewed"] = viewed_date
    # max() is None for an empty frame; start numbering at 1 in that case
    # (NOTE(review): numbering looks 1-based in the stored data; confirm).
    last_index = df["index"].max()
    new_row["index"] = 1 if last_index is None else last_index + 1
    new_row["liked"] = liked

    creds = fetch_english_title(omdb_id)
    if creds == "Not found":
        print("ERROR: cannot upload creds!")
        raise ValueError("CredsNotFoundError")

    for col_name in [
        "title",
        "year",
        "genre",
        "director",
        "country",
        "actors",
        "box_office",
        "writer",
        "language",
        "imdb_rating",
    ]:
        new_row[col_name] = creds[_omdb_key(col_name)]
        if col_name == "box_office":
            try:
                # Keep only the digits of the box-office string before converting.
                new_row[col_name] = int(re.sub(r"[^\d]", "", new_row[col_name]))
            except (TypeError, ValueError):
                # TypeError: value is not a string; ValueError: no digits left.
                print(
                    f"the value for box_office is {new_row[col_name]}. Fill with null."
                )
                new_row[col_name] = None

    # Single-row frame, cast so its dtypes match df before concatenating.
    new_row_df = pl.DataFrame([new_row]).cast(df.schema)
    return pl.concat([df, new_row_df], how="vertical")


# Smoke test: run the whole fetch-and-append path once for a known id.
# The result is bound to `_` only, so movies_df itself is not rebound.
_ = add_row_by_id(movies_df, omdb_id="tt0038355")

## code

In [83]:
# Id of the movie to add; used for the OMDb lookup and stored in the "omdb_id" column.
NEW_MOVIE_OMDB_ID = "tt23853982"

In [None]:
# viewed_date = datetime.today().date()

In [None]:
# Guard first: do not preview (and spend an OMDb request on) a movie that is
# already in the table.
assert (
    NEW_MOVIE_OMDB_ID not in movies_df["omdb_id"].unique()
), f"{NEW_MOVIE_OMDB_ID} is already in movies_df"

# Dry run: build the extended table without rebinding movies_df and show the
# last rows to check that everything is correct.
display(
    add_row_by_id(
        df=movies_df,
        omdb_id=NEW_MOVIE_OMDB_ID,
        liked=False,
        # viewed_date="2025-03-07",  # default: today's date
    ).tail()
)

index,title,year,viewed,liked,omdb_id,genre,director,country,actors,box_office,writer,language,imdb_rating
u32,str,i64,date,bool,str,str,str,str,str,i32,str,str,f32
631,"""The Devil Wears Prada""",2006,2025-09-11,False,"""tt0458352""","""Comedy, Drama""","""David Frankel""","""United States, France""","""Anne Hathaway, Meryl Streep, A…",124740460.0,"""Aline Brosh McKenna, Lauren We…","""English, French""",7.0
632,"""L'Avventura""",1960,2025-09-13,True,"""tt0053619""","""Drama, Mystery, Romance""","""Michelangelo Antonioni""","""Italy, France""","""Gabriele Ferzetti, Monica Vitt…",,"""Michelangelo Antonioni, Elio B…","""Italian, English, Greek""",7.7
633,"""Last Tango in Paris""",1972,2025-09-18,False,"""tt0070849""","""Drama, Romance""","""Bernardo Bertolucci""","""Italy, France""","""Marlon Brando, Maria Schneider…",36144000.0,"""Bernardo Bertolucci, Franco Ar…","""English, French""",6.8
634,"""Inside Llewyn Davis""",2013,2025-09-23,False,"""tt2042568""","""Drama, Music""","""Ethan Coen, Joel Coen""","""United States, United Kingdom,…","""Oscar Isaac, Carey Mulligan, J…",13235319.0,"""Joel Coen, Ethan Coen""","""English""",7.4
635,"""Parthenope""",2024,2025-03-07,False,"""tt23853982""","""Drama, Fantasy""","""Paolo Sorrentino""","""Italy, France""","""Celeste Dalla Porta, Stefania …",289303.0,"""Paolo Sorrentino""","""Italian, Neapolitan, English""",6.6


In [None]:
# Appending is not idempotent: running this cell twice would add the same movie
# twice (and the duplicate would then be saved), so refuse when the id is
# already present.
if NEW_MOVIE_OMDB_ID in movies_df["omdb_id"].unique():
    raise ValueError(
        f"{NEW_MOVIE_OMDB_ID} is already in movies_df; not adding it again."
    )

print("init shape:\t", movies_df.shape[0])
movies_df = add_row_by_id(
    df=movies_df,
    omdb_id=NEW_MOVIE_OMDB_ID,
    # viewed_date="2025-03-07",  # default: today's date
    liked=False,
)
print("result shape:\t", movies_df.shape[0])

init shape:	 634
result shape:	 635


## save df

In [88]:
# Write the updated table back to the parquet file under FILES_DIR.
# file_path is read again by the git cell further down.
file_name = "movies_df.parquet"
file_path = os.path.join(FILES_DIR, file_name)
movies_df.write_parquet(file_path)

In [89]:
# import subprocess

In [90]:
# Commit and push changes: stage the saved file, commit it, push it.
# Each step is a (command, confirmation message) pair. `check=True` makes a
# failing command raise, which skips the remaining steps and is reported below.
commit_message = "add movie"
git_steps = [
    (
        ["git", "add", "-f", file_path],
        f"Added {file_path} to staging.",
    ),
    (
        ["git", "commit", "-m", commit_message],
        f"Committed changes with message: '{commit_message}'",
    ),
    (
        ["git", "push"],
        "Pushed changes ",
    ),
]

try:
    for command, done_message in git_steps:
        subprocess.run(command, check=True)
        print(done_message)
except subprocess.CalledProcessError as e:
    # A git command exited with a non-zero status.
    print(f"Git command failed: {e}")
except FileNotFoundError:
    # The git executable itself could not be started.
    print("Git command not found. Ensure Git is installed and in your PATH.")

Added ../data/movies_df.parquet to staging.
[main e3e339a] add movie
 1 file changed, 0 insertions(+), 0 deletions(-)
Committed changes with message: 'add movie'
Pushed changes 


To https://github.com/KonstantinBurkin/movie-list.git
   eb1b5e4..e3e339a  main -> main


In [91]:
# Also export the table as an Excel workbook under FILES_DIR.
# This rebinds file_name / file_path, and it runs after the git cell above.
file_name = "movies_df.xlsx"
file_path = os.path.join(FILES_DIR, file_name)
# As the last expression of the cell, the returned Workbook object is echoed as output.
movies_df.write_excel(file_path)

<xlsxwriter.workbook.Workbook at 0x127600470>