## imports

In [53]:
import polars as pl
from polars import col as c
import os, sys, requests
from dotenv import load_dotenv, find_dotenv
from tqdm import tqdm

load_dotenv(find_dotenv())

True

## constants

In [54]:
# Directory (relative to this notebook) that holds the persisted movie tables.
FILES_DIR = "../data"

# OMDb API credential, read from the process environment; None when the
# variable is not set.
OMDB_API_KEY = os.environ.get("OMDB_API_KEY")

## load df

In [55]:
# Read the persisted movie table (parquet) from FILES_DIR into `movies_df`.
file_name = "movies_df.parquet"
file_path = os.path.join(FILES_DIR, file_name)
movies_df = pl.read_parquet(source=file_path)

In [56]:
# file_name = "movies_df.xlsx"
# file_path = os.path.join(FILES_DIR, file_name)
# movies_df = pl.read_excel(file_path)

## replace wrong OMDb matches and refresh their metadata

In [57]:
# Row count before the corrections; the same check is repeated afterwards.
movies_df.height

630

In [58]:
# Peek at the last five rows before any correction is applied.
movies_df.tail(5)

index,title,year,viewed,liked,omdb_id,genre,director,country,actors,box_office,writer,language,imdb_rating
u32,str,i64,date,bool,str,str,str,str,str,i32,str,str,f32
626,"""Notorious""",1946,2025-08-17,False,"""tt0038787""","""Drama, Film-Noir, Romance""","""Alfred Hitchcock""","""United States""","""Cary Grant, Ingrid Bergman, Cl…",,"""Ben Hecht, Alfred Hitchcock, J…","""English, Portuguese, French""",7.9
627,"""A Fish Called Wanda""",1988,2025-08-24,False,"""tt0095159""","""Comedy, Crime""","""Charles Crichton, John Cleese""","""United Kingdom, United States""","""John Cleese, Jamie Lee Curtis,…",62493712.0,"""John Cleese, Charles Crichton""","""English, Italian, Russian, Fre…",7.5
628,"""The Waterer Watered""",1895,2025-08-28,False,"""tt0000014""","""Short, Comedy""","""Louis Lumière""","""France""","""François Clerc, Benoît Duval""",,"""N/A""","""None""",7.1
629,"""Eat Drink Man Woman""",1994,2025-09-07,True,"""tt0111797""","""Comedy, Drama, Romance""","""Ang Lee""","""Taiwan, United States""","""Sihung Lung, Kuei-Mei Yang, Wa…",7294403.0,"""Ang Lee, James Schamus, Hui-Li…","""Mandarin, French""",7.8
630,"""Sea of Love""",1989,2025-09-07,False,"""tt0098273""","""Crime, Drama, Mystery""","""Harold Becker""","""United States""","""Al Pacino, Ellen Barkin, John …",58571513.0,"""Richard Price""","""English""",6.8


In [59]:
# to replace
# Manual corrections, keyed by the row's `index` value; each value is the IMDb
# id that the row's `omdb_id` column should hold instead of the current one.
to_replace = {584: "tt0095705"}

In [60]:
# Show the rows that are about to be corrected. The filter is driven by the
# keys of `to_replace` so the preview stays in sync with the corrections
# instead of repeating a hard-coded row index.
movies_df.filter(c("index").is_in(list(to_replace)))

index,title,year,viewed,liked,omdb_id,genre,director,country,actors,box_office,writer,language,imdb_rating
u32,str,i64,date,bool,str,str,str,str,str,i32,str,str,f32
584,"""The Naked Gun""",2025,,False,"""tt3402138""","""Action, Comedy, Crime""","""Akiva Schaffer""","""United States""","""Liam Neeson, Paul Walter Hause…",,"""Dan Gregor, Doug Mand, Akiva S…","""English""",


In [61]:
def fetch_english_title(title, timeout=10):
    """Fetch one movie record from the OMDb API.

    The lookup uses OMDb's ``i`` query parameter, i.e. ``title`` is treated as
    an IMDb id (for example ``"tt0095705"``). The parameter keeps its original
    name so existing calls keep working. To search by title text instead, send
    the value under the ``t`` parameter.

    Args:
        title: IMDb id of the movie to look up.
        timeout: Seconds to wait for OMDb before giving up, so an unresponsive
            server cannot block the notebook indefinitely.

    Returns:
        The decoded JSON payload (a dict) when OMDb answers with
        ``Response == "True"`` and a ``Title`` field; otherwise the string
        ``"Not found"``. Callers must check for that sentinel before indexing
        the result.
    """
    try:
        # HTTPS keeps the API key out of plaintext traffic, and `params` lets
        # requests do the URL encoding instead of hand-building the query.
        response = requests.get(
            "https://www.omdbapi.com/",
            params={"apikey": OMDB_API_KEY, "i": title},
            timeout=timeout,
        )
        # Turn HTTP-level failures (4xx/5xx) into an exception so they are
        # reported as errors below rather than as "not found".
        response.raise_for_status()
        data = response.json()
        if data.get("Response") == "True" and "Title" in data:
            return data
        print(f"Not found in OMDb: {title}")
        return "Not found"
    except Exception as e:
        # Deliberately best-effort: report the problem and return the sentinel
        # so a single failed lookup does not abort the notebook.
        print(f"OMDb error for {title}: {e}")
        return "Not found"

In [62]:
# Apply the manual corrections in `to_replace`: for every (index, IMDb id)
# pair, fetch the OMDb record and overwrite the OMDb-derived columns of the
# matching row. All other rows are left untouched.
for key, val in to_replace.items():
    # Fetch BEFORE touching the frame, so a failed lookup cannot leave the row
    # with a new `omdb_id` but stale metadata.
    omdb_result = fetch_english_title(val)
    if not isinstance(omdb_result, dict):
        # fetch_english_title returns the string "Not found" on failure;
        # indexing it like a dict would raise an unhelpful TypeError.
        print(f"Skipping index {key}: no usable OMDb record for {val}")
        continue

    # These two values are compared against "N/A" below; `.get` additionally
    # guards against the key being absent from the payload.
    rating_raw = omdb_result.get("imdbRating", "N/A")
    box_office_raw = omdb_result.get("BoxOffice", "N/A")

    # Column name -> new value for the corrected row.
    new_values = {
        "omdb_id": val,
        # Keep only the leading four digits so a ranged year (presumably
        # possible for series, e.g. "2010–2015" — TODO confirm) still parses.
        "year": int(omdb_result["Year"][:4]),
        "title": omdb_result["Title"],
        "genre": omdb_result["Genre"],
        "director": omdb_result["Director"],
        "writer": omdb_result["Writer"],
        "country": omdb_result["Country"],
        "language": omdb_result["Language"],
        "actors": omdb_result["Actors"],
        "imdb_rating": None if rating_raw == "N/A" else float(rating_raw),
        # Box office arrives as text such as "$78,756,177"; strip the
        # formatting before converting to an integer.
        "box_office": (
            None
            if box_office_raw in ("N/A", "/A")
            else int(box_office_raw.replace("$", "").replace(",", ""))
        ),
    }

    # One conditional expression per column: take the new value on the target
    # row, keep the existing value everywhere else.
    is_target_row = c("index") == key
    movies_df = movies_df.with_columns(
        [
            pl.when(is_target_row)
            .then(pl.lit(value))
            .otherwise(c(column))
            .alias(column)
            for column, value in new_values.items()
        ]
    )

In [63]:
# Show the corrected rows. The filter is driven by the keys of `to_replace`
# so every corrected row is displayed, not just one hard-coded index.
movies_df.filter(c("index").is_in(list(to_replace)))

index,title,year,viewed,liked,omdb_id,genre,director,country,actors,box_office,writer,language,imdb_rating
u32,str,i64,date,bool,str,str,str,str,str,i32,str,str,f32
584,"""The Naked Gun: From the Files …",1988,,False,"""tt0095705""","""Comedy, Crime""","""David Zucker""","""United States""","""Leslie Nielsen, Priscilla Pres…",78756177,"""Jerry Zucker, Jim Abrahams, Da…","""English""",7.6


In [64]:
# Row count after the corrections.
movies_df.height

630

In [65]:
# Peek at the last five rows after the corrections.
movies_df.tail(5)

index,title,year,viewed,liked,omdb_id,genre,director,country,actors,box_office,writer,language,imdb_rating
u32,str,i64,date,bool,str,str,str,str,str,i32,str,str,f32
626,"""Notorious""",1946,2025-08-17,False,"""tt0038787""","""Drama, Film-Noir, Romance""","""Alfred Hitchcock""","""United States""","""Cary Grant, Ingrid Bergman, Cl…",,"""Ben Hecht, Alfred Hitchcock, J…","""English, Portuguese, French""",7.9
627,"""A Fish Called Wanda""",1988,2025-08-24,False,"""tt0095159""","""Comedy, Crime""","""Charles Crichton, John Cleese""","""United Kingdom, United States""","""John Cleese, Jamie Lee Curtis,…",62493712.0,"""John Cleese, Charles Crichton""","""English, Italian, Russian, Fre…",7.5
628,"""The Waterer Watered""",1895,2025-08-28,False,"""tt0000014""","""Short, Comedy""","""Louis Lumière""","""France""","""François Clerc, Benoît Duval""",,"""N/A""","""None""",7.1
629,"""Eat Drink Man Woman""",1994,2025-09-07,True,"""tt0111797""","""Comedy, Drama, Romance""","""Ang Lee""","""Taiwan, United States""","""Sihung Lung, Kuei-Mei Yang, Wa…",7294403.0,"""Ang Lee, James Schamus, Hui-Li…","""Mandarin, French""",7.8
630,"""Sea of Love""",1989,2025-09-07,False,"""tt0098273""","""Crime, Drama, Mystery""","""Harold Becker""","""United States""","""Al Pacino, Ellen Barkin, John …",58571513.0,"""Richard Price""","""English""",6.8


## save df

In [66]:
# Persist the frame as parquet under FILES_DIR. This is the same path the
# notebook loads from, so the stored table is overwritten.
file_name = "movies_df.parquet"
file_path = os.path.join(FILES_DIR, file_name)
movies_df.write_parquet(file=file_path)

In [67]:
# Also export the frame as an Excel workbook under FILES_DIR.
file_name = "movies_df.xlsx"
file_path = os.path.join(FILES_DIR, file_name)
# write_excel returns the xlsxwriter Workbook; the trailing semicolon keeps
# its repr from being rendered as the cell's output.
movies_df.write_excel(file_path);

<xlsxwriter.workbook.Workbook at 0x140f11a60>