 Imports and Setup

In [6]:
from datetime import datetime
import pandas as pd
import json
import os
import requests

# API key setup
from constants import TMDB_API_KEY


In [7]:
class MovieEntry:
    """A single movie record: its title, a date value, and a cinema flag."""

    def __init__(self, title, date, is_cinema):
        # Stored as given; no validation or conversion is applied here.
        self.title, self.date, self.is_cinema = title, date, is_cinema

    def __repr__(self):
        """Return a debugging string listing the three stored fields."""
        fields = (
            f"title={self.title}, "
            f"date={self.date}, "
            f"is_cinema={self.is_cinema}"
        )
        return "MovieEntry(" + fields + ")"


In [8]:
def fetch_movie_details(title):
    """Return the first TMDB search result for *title*, using a JSON file cache.

    The cache lives in ``movie_cache.json`` in the current working directory
    and maps each looked-up title to the result stored for it. A cached title
    is returned without any network request.

    Args:
        title: Movie title to search for; also used as the cache key.

    Returns:
        The first entry of the ``results`` list in the TMDB search response,
        or ``None`` when the response status is not 200 or there are no
        results. Misses are not cached, so they are retried on the next call.

    Raises:
        requests.RequestException: On network failure or when the request
            exceeds the timeout.
    """
    cache_file = "movie_cache.json"
    cache = {}
    if os.path.exists(cache_file):
        try:
            with open(cache_file, 'r', encoding="utf-8") as f:
                cache = json.load(f)
        except json.JSONDecodeError:
            # A truncated or hand-edited cache must not block every lookup;
            # fall back to an empty cache and let it be rebuilt.
            cache = {}
        if not isinstance(cache, dict):
            # Valid JSON of the wrong shape is equally unusable as a cache.
            cache = {}

    if title in cache:
        return cache[title]

    # Passing the title through `params` lets requests URL-encode it, so
    # characters such as '&', '#', '+' or spaces cannot corrupt the query.
    response = requests.get(
        "https://api.themoviedb.org/3/search/movie",
        params={"query": title, "api_key": TMDB_API_KEY},
        timeout=10,  # without a timeout a stalled connection hangs forever
    )
    if response.status_code != 200:
        return None

    # .get() tolerates a 200 response body that lacks the "results" key.
    results = response.json().get("results")
    if not results:
        return None

    details = results[0]
    cache[title] = details
    with open(cache_file, 'w', encoding="utf-8") as f:
        json.dump(cache, f)
    return details


In [9]:
def enrich_data_with_tmdb(df):
    """Attach a "Details" column with the lookup result for each title.

    Every value in ``df["Title"]`` is passed to ``fetch_movie_details`` in
    row order. The frame is modified in place and also returned.
    """
    df["Details"] = [fetch_movie_details(name) for name in df["Title"]]
    return df

In [10]:
filename = "entertainment.txt"
entries = []

# Parse one movie per line. When a line contains " (" followed later by ")",
# the text before " (" is the title, the text up to the first ")" is the
# date, and the word "cinema" anywhere after the " (" marks a cinema viewing.
# Any other line is taken as a bare title with no date.
with open(filename, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines would otherwise become entries with an empty title.
            continue

        # partition() never raises, unlike unpacking split(" (", 1) on a line
        # such as "Title(2020)" that has "(" but no preceding space.
        title, sep, rest = line.partition(" (")
        if sep and ")" in rest:
            date = rest.split(")")[0].strip()
            is_cinema = "cinema" in rest.lower()
        else:
            title = line
            date = ""
            is_cinema = False

        title = title.title().strip()
        entries.append(MovieEntry(title, date, is_cinema))


In [11]:
# One (title, date, is_cinema) row per parsed entry, in input order.
records = [(entry.title, entry.date, entry.is_cinema) for entry in entries]
df = pd.DataFrame(records, columns=["Title", "Date_Watched", "Is_Cinema"])

In [12]:
# Add a "Details" column holding the TMDB lookup result (or None) per title.
df = enrich_data_with_tmdb(df)

In [13]:
# Write the table to enriched_data.csv; index=False omits the row index.
df.to_csv("enriched_data.csv", index=False)
