In [1]:
import requests
import pandas as pd
import os

In [2]:
def get_movie_info(api_key, tconst, timeout=10):
    """Fetch the full-plot OMDB record for a single title.

    Args:
        api_key: OMDB API key, sent as the ``apikey`` query parameter.
        tconst: Title id, sent as OMDB's ``i`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON dict when the payload's ``Response`` field equals
        ``'True'``. In every other case (transport failure, non-200 status,
        undecodable body, or an API-level error) a dict with a single
        ``'Error'`` key describing the problem, so callers can keep using
        ``'Error' in result`` as their only failure check.
    """
    try:
        # HTTPS keeps the API key out of clear text; `params` lets requests
        # URL-encode the values; `timeout` stops one stalled connection from
        # hanging a long batch run.
        response = requests.get(
            "https://www.omdbapi.com/",
            params={"apikey": api_key, "i": tconst, "plot": "full"},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return {'Error': f'Failed to reach the OMDB API: {exc}'}

    if response.status_code != 200:
        return {'Error': 'Failed to reach the OMDB API'}

    try:
        data = response.json()
    except ValueError:
        # requests' JSON decode errors derive from ValueError.
        return {'Error': 'OMDB API returned a response that is not valid JSON'}

    if not isinstance(data, dict):
        return {'Error': 'Unknown error occurred'}

    # .get() avoids a KeyError when the payload has no 'Response' field.
    if data.get('Response') == 'True':
        return data
    return {'Error': data.get('Error', 'Unknown error occurred')}

In [None]:
# Fetch the BoxOffice figure for every tconst in the input CSV and append the
# results to the output CSV one batch at a time, so an interrupted run loses
# at most one batch of work.

# Prefer the OMDB_API_KEY environment variable so the real key never has to be
# saved in the notebook; the literal is only a placeholder fallback.
api_key = os.environ.get('OMDB_API_KEY', 'api_key')

input_csv = 'CleanedData/tconst_not_in_dataset_csv_filtered.csv'
df = pd.read_csv(input_csv)

output_csv = 'CleanedData/data_api.csv'

os.makedirs(os.path.dirname(output_csv), exist_ok=True)

batch_size = 100
num_batches = (len(df) + batch_size - 1) // batch_size  # ceiling division


def _parse_box_office(raw):
    """Turn a BoxOffice value shaped like "$1,234,567" into an int.

    Returns 'N/A' when the value already is 'N/A' or cannot be parsed, so one
    odd payload does not abort the run and discard the batch in progress.
    """
    if raw == 'N/A':
        return raw
    try:
        return int(str(raw).replace(",", "").replace("$", ""))
    except ValueError:
        return 'N/A'


# Titles already present in the output file are skipped, so re-running this
# cell resumes where the previous run stopped instead of re-fetching
# everything and appending duplicate rows.
already_fetched = set()
if os.path.isfile(output_csv) and os.path.getsize(output_csv) > 0:
    already_fetched = set(pd.read_csv(output_csv, usecols=['tconst'])['tconst'])
    if already_fetched:
        print(f"Resuming: {len(already_fetched)} titles already in {output_csv}")

for batch in range(num_batches):
    start_index = batch * batch_size
    end_index = start_index + batch_size
    current_batch = df['tconst'].iloc[start_index:end_index]

    batch_results = []

    for i, tconst in enumerate(current_batch):
        if tconst in already_fetched:
            continue

        movie_info = get_movie_info(api_key, tconst)
        if 'Error' not in movie_info:
            box_office = _parse_box_office(movie_info.get('BoxOffice', 'N/A'))
            if box_office != 'N/A':
                print(f"Value found for {tconst}, {box_office}")
            batch_results.append({'tconst': tconst, 'BoxOffice': box_office})
            already_fetched.add(tconst)
        else:
            # Failed lookups are not recorded, so a later run retries them.
            print(f"Error retrieving data for {tconst}: {movie_info['Error']}")
        print(f"Processed {start_index + i + 1} of {len(df)}")

    batch_df = pd.DataFrame(batch_results)

    print('##############################################')
    if not batch_df.empty:
        # Write the header only when starting a new (or empty) file.
        write_header = (
            not os.path.isfile(output_csv) or os.path.getsize(output_csv) == 0
        )
        batch_df.to_csv(output_csv, mode='a', header=write_header, index=False)
        print(f"Batch {batch + 1} results saved to {output_csv}")
    else:
        print(f"Batch {batch + 1}: no new results to save")

print(f"All results saved to {output_csv}")
