# Função 1 - Filmes de Romance da API tmdb

In [None]:
import os
import time
import json
import requests
import pandas as pd
import boto3
import logging
from botocore.exceptions import ClientError
from datetime import date

# TMDB API key, read from the "api_key" environment variable (None when unset).
api_key = os.environ.get("api_key")

# Date segment used in the uploaded object keys; formatted once at import time.
today = f"{date.today():%Y/%m/%d}"
# Accumulates the movie ids collected by data().
filmes = []

def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination bucket.
        object_name: Destination object key. When omitted, the base name of
            ``file_name`` is used.

    Returns:
        True when the upload call completed without a handled error, False
        when the upload failed (the error is logged).
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # The managed transfer re-raises service errors as
        # S3UploadFailedError, so catching ClientError alone would let real
        # upload failures escape instead of returning False.
        logging.error(e)
        return False
    return True

def data(url):
    """Append the id of every movie in a TMDB result page to ``filmes``.

    Args:
        url: Full URL of an endpoint whose JSON body carries a ``results``
            list of objects that each have an ``id`` key.

    Raises:
        requests.exceptions.RequestException: On timeout, connection failure
            or an HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the run forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors (bad key, rate limit, ...) directly instead of
    # failing below with a KeyError on the missing 'results' key.
    response.raise_for_status()
    pagina = response.json()

    filmes.extend(filme['id'] for filme in pagina['results'])

def transforma_json(arquivo):
    """Return ``arquivo`` serialized as a JSON string (``json.dumps`` defaults)."""
    return json.dumps(arquivo)
    
def get_id(filmes, contador):
    """Fetch TMDB movie details in batches of 100 ids and upload each batch.

    Batch ``contador`` covers ``filmes[100 * (contador - 1):100 * contador]``.
    Each id in the batch is looked up on the movie-details endpoint and the
    decoded JSON is collected. The batch is written to
    ``/tmp/tmdb_filmes{contador}.json`` and uploaded to the
    ``desafiofinalcompass`` bucket under ``Raw/local/tbmd/json/{today}/``.
    Processing continues with the following batches until batch 100 has been
    handled or the ids run out.

    Args:
        filmes: List of movie ids to look up.
        contador: 1-based number of the first batch to process.

    Returns:
        None.
    """
    # Loop instead of recursing: the recursive form kept every batch's detail
    # list alive on the call stack until the final batch had finished.
    while contador < 101:
        lote = filmes[100 * (contador - 1):100 * contador]
        if not lote:
            # Ids exhausted; do not upload files that only contain "[]".
            return

        detalhe_filmes = []
        for unique_id in lote:
            # The timeout turns a stalled connection into an exception
            # instead of an indefinite hang.
            response = requests.get(
                f"https://api.themoviedb.org/3/movie/{unique_id}?api_key={api_key}&language=en",
                timeout=30,
            )
            detalhe_filmes.append(response.json())

        caminho = f'/tmp/tmdb_filmes{contador}.json'
        with open(caminho, 'w', encoding='utf-8') as tmdb:
            tmdb.write(transforma_json(detalhe_filmes))

        enviado = upload_file(caminho, 'desafiofinalcompass',
                              f'Raw/local/tbmd/json/{today}/tmdb_filmes{contador}.json')
        if not enviado:
            # Keep going with the next batch, but leave a trace of the gap.
            logging.error("Upload of %s failed", caminho)

        contador += 1

def lambda_handler(event, context):
    """Collect Romance movie ids from TMDB discover and upload their details.

    Pages 1 through 500 of the discover endpoint (genre id 10749) are read
    via ``data``, which fills the module-level ``filmes`` list; ``get_id``
    then fetches and uploads the details starting at batch 1.

    Args:
        event: Invocation event (not used).
        context: Invocation context (not used).
    """
    # Module globals can outlive a single invocation when the execution
    # environment is reused; start empty so ids are not collected twice.
    filmes.clear()

    for i in range(1, 501):
        # The query string previously had a stray "a" right after the API
        # key, which corrupted the key sent to the API.
        data(f'https://api.themoviedb.org/3/discover/movie?api_key={api_key}&with_genres=10749&page={i}')

    get_id(filmes, 1)


# Função 2 - Filmes de Drama da API tmdb

In [None]:
import os
import time
import json
import requests
import pandas as pd
import boto3
import logging
from botocore.exceptions import ClientError
from datetime import date

# TMDB API key, read from the "api_key" environment variable (None when unset).
api_key = os.environ.get("api_key")

# Date segment used in the uploaded object keys; formatted once at import time.
today = f"{date.today():%Y/%m/%d}"
# Accumulates the movie ids collected by data().
filmes = []

def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination bucket.
        object_name: Destination object key. When omitted, the base name of
            ``file_name`` is used.

    Returns:
        True when the upload call completed without a handled error, False
        when the upload failed (the error is logged).
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # The managed transfer re-raises service errors as
        # S3UploadFailedError, so catching ClientError alone would let real
        # upload failures escape instead of returning False.
        logging.error(e)
        return False
    return True

def data(url):
    """Append the id of every movie in a TMDB result page to ``filmes``.

    Args:
        url: Full URL of an endpoint whose JSON body carries a ``results``
            list of objects that each have an ``id`` key.

    Raises:
        requests.exceptions.RequestException: On timeout, connection failure
            or an HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the run forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors (bad key, rate limit, ...) directly instead of
    # failing below with a KeyError on the missing 'results' key.
    response.raise_for_status()
    pagina = response.json()

    filmes.extend(filme['id'] for filme in pagina['results'])

def transforma_json(arquivo):
    """Return ``arquivo`` serialized as a JSON string (``json.dumps`` defaults)."""
    return json.dumps(arquivo)
    
def get_id(filmes, contador):
    """Fetch TMDB movie details in batches of 100 ids and upload each batch.

    Batch ``contador`` covers ``filmes[100 * (contador - 1):100 * contador]``.
    Each id in the batch is looked up on the movie-details endpoint and the
    decoded JSON is collected. The batch is written to
    ``/tmp/tmdb_filmes_drama{contador}.json`` and uploaded to the
    ``desafiofinalcompass`` bucket under ``Raw/local/tbmd/json/{today}/``.
    Processing continues with the following batches until batch 100 has been
    handled or the ids run out.

    Args:
        filmes: List of movie ids to look up.
        contador: 1-based number of the first batch to process.

    Returns:
        None.
    """
    # Loop instead of recursing: the recursive form kept every batch's detail
    # list alive on the call stack until the final batch had finished.
    while contador < 101:
        lote = filmes[100 * (contador - 1):100 * contador]
        if not lote:
            # Ids exhausted; do not upload files that only contain "[]".
            return

        detalhe_filmes = []
        for unique_id in lote:
            # The timeout turns a stalled connection into an exception
            # instead of an indefinite hang.
            response = requests.get(
                f"https://api.themoviedb.org/3/movie/{unique_id}?api_key={api_key}&language=en",
                timeout=30,
            )
            detalhe_filmes.append(response.json())

        caminho = f'/tmp/tmdb_filmes_drama{contador}.json'
        with open(caminho, 'w', encoding='utf-8') as tmdb:
            tmdb.write(transforma_json(detalhe_filmes))

        enviado = upload_file(caminho, 'desafiofinalcompass',
                              f'Raw/local/tbmd/json/{today}/tmdb_filmes_drama{contador}.json')
        if not enviado:
            # Keep going with the next batch, but leave a trace of the gap.
            logging.error("Upload of %s failed", caminho)

        contador += 1

def lambda_handler(event, context):
    """Collect Drama movie ids from TMDB discover and upload their details.

    Pages 1 through 500 of the discover endpoint (genre id 18) are read via
    ``data``, which fills the module-level ``filmes`` list; ``get_id`` then
    fetches and uploads the details starting at batch 1.

    Args:
        event: Invocation event (not used).
        context: Invocation context (not used).
    """
    # Module globals can outlive a single invocation when the execution
    # environment is reused; start empty so ids are not collected twice.
    filmes.clear()

    for i in range(1, 501):
        data(f'https://api.themoviedb.org/3/discover/movie?api_key={api_key}&with_genres=18&page={i}')

    get_id(filmes, 1)


# Função 3 - Filmes de Romance por ID externo do arquivo movies.csv

In [None]:
import os
import time
import json
import requests
import pandas as pd
import boto3
import logging
from botocore.exceptions import ClientError
from datetime import date

# TMDB API key, read from the "api_key" environment variable (None when unset).
api_key = os.environ.get("api_key")
# Shared S3 client used by lambda_handler to read the source CSV.
s3 = boto3.client('s3')

# Date segment used in the uploaded object keys; formatted once at import time.
today = f"{date.today():%Y/%m/%d}"
# Ids taken from the 'id' column of the filtered CSV rows.
filmes_csv_id = []
# Movie ids returned by the TMDB find endpoint (filled by data()).
filme_externo_id = []

def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination bucket.
        object_name: Destination object key. When omitted, the base name of
            ``file_name`` is used.

    Returns:
        True when the upload call completed without a handled error, False
        when the upload failed (the error is logged).
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # The managed transfer re-raises service errors as
        # S3UploadFailedError, so catching ClientError alone would let real
        # upload failures escape instead of returning False.
        logging.error(e)
        return False
    return True

def data(url):
    """Append the ids found in a TMDB find response to ``filme_externo_id``.

    Args:
        url: Full URL of an endpoint whose JSON body carries a
            ``movie_results`` list of objects that each have an ``id`` key.

    Raises:
        requests.exceptions.RequestException: On timeout, connection failure
            or an HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the run forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors (bad key, rate limit, ...) directly instead of
    # failing below with a KeyError on the missing 'movie_results' key.
    response.raise_for_status()
    resultado = response.json()

    filme_externo_id.extend(movies['id'] for movies in resultado['movie_results'])
        
def transforma_json(arquivo):
    """Return ``arquivo`` serialized as a JSON string (``json.dumps`` defaults)."""
    return json.dumps(arquivo)
    
def get_id(filmes, contador):
    """Fetch TMDB movie details in batches of 100 ids and upload each batch.

    Batch ``contador`` covers ``filmes[100 * (contador - 1):100 * contador]``.
    Each id in the batch is looked up on the movie-details endpoint and the
    decoded JSON is collected. The batch is written to
    ``/tmp/tmdb_filmes_romance_externo{contador}.json`` and uploaded to the
    ``desafiofinalcompass`` bucket under ``Raw/local/tbmd/json/{today}/``.
    Processing continues with the following batches until batch 50 has been
    handled or the ids run out.

    Args:
        filmes: List of movie ids to look up.
        contador: 1-based number of the first batch to process.

    Returns:
        None.
    """
    # Loop instead of recursing: the recursive form kept every batch's detail
    # list alive on the call stack until the final batch had finished.
    while contador < 51:
        lote = filmes[100 * (contador - 1):100 * contador]
        if not lote:
            # Ids exhausted; do not upload files that only contain "[]".
            return

        detalhe_filmes_externos = []
        for unique_id in lote:
            # The timeout turns a stalled connection into an exception
            # instead of an indefinite hang.
            response = requests.get(
                f"https://api.themoviedb.org/3/movie/{unique_id}?api_key={api_key}&language=en",
                timeout=30,
            )
            detalhe_filmes_externos.append(response.json())

        caminho = f'/tmp/tmdb_filmes_romance_externo{contador}.json'
        with open(caminho, 'w', encoding='utf-8') as tmdb:
            tmdb.write(transforma_json(detalhe_filmes_externos))

        enviado = upload_file(caminho, 'desafiofinalcompass',
                              f'Raw/local/tbmd/json/{today}/tmdb_filmes_romance_externo{contador}.json')
        if not enviado:
            # Keep going with the next batch, but leave a trace of the gap.
            logging.error("Upload of %s failed", caminho)

        contador += 1

def lambda_handler(event, context):
    """Resolve Romance movies from movies.csv on TMDB and upload their details.

    Reads ``Raw/local/CSV/Movies/2024/03/23/movies.csv`` (pipe-separated)
    from the bucket named in the first record of the triggering event and
    keeps the rows whose ``genero`` column equals ``'Romance'``. The first
    5000 values of their ``id`` column are looked up through the TMDB find
    endpoint as IMDb ids, and the resolved ids are handed to ``get_id``.

    Args:
        event: S3 notification event; only
            ``event['Records'][0]['s3']['bucket']['name']`` is read.
        context: Invocation context (not used).
    """
    # Module globals can outlive a single invocation when the execution
    # environment is reused; start empty so ids are not collected twice.
    filmes_csv_id.clear()
    filme_externo_id.clear()

    bucket = event['Records'][0]['s3']['bucket']['name']
    response = s3.get_object(Bucket=bucket, Key='Raw/local/CSV/Movies/2024/03/23/movies.csv')
    df = pd.read_csv(response['Body'], sep="|")

    df_filtrado = df.loc[df['genero'] == 'Romance']
    # NOTE(review): ids are taken as-is; if the CSV repeats an id across rows
    # the same lookup is issued more than once - confirm against the data.
    filmes_csv_id.extend(df_filtrado['id'])

    for filme_id in filmes_csv_id[0:5000]:
        data(f"https://api.themoviedb.org/3/find/{filme_id}?api_key={api_key}&external_source=imdb_id")

    # Fetch details for the ids resolved by the find lookups. The CSV ids were
    # passed here before, which left the lookup results unused.
    get_id(filme_externo_id, 1)
    
    

# Função 4 - Filmes de Drama por ID externo do arquivo movies.csv

In [None]:
import os
import time
import json
import requests
import pandas as pd
import boto3
import logging
from botocore.exceptions import ClientError
from datetime import date

# TMDB API key, read from the "api_key" environment variable (None when unset).
api_key = os.environ.get("api_key")
# Shared S3 client used by lambda_handler to read the source CSV.
s3 = boto3.client('s3')

# Date segment used in the uploaded object keys; formatted once at import time.
today = f"{date.today():%Y/%m/%d}"
# Ids taken from the 'id' column of the filtered CSV rows.
filmes_csv_id = []
# Movie ids returned by the TMDB find endpoint (filled by data()).
filme_externo_id = []

def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination bucket.
        object_name: Destination object key. When omitted, the base name of
            ``file_name`` is used.

    Returns:
        True when the upload call completed without a handled error, False
        when the upload failed (the error is logged).
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # The managed transfer re-raises service errors as
        # S3UploadFailedError, so catching ClientError alone would let real
        # upload failures escape instead of returning False.
        logging.error(e)
        return False
    return True

def data(url):
    """Append the ids found in a TMDB find response to ``filme_externo_id``.

    Args:
        url: Full URL of an endpoint whose JSON body carries a
            ``movie_results`` list of objects that each have an ``id`` key.

    Raises:
        requests.exceptions.RequestException: On timeout, connection failure
            or an HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the run forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors (bad key, rate limit, ...) directly instead of
    # failing below with a KeyError on the missing 'movie_results' key.
    response.raise_for_status()
    resultado = response.json()

    filme_externo_id.extend(movies['id'] for movies in resultado['movie_results'])
        
def transforma_json(arquivo):
    """Return ``arquivo`` serialized as a JSON string (``json.dumps`` defaults)."""
    return json.dumps(arquivo)
    
def get_id(filmes, contador):
    """Fetch TMDB movie details in batches of 100 ids and upload each batch.

    Batch ``contador`` covers ``filmes[100 * (contador - 1):100 * contador]``.
    Each id in the batch is looked up on the movie-details endpoint and the
    decoded JSON is collected. The batch is written to
    ``/tmp/tmdb_filmes_drama_externo{contador}.json`` and uploaded to the
    ``desafiofinalcompass`` bucket under ``Raw/local/tbmd/json/{today}/``.
    Processing continues with the following batches until batch 50 has been
    handled or the ids run out.

    Args:
        filmes: List of movie ids to look up.
        contador: 1-based number of the first batch to process.

    Returns:
        None.
    """
    # Loop instead of recursing: the recursive form kept every batch's detail
    # list alive on the call stack until the final batch had finished.
    while contador < 51:
        lote = filmes[100 * (contador - 1):100 * contador]
        if not lote:
            # Ids exhausted; do not upload files that only contain "[]".
            return

        detalhe_filmes_externos = []
        for unique_id in lote:
            # The timeout turns a stalled connection into an exception
            # instead of an indefinite hang.
            response = requests.get(
                f"https://api.themoviedb.org/3/movie/{unique_id}?api_key={api_key}&language=en",
                timeout=30,
            )
            detalhe_filmes_externos.append(response.json())

        caminho = f'/tmp/tmdb_filmes_drama_externo{contador}.json'
        with open(caminho, 'w', encoding='utf-8') as tmdb:
            tmdb.write(transforma_json(detalhe_filmes_externos))

        enviado = upload_file(caminho, 'desafiofinalcompass',
                              f'Raw/local/tbmd/json/{today}/tmdb_filmes_drama_externo{contador}.json')
        if not enviado:
            # Keep going with the next batch, but leave a trace of the gap.
            logging.error("Upload of %s failed", caminho)

        contador += 1

def lambda_handler(event, context):
    """Resolve Drama movies from movies.csv on TMDB and upload their details.

    Reads ``Raw/local/CSV/Movies/2024/03/23/movies.csv`` (pipe-separated)
    from the bucket named in the first record of the triggering event and
    keeps the rows whose ``genero`` column equals ``'Drama'``. The first
    5000 values of their ``id`` column are looked up through the TMDB find
    endpoint as IMDb ids, and the resolved ids are handed to ``get_id``.

    Args:
        event: S3 notification event; only
            ``event['Records'][0]['s3']['bucket']['name']`` is read.
        context: Invocation context (not used).
    """
    # Module globals can outlive a single invocation when the execution
    # environment is reused; start empty so ids are not collected twice.
    filmes_csv_id.clear()
    filme_externo_id.clear()

    bucket = event['Records'][0]['s3']['bucket']['name']
    response = s3.get_object(Bucket=bucket, Key='Raw/local/CSV/Movies/2024/03/23/movies.csv')
    df = pd.read_csv(response['Body'], sep="|")

    df_filtrado = df.loc[df['genero'] == 'Drama']
    # NOTE(review): ids are taken as-is; if the CSV repeats an id across rows
    # the same lookup is issued more than once - confirm against the data.
    filmes_csv_id.extend(df_filtrado['id'])

    for filme_id in filmes_csv_id[0:5000]:
        data(f"https://api.themoviedb.org/3/find/{filme_id}?api_key={api_key}&external_source=imdb_id")

    # Fetch details for the ids resolved by the find lookups. The CSV ids were
    # passed here before, which left the lookup results unused.
    get_id(filme_externo_id, 1)
    
    