In [1]:
import gradio as gr
import tensorflow as tf
from transformers import pipeline, AutoTokenizer, TFAutoModelForCausalLM
from googleapiclient.discovery import build
from PIL import Image
import numpy as np

In [2]:
# Load the model
model = tf.keras.models.load_model("./models/emotion_model.h5")

def preprocess_image(image):
    if isinstance(image, np.ndarray):
        img = Image.fromarray((image * 255).astype(np.uint8))
    else:
        img = Image.open(image)
    img = img.convert('L')
    img = img.resize((64, 64))
    img_rgb = Image.new('RGB', img.size)
    img_rgb.paste(img)
    img_arr = np.array(img_rgb).reshape(1, 64, 64, 3) / 255.0
    
    return img_arr

In [3]:
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
text_generation_model = TFAutoModelForCausalLM.from_pretrained(model_name)

API_KEY = 'AIzaSyD4-X0747wo4mXqJrxHwWb7mo1Yq3JhUhE'
youtube = build('youtube', 'v3', developerKey=API_KEY)



All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [4]:
def generate_sentence(emotion):
    input_text = f"Generate a song for {emotion} emotion"
    input_ids = tokenizer.encode(input_text, return_tensors="tf")
    # Increase the max_length
    generated_text = text_generation_model.generate(input_ids, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95)
    generated_sentence_full = tokenizer.decode(generated_text[0], skip_special_tokens=True)
    
    # Trim the sentence to the last period to ensure completeness
    generated_sentence_trimmed = generated_sentence_full.rsplit('.', 1)[0] + '.'
    
    return generated_sentence_trimmed




In [5]:
def recommend_music_from_image(image):
    image_arr = preprocess_image(image)
    prediction = model.predict(image_arr)
    emotions = ["angry", "contempt", "disgust", "fear", "happiness", "neutrality", "sadness", "surprise"]
    emotion = emotions[np.argmax(prediction)]

    gpt_sentence = generate_sentence(emotion)  # Getting the GPT-2 generated sentence
    
    search_query = gpt_sentence
    search_results = youtube.search().list(
        q=search_query,
        type='video',
        part='id',
        maxResults=5
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_results['items']]
    video_urls = ['https://www.youtube.com/watch?v=' + video_id for video_id in video_ids]
    
    formatted_recommendations = [f'<a href="{url}" target="_blank">Video {i+1}</a>' for i, url in enumerate(video_urls)]
    recommendations_text = "<br>".join(formatted_recommendations)
    
    return emotion, gpt_sentence, recommendations_text  # Including the GPT-2 sentence in the output

iface = gr.Interface(
    fn=recommend_music_from_image,
    inputs="image",
    outputs=["text", "text", "html"],  # Added an additional text output for the GPT-2 generated sentence
    layout="vertical",
    title="Music Recommendation Based on Emotion",
    description="Upload an image, and we'll predict the emotion, generate a song line, and recommend music for you.",
)

iface.launch()


  iface = gr.Interface(


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.






The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Traceback (most recent call last):
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\routes.py", line 534, in predict
    output = await route_utils.call_process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\route_utils.py", line 226, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\blocks.py", line 1550, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\blocks.py", li



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Traceback (most recent call last):
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\routes.py", line 534, in predict
    output = await route_utils.call_process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\route_utils.py", line 226, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\blocks.py", line 1550, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\harie\AppData\Roaming\Python\Python311\site-packages\gradio\blocks.py", li