# In [2]:
import pandas as pd
import re
import contractions
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer


def clean_text(text):
    """Normalise a raw sentence before it is tokenised.

    The steps run in this order: lowercase; remove URLs, @mentions and
    #hashtags; replace every character outside letters, digits, apostrophes,
    square brackets and ``. , ! ?`` with a space; remove digits; remove
    square-bracketed spans; remove ``.`` or ``,`` that sit directly between
    two non-space characters; collapse whitespace; collapse runs of a repeated
    ``.``, ``!`` or ``?`` to a single one; expand contractions.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    lowered = text.lower()

    # Links go first, then @mentions and #hashtags.
    without_links = re.sub(r"http\S+|www\S+|https\S+", "", lowered)
    without_tags = re.sub(r"@\w+|#\w+", "", without_links)

    # Anything outside the whitelist becomes a space so neighbouring words
    # stay separated.
    whitelisted = re.sub(r"[^a-zA-Z0-9'\[\].,!?]", " ", without_tags)

    # Only ASCII digits can survive the whitelist above, so deleting 0-9
    # removes every digit character that is left.
    without_digits = whitelisted.translate(str.maketrans("", "", "0123456789"))

    # Drop "[...]" spans, then periods/commas glued between two non-space
    # characters.
    without_brackets = re.sub(r'\[[^\]]*\]', '', without_digits)
    without_inner_punct = re.sub(r'(?<=\S)[.,](?=\S)', '', without_brackets)

    # Squeeze whitespace runs to single spaces and trim both ends.
    words = without_inner_punct.split()
    squeezed = " ".join(words)

    # "!!!" -> "!", "..." -> ".", "??" -> "?"
    deduplicated = re.sub(r'([.!?])\1+', r'\1', squeezed)

    return contractions.fix(deduplicated)


def data_processing(
    df,
    tokenizer=None,
    model_path="balanced_model8.h5",
    output_path="translated_emotions.csv",
    max_len=300,
):
    """Predict an emotion label for each translated sentence and save to CSV.

    The ``Translation`` column is cleaned with ``clean_text``, rows whose
    cleaned text has fewer than three whitespace-separated words are dropped,
    the remaining text is tokenised and padded, the Keras model is loaded and
    run, and the highest-scoring class of each row is written to a new
    ``Emotion`` column. The caller's DataFrame is not modified.

    Args:
        df: DataFrame containing a ``Translation`` column.
        tokenizer: Optional fitted tokenizer exposing ``texts_to_sequences``.
            When omitted, a new ``Tokenizer`` is fitted on the data being
            classified (the original behaviour).
        model_path: Path of the saved Keras model to load.
        output_path: Destination CSV path.
        max_len: Length every sequence is padded or truncated to.

    Returns:
        The filtered DataFrame with the ``Emotion`` column added (the same
        data that is written to ``output_path``).

    Raises:
        KeyError: If ``df`` has no ``Translation`` column.
    """
    # Work on a copy: the helper column must not leak into the caller's frame,
    # and assigning to a filtered slice triggers pandas' chained-assignment
    # warning.
    df = df.copy()

    # Clean the translation column for emotion classification
    df['cleaned_text'] = df['Translation'].astype(str).apply(clean_text)

    # Drop short or empty sentences (an empty string has zero words, so the
    # single word-count filter covers both cases).
    word_counts = df['cleaned_text'].str.split().str.len()
    df = df[word_counts >= 3].copy()

    if df.empty:
        # Nothing left to classify: skip tokenising and model loading and
        # write a CSV that still has the expected columns.
        df = df.drop(columns=['cleaned_text'])
        df['Emotion'] = []
        df.to_csv(output_path, index=False)
        return df

    # === Tokenization ===
    if tokenizer is None:
        # NOTE(review): a tokenizer fitted here derives its word indices from
        # this data, which presumably differ from the indices the saved model
        # was trained with - pass the training-time tokenizer to avoid that.
        tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
        tokenizer.fit_on_texts(df['cleaned_text'])
    sequences = tokenizer.texts_to_sequences(df['cleaned_text'])
    padded = pad_sequences(sequences, padding='post', maxlen=max_len)  # adjust -- Emils model

    # === Load the .h5 Model ===
    model = load_model(model_path)

    # === Predict Emotions ===
    predictions = model.predict(padded)

    # === Map predictions to labels ===
    # NOTE(review): the label order is assumed to match the class order the
    # model was trained with - confirm against the training code.
    emotion_labels = ["neutral", "happiness", "sadness", "fear", "anger", "disgust", "surprise"]
    predicted_emotions = [emotion_labels[pred.argmax()] for pred in predictions]

    # Add to DataFrame
    df['Emotion'] = predicted_emotions
    df = df.drop(columns=['cleaned_text'])

    # Save the final version
    df.to_csv(output_path, index=False)
    return df

# Run the pipeline on the translated CSV.
from pathlib import Path

input_csv = Path("translated_output.csv")
if not input_csv.is_file():
    # The path is relative, so report the working directory it was resolved
    # against; a bare "No such file" gives no hint where the file was expected.
    raise FileNotFoundError(
        f"Input file '{input_csv}' not found in working directory '{Path.cwd()}'"
    )

df = pd.read_csv(input_csv)
data_processing(df)

# Output: FileNotFoundError: [Errno 2] No such file or directory: 'translated_output.csv'