In [4]:
import os
from transformers import pipeline
import numpy as np
import pandas as pd
VIDEOS = ['jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp',
 'JFBB_Rhythm_Section_Day1_Morning Thomas',
 'JFBB_Rhythm_Section_Day1_Afternoon Thomas',
 'JFBB_Jazz_Funk_morning_video20230419101818 Luis Cal García',
 'JFBB_Jazz_Funk_afternoon_video20230419143359 Luis Cal García',
 'jazzaar-20230417-afternoon-JFLB-Cuban-Vocals-video Philipp',
 'jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp']

In [5]:
classifier = pipeline("text-classification",model='bhadresh-savani/distilbert-base-uncased-emotion', return_all_scores=True)
columns = ['segment_id', 'start','end','sadness','joy','love','anger','fear','surprise']
dimensions = (-1,9)
for video in VIDEOS:
    output = []
    for root, dir, files in os.walk(f"../data/transcriptions/processed_merged/{video}"):
        for file in files:
            
            with open(f'{root}/{file}') as f:
                text = f.read()
                f.close()

            prediction = classifier(text)
            emotions = [x['score'] for x in prediction[0]]
            meta_info = [file.split('_')[i] for i in range(3)]
            output = output + meta_info + emotions
            
    output = np.reshape(output, dimensions)
    output_df = pd.DataFrame(output, columns=columns)
    output_df= output_df.astype({'segment_id': 'int32'})
    output_df = output_df.sort_values(by=['segment_id']).reset_index(drop=True)  
    output_df.to_csv(f'../data/emotion_from_transcript/distilbert-base-uncased-emotion/{video}.csv')



In [6]:
classifier = pipeline("text-classification",model='j-hartmann/emotion-english-distilroberta-base', return_all_scores=True)
columns = ['segment_id', 'start','end','anger','disgust','fear','joy','neutral','sadness', 'surprise']
dimensions = (-1,10)
for video in VIDEOS:
    output = []
    for root, dir, files in os.walk(f"../data/transcriptions/processed_merged/{video}"):
        for file in files:
            
            with open(f'{root}/{file}') as f:
                text = f.read()
                f.close()

            prediction = classifier(text)
            emotions = [x['score'] for x in prediction[0]]
            meta_info = [file.split('_')[i] for i in range(3)]
            output = output + meta_info + emotions
            
    output = np.reshape(output, dimensions)
    output_df = pd.DataFrame(output, columns=columns)
    output_df= output_df.astype({'segment_id': 'int32'})
    output_df = output_df.sort_values(by=['segment_id']).reset_index(drop=True)
    output_df.to_csv(f'../data/emotion_from_transcript/emotion-english-distilroberta-base/{video}.csv')



In [7]:
classifier = pipeline("sentiment-analysis")
columns = ['segment_id', 'start','end','positive', 'negative']
dimensions = (-1,5)
for video in VIDEOS:
    output = []
    for root, dir, files in os.walk(f"../data/transcriptions/processed_merged/{video}"):
        for file in files:
            
            print(f'{root}/{file}')
            with open(f'{root}/{file}') as f:
                text = f.read()
                f.close()

            prediction = classifier(text)
            if prediction[0]['label'] == 'POSITIVE':
                emotions = [1,0]
            else:
                emotions = [0,1]

            meta_info = [file.split('_')[i] for i in range(3)]
            output = output + meta_info + emotions
            
    output = np.reshape(output, dimensions)
    output_df = pd.DataFrame(output, columns=columns)
    output_df= output_df.astype({'segment_id': 'int32'})
    output_df = output_df.sort_values(by=['segment_id']).reset_index(drop=True)
    output_df.to_csv(f'../data/emotion_from_transcript/binary_sentiment/{video}.csv')


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/83_6985_7051_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/90_7676_7731_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/31_4014_4026_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/44_5034_5063_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/96_8544_8597_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/21_2585_2596_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/69_6265_6282_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp/56_5653_5696_seg.txt
../data/transcriptions/processed_merged/jazzaar-20230417