#### Define model to be used and name of the file and video source

In [None]:
import os
import whisper
# Whisper checkpoint to load. Available names:
# 'tiny.en', 'tiny', 'base.en', 'base', 'small.en', 'small', 'medium.en', 'medium', 'large-v1', 'large-v2', 'large'
# NOTE: Large models require more than 11GB VRAM
model_name = "medium"
audio_model = whisper.load_model(model_name)

# Base name shared by every generated file, and the URL the video is fetched from
name = 'SET YOUR OWN'
url = 'SET YOUR OWN'

# Derive the per-stage file names from the shared base name
mp4_file, mp3_file, txt_file = (
    f"{name}{extension}" for extension in (".mp4", ".mp3", ".txt")
)

#### Get video from internet

In [None]:
import urllib.request
import m3u8_To_MP4
# Fetch the video: playlist URLs (.m3u8) go through m3u8_To_MP4,
# direct .mp4 links are downloaded with urllib. Any other URL is left untouched.
if ".m3u8" in url:
    m3u8_To_MP4.multithread_download(url)
    # Move the downloader's output file to the wanted file name
    os.rename('m3u8_To_MP4.mp4', mp4_file)
elif ".mp4" in url:
    urllib.request.urlretrieve(url, filename=mp4_file)

#### Convert to mp3 and filter noise and silent parts out

In [None]:
import subprocess

# NOTE: ffmpeg must be on PATH (on Windows it can be installed with choco)
# ~5min for 1.5h file
tmp_mp3_file = "tmp_" + mp3_file

# Every ffmpeg call passes its arguments as a list (no shell), so file names
# containing spaces or shell metacharacters are handled correctly.
# check=True raises CalledProcessError when ffmpeg fails, which stops the cell
# before the clean-up below can delete the downloaded video.

# Convert mp4 to mp3 (-vn drops the video stream, -y overwrites existing output)
subprocess.run(["ffmpeg", "-i", mp4_file, "-vn", "-y", mp3_file], check=True)
print("mp4 to mp3 conversion done")

# Remove noise using FFT filtering
subprocess.run(
    ["ffmpeg", "-i", mp3_file, "-af", "afftdn=nr=3", "-y", tmp_mp3_file],
    check=True,
)
print("Noise reduction done")

# Remove silent spots longer than 10s
subprocess.run(
    [
        "ffmpeg", "-i", tmp_mp3_file,
        "-af", "silenceremove=stop_periods=-1:stop_duration=10:stop_threshold=-50dB",
        "-y", mp3_file,
    ],
    check=True,
)
print("Silent parts removed")

# Delete the temporary audio file and the source video
os.remove(tmp_mp3_file)
os.remove(mp4_file)
print("Extra files removed")

In [None]:
# Lookup table from language code to lowercase English language name.
# The language-detection cell indexes it with the most probable key returned by detect_language.
LANGUAGES = {"en": "english","zh": "chinese","de": "german","es": "spanish","ru": "russian","ko": "korean","fr": "french","ja": "japanese","pt": "portuguese","tr": "turkish","pl": "polish","ca": "catalan","nl": "dutch","ar": "arabic","sv": "swedish","it": "italian","id": "indonesian","hi": "hindi","fi": "finnish","vi": "vietnamese","he": "hebrew","uk": "ukrainian","el": "greek","ms": "malay","cs": "czech","ro": "romanian","da": "danish","hu": "hungarian","ta": "tamil","no": "norwegian","th": "thai","ur": "urdu","hr": "croatian","bg": "bulgarian","lt": "lithuanian","la": "latin","mi": "maori","ml": "malayalam","cy": "welsh","sk": "slovak","te": "telugu","fa": "persian","lv": "latvian","bn": "bengali","sr": "serbian","az": "azerbaijani","sl": "slovenian","kn": "kannada","et": "estonian","mk": "macedonian","br": "breton","eu": "basque","is": "icelandic","hy": "armenian","ne": "nepali","mn": "mongolian","bs": "bosnian","kk": "kazakh","sq": "albanian","sw": "swahili","gl": "galician","mr": "marathi","pa": "punjabi","si": "sinhala","km": "khmer","sn": "shona","yo": "yoruba","so": "somali","af": "afrikaans","oc": "occitan","ka": "georgian","be": "belarusian","tg": "tajik","sd": "sindhi","gu": "gujarati","am": "amharic","yi": "yiddish","lo": "lao","uz": "uzbek","fo": "faroese","ht": "haitian creole","ps": "pashto","tk": "turkmen","nn": "nynorsk","mt": "maltese","sa": "sanskrit","lb": "luxembourgish","my": "myanmar","bo": "tibetan","tl": "tagalog","mg": "malagasy","as": "assamese","tt": "tatar","haw": "hawaiian","ln": "lingala","ha": "hausa","ba": "bashkir","jw": "javanese","su": "sundanese"}

#### Detect the language if an English-only model is not used

In [None]:
# English-only checkpoints (names containing ".en") skip language detection
if ".en" not in model_name:
    # Load the audio and fit it to the length expected by the model
    audio = whisper.pad_or_trim(whisper.load_audio(mp3_file))
    # Spectrogram must live on the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(audio_model.device)
    _, probs = audio_model.detect_language(mel)
    # Pick the language code with the highest probability and translate it to a name
    most_likely_code = max(probs, key=probs.get)
    detected_language = LANGUAGES[most_likely_code]
    print(f"Detected language: {detected_language}")

#### Generate transcriptions

In [None]:
# Transcribing 1.5h of audio takes about 9 min on a GTX 1070 using medium.en

# Options shared by both model families
transcribe_options = {"verbose": True, "temperature": 0.8}
if ".en" not in model_name:
    # Multilingual models are told which language was detected earlier
    transcribe_options["language"] = detected_language

result = audio_model.transcribe(mp3_file, **transcribe_options)

#### Split each sentence onto its own row and delete the .mp3 file

In [None]:
# The transcription is already held in `result`, so the audio file can be deleted
os.remove(mp3_file)

In [None]:
# Put every sentence on its own row: drop existing line breaks, then break
# after each sentence terminator. A single translate() pass gives the same
# result as chaining one replace() per character. The text is processed in
# memory instead of being written out and read straight back.
sentence_breaks = str.maketrans({'\n': '', '?': '?\n', '.': '.\n', '!': '!\n'})
contents = result["text"].translate(sentence_breaks)
# Only use this if some sentences are really long.
#contents = contents.replace(',', ',\n')

# Write the sentence-per-row text, overwriting any existing file
with open(txt_file, 'w') as file:
    file.write(contents)

#### Embed sentences and calculate cosine similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import seaborn as sns
import matplotlib.pyplot as plt

import numpy as np

# Load one sentence per line, skipping lines that are too short
with open(txt_file, 'r') as file:
    sentences = [line for line in file if len(line) > 6]

# Define the model used for text embedding and create the similarity matrix
text_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
embeddings = text_model.encode(sentences)
similarities = cosine_similarity(embeddings)

# Plot the similarity heatmap
sns.heatmap(similarities).set_title('Cosine similarities matrix {}'.format(name))
#plt.savefig('{}.png'.format(name), dpi=2000)

# Main diagonal: each sentence against itself.
# First off-diagonal: each sentence against the one that follows it.
# The off-diagonal is one element shorter, so its last value is repeated to
# pad it to the same length. `+` on NumPy arrays would add that value to every
# element instead of appending it, so np.append is used. Slicing with [-1:]
# keeps this safe when there is only one sentence (empty off-diagonal).
next_sentence_similarity = similarities.diagonal(1)
diagonals = [
    similarities.diagonal(),
    np.append(next_sentence_similarity, next_sentence_similarity[-1:]),
]

#### Get diagonals and detect where paragraphs should end

In [None]:
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

# Difference between the main diagonal and the neighbouring diagonal,
# for every position except the last one of the main diagonal
difference = [
    own_value - neighbour_value
    for own_value, neighbour_value in zip(diagonals[0][:-1], diagonals[1])
]

# Perform n-wide Gaussian smoothing
n = 1
difference_smoothed = gaussian_filter1d(difference, sigma=n)

# Detect peaks where the sentence difference is largest (at least 5 positions apart)
peaks, _ = find_peaks(difference_smoothed, distance=5)

# Plot the smoothed curve and mark the detected peaks
plt.clf()
plt.plot(difference_smoothed)
plt.plot(peaks, difference_smoothed[peaks], "x")
plt.show()

#### Build the final text file using the detected paragraph breaks

In [None]:
# Re-read the sentence-per-row file, dropping too short rows (same length
# filter as the embedding step, so indices line up with `peaks`) and
# stripping the line break and leading whitespace from each sentence.
with open(txt_file, 'r') as file:
    contents = [line.rstrip("\n").lstrip() for line in file if len(line) > 6]

# A peak at position i means a paragraph break before sentence i + 1.
# A set gives constant-time membership tests inside the loop.
paragraph_starts = {int(peak) + 1 for peak in peaks}

# Collect the pieces and join once instead of growing a string with +=
pieces = []
for index, sentence in enumerate(contents):
    if index in paragraph_starts:
        pieces.append("\n\n" + sentence)
    elif index == 0:
        # No separator before the very first sentence
        pieces.append(sentence)
    else:
        pieces.append(" " + sentence)
text = "".join(pieces)

# Overwrite the file with the paragraph-formatted text
with open(txt_file, 'w') as file:
    file.write(text)

#### Import OpenAI and define API_key
###### You have to use your own api key here

In [None]:
import openai
# Read the API key from a local file named OPENAI_API_KEY.
# Surrounding whitespace is stripped: a trailing newline in the file would
# otherwise become part of the key.
with open("OPENAI_API_KEY", 'r') as file:
    API_key = file.read().strip()

#### Split the text into chunks so that the OpenAI token limit is not exceeded

In [None]:
def split_text_to_chunks(text, max_len=10000):
    """Split text into chunks that end on a sentence boundary.

    A chunk is closed at the first '.', '!' or '?' found once the chunk is
    at least ``max_len`` characters long, so chunks can be slightly longer
    than ``max_len`` but never end in the middle of a sentence.

    Args:
        text: The text to split.
        max_len: Minimum number of characters in a chunk before it may be
            closed at the next sentence terminator.

    Returns:
        A list of chunks in their original order. Text that never reaches a
        split point is returned as a single-element list (including the
        empty string). A remainder that is empty or whitespace-only after
        the last split point is dropped, so no blank chunk is produced.
    """
    chunks = []
    chunk_start = 0
    for i, char in enumerate(text):
        # i + 1 - chunk_start is the length of the chunk including this char
        if i + 1 - chunk_start >= max_len and char in ".!?":
            chunks.append(text[chunk_start:i + 1])
            chunk_start = i + 1

    remainder = text[chunk_start:]
    # Keep the remainder when it is the only chunk or when it has real content
    if not chunks or remainder.strip():
        chunks.append(remainder)
    return chunks

# Split the paragraph-formatted transcript into chunks; each chunk is sent to the API separately
chunks = split_text_to_chunks(text)

#### Use the OpenAI API to get bullet points for each text chunk

In [None]:
openai.api_key = API_key
bullet_points = []

# Instruction given to the model for every chunk
system_prompt = "You are a student whose job is to summarize following text in to bullet points in the original language. You can use your own knowledge also when making notes if you notice some inconsistencies or typos on the lecture text. Use your own words and do not just copy the lecture."

# NOTE(review): openai.ChatCompletion looks like the pre-1.0 client interface;
# confirm it matches the installed openai package version.
# The loop variable is named `chunk` so the global `text` (the full transcript)
# is not overwritten.
for chunk in chunks:
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": chunk},
        ],
    )
    bullet_points.append(response.choices[0].message.content)

# Separate the summaries with a line break so the last bullet of one chunk
# does not run into the first bullet of the next
with open("Bullet_points_"+name+".txt", 'w') as file:
    file.write('\n'.join(bullet_points))