# Whisper part

In [20]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())

True
1
0


In [21]:
from transformers import pipeline, logging

In [22]:
whisper = pipeline(task="automatic-speech-recognition",
                   model="openai/whisper-large-v3-turbo",
                   return_timestamps=True,
                   device=-1)

In [23]:
logging.set_verbosity_error()
text = whisper("audio.mp3")

In [24]:
text = text["text"]
text

" If you work and travel then this portable monitor setup is for you. It includes a portable monitor, a USB-C cable and a stand. The stand makes it possible to raise the monitor to eye level. This 4K monitor is 16 inches and it connects to the stand with this clever magnetic mechanism which is very strong. The monitor only needs one USB-C cable to work as it's capable to carry both video and power. I love this little setup and bring it with me everywhere I go since it significantly improves my productivity while working remotely."

# custom NLP part

In [25]:
import nltk, string
from nltk.corpus import stopwords

In [26]:
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Tymofii\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [27]:
stopwords_list = stopwords.words("english")

In [28]:
processed_text = [word for word in text.split() if word not in stopwords_list]

translator = str.maketrans({key: " " for key in string.punctuation})
clean_text = text.translate(translator)
clean_text = [word.lower() for word in clean_text.split() if word not in stopwords_list]

In [29]:
processed_text = " ".join(processed_text)
clean_text = " ".join(clean_text)

In [30]:
clean_text

'if work travel portable monitor setup it includes portable monitor usb c cable stand the stand makes possible raise monitor eye level this 4k monitor 16 inches connects stand clever magnetic mechanism strong the monitor needs one usb c cable work capable carry video power i love little setup bring everywhere i go since significantly improves productivity working remotely'

In [31]:
processed_text

'If work travel portable monitor setup you. It includes portable monitor, USB-C cable stand. The stand makes possible raise monitor eye level. This 4K monitor 16 inches connects stand clever magnetic mechanism strong. The monitor needs one USB-C cable work capable carry video power. I love little setup bring everywhere I go since significantly improves productivity working remotely.'

In [32]:
from nltk.tokenize import sent_tokenize

processed_text_lower = processed_text.lower()
sentences = sent_tokenize(processed_text)
sentences_lower = sent_tokenize(processed_text.lower())

print(sentences)

['If work travel portable monitor setup you.', 'It includes portable monitor, USB-C cable stand.', 'The stand makes possible raise monitor eye level.', 'This 4K monitor 16 inches connects stand clever magnetic mechanism strong.', 'The monitor needs one USB-C cable work capable carry video power.', 'I love little setup bring everywhere I go since significantly improves productivity working remotely.']


In [33]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [34]:
vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(sentences_lower)

In [35]:
tfidf.shape

(6, 44)

In [36]:
print(tfidf)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 56 stored elements and shape (6, 44)>
  Coords	Values
  (0, 11)	0.43520448196839484
  (0, 41)	0.35687390090050225
  (0, 38)	0.43520448196839484
  (0, 25)	0.35687390090050225
  (0, 22)	0.22296688810681925
  (0, 31)	0.35687390090050225
  (0, 43)	0.43520448196839484
  (1, 25)	0.3758906283126315
  (1, 22)	0.2348481170853437
  (1, 15)	0.4583952083881375
  (1, 14)	0.4583952083881375
  (1, 39)	0.3758906283126315
  (1, 3)	0.3758906283126315
  (1, 34)	0.3173526971608497
  (2, 22)	0.20229065052473877
  (2, 34)	0.2733574548145914
  (2, 36)	0.32378015489850587
  (2, 20)	0.39484695918835844
  (2, 26)	0.39484695918835844
  (2, 29)	0.39484695918835844
  (2, 9)	0.39484695918835844
  (2, 16)	0.39484695918835844
  (3, 22)	0.16414511486440758
  (3, 34)	0.22181099671779383
  (3, 37)	0.32039147287189657
  :	:
  (3, 21)	0.32039147287189657
  (3, 35)	0.32039147287189657
  (4, 41)	0.27406729112675954
  (4, 22)	0.17123115722445717
  (4, 39)	0.274067

In [37]:
tfidf.transpose().toarray()

array([[0.        , 0.        , 0.        , 0.32039147, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.32039147, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.29269782],
       [0.        , 0.37589063, 0.        , 0.        , 0.27406729,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.33422257,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.33422257,
        0.        ],
       [0.        , 0.        , 0.        , 0.32039147, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.32039147, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.29269782],
       [0.        , 0.        , 0.39484696, 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.29269782],
       [0.43520448, 0

In [38]:
import pandas as pd

In [39]:
word_scores = tfidf.sum(axis=0).transpose()
word_scores

matrix([[0.32039147],
        [0.32039147],
        [0.29269782],
        [0.64995792],
        [0.33422257],
        [0.33422257],
        [0.32039147],
        [0.32039147],
        [0.29269782],
        [0.39484696],
        [0.29269782],
        [0.43520448],
        [0.29269782],
        [0.32039147],
        [0.45839521],
        [0.45839521],
        [0.39484696],
        [0.29269782],
        [0.29269782],
        [0.32039147],
        [0.39484696],
        [0.32039147],
        [0.99548193],
        [0.33422257],
        [0.33422257],
        [0.73276453],
        [0.39484696],
        [0.33422257],
        [0.29269782],
        [0.39484696],
        [0.29269782],
        [0.5968903 ],
        [0.29269782],
        [0.29269782],
        [0.81252115],
        [0.32039147],
        [0.59784745],
        [0.32039147],
        [0.43520448],
        [0.64995792],
        [0.33422257],
        [0.63094119],
        [0.29269782],
        [0.43520448]])

In [40]:
word_scores_df = pd.DataFrame(word_scores, 
                              index=vectorizer.get_feature_names_out(), 
                              columns=["score"]).sort_values(by=["score"], ascending=False)
word_scores_df.drop(index=[word for word in word_scores_df.index
                           if word in stopwords_list + ["like"]], 
                    inplace=True)
word_scores_df

Unnamed: 0,score
monitor,0.995482
stand,0.812521
portable,0.732765
cable,0.649958
usb,0.649958
work,0.630941
setup,0.59689
includes,0.458395
travel,0.435204
eye,0.394847


In [41]:
sentence_scores = tfidf.sum(axis=1)
sentence_scores_df = pd.DataFrame(sentence_scores, columns=["score"]).sort_values(by=["score"], ascending=False)
sentence_scores_df

Unnamed: 0,score
5,3.459692
4,3.272836
3,3.269479
2,2.773663
0,2.599202
1,2.596663


In [42]:
sentence_scores_df = sentence_scores_df.reset_index()
sentence_scores_df

Unnamed: 0,index,score
0,5,3.459692
1,4,3.272836
2,3,3.269479
3,2,2.773663
4,0,2.599202
5,1,2.596663


In [43]:
top_n = 2
indexes = (sentence_scores_df.head(top_n)["index"].astype(int))

In [44]:
summary = " ".join(sentences[i] for i in indexes)
keywords = ", ".join(word_scores_df.head(3).index.tolist())

In [45]:
print("Text:")
print(text)
print("\nSummary:")
print(summary)
print("\nKeywords:")
print(keywords)

Text:
 If you work and travel then this portable monitor setup is for you. It includes a portable monitor, a USB-C cable and a stand. The stand makes it possible to raise the monitor to eye level. This 4K monitor is 16 inches and it connects to the stand with this clever magnetic mechanism which is very strong. The monitor only needs one USB-C cable to work as it's capable to carry both video and power. I love this little setup and bring it with me everywhere I go since it significantly improves my productivity while working remotely.

Summary:
I love little setup bring everywhere I go since significantly improves productivity working remotely. The monitor needs one USB-C cable work capable carry video power.

Keywords:
monitor, stand, portable
