# Classify your YouTube history

## Prepare data

In [1]:
import pandas as pd
import numpy as np
import re
import nltk

from preparation import prepare_data
from select_by_date_interval import select_by_date_interval

In [2]:
PATH = "C:/Users/San/Documents/CS projects/yt_activity_analysis/data/Takeout/YouTube and YouTube Music/history/watch-history.json"
df = prepare_data(PATH)

start_date = "2023-01-01"
# end_date = "2022-11-01"
df = select_by_date_interval(df, start=start_date)
# df = df.loc[df["app"] == "YouTube Music"] # if you wanna work only with YT Music data
df

Unnamed: 0,video_title,channel_name,time,app
1,Жбурляю,Харцизи,2023-04-04 14:59:28.805000+03:00,YouTube Music
2,Ліхтар,Rohata Zhaba,2023-04-04 14:25:11.177000+03:00,YouTube
3,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,2023-04-04 14:12:17.193000+03:00,YouTube
4,Test Your English Vocabulary: SHAPES & PATTERNS,Learn English with Gill · engVid,2023-04-04 14:06:00.179000+03:00,YouTube
5,Finland joins NATO in historic shift prompted ...,FRANCE 24 English,2023-04-04 14:02:41.452000+03:00,YouTube
...,...,...,...,...
4391,Assassin's Creed Odyssey - Before You Buy,gameranx,2023-01-01 10:02:05.355000+02:00,YouTube
4392,#Ukraine's spy chief tells ABC News there will...,ABC News,2023-01-01 09:44:58.841000+02:00,YouTube
4393,Russian missile zooms over Kyiv before being s...,The Sun,2023-01-01 09:43:07.927000+02:00,YouTube
4394,YARMAK FT. TOF - МОЯ КРАЇНА,Yarmak Music,2023-01-01 00:07:14.838000+02:00,YouTube


Let's think about possible categories:
- music - it's the easiest one because I can do it without any classification algo by treating every video in YouTube Music as a song. However, you'll need to handle songs you listened in YouTube app. Maybe just check if video_title or channel_name happened already in YouTube Music. If so, then it's a song as well
- entertainment - games, movies, books, and other not so productive stuff
- enlightment/learning - english, math, cs, books, and other subjects
- politics - war, inner policies, and so on

However, what if some videos combine different categories? For example, a history video can be both entertaining and enlightning

In [3]:
nltk.download('toolbox')

[nltk_data] Downloading package toolbox to
[nltk_data]     C:\Users\San\AppData\Roaming\nltk_data...
[nltk_data]   Package toolbox is already up-to-date!


True

In [4]:
def categorize():
    pass

Let's start with deciding whether the video is a song or not

In [5]:
df["app"].value_counts()

YouTube          2507
YouTube Music    1888
Name: app, dtype: int64

In [6]:
def initial_classification(row):
    if row["app"] == "YouTube Music":
        category = "Music"
    else:
        category = "Not Music"
    return category

In [7]:
df["category"] = df.apply(initial_classification, axis=1)
df.head(3)

Unnamed: 0,video_title,channel_name,time,app,category
1,Жбурляю,Харцизи,2023-04-04 14:59:28.805000+03:00,YouTube Music,Music
2,Ліхтар,Rohata Zhaba,2023-04-04 14:25:11.177000+03:00,YouTube,Not Music
3,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,2023-04-04 14:12:17.193000+03:00,YouTube,Not Music


In [8]:
music_df = df.loc[df["app"] == "YouTube Music"]

In [9]:
"Antytila" in music_df["channel_name"].values

True

In [10]:
def classification(row):
    category = row["category"]
    if row["category"] == "Not Music":
        if row["channel_name"] in music_df["channel_name"].values:
            category = "Music"
        elif row["video_title"] in music_df["video_title"].values:
            category = "Music"
    return category

In [11]:
df["category"] = df.apply(classification, axis=1)
df

Unnamed: 0,video_title,channel_name,time,app,category
1,Жбурляю,Харцизи,2023-04-04 14:59:28.805000+03:00,YouTube Music,Music
2,Ліхтар,Rohata Zhaba,2023-04-04 14:25:11.177000+03:00,YouTube,Not Music
3,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,2023-04-04 14:12:17.193000+03:00,YouTube,Not Music
4,Test Your English Vocabulary: SHAPES & PATTERNS,Learn English with Gill · engVid,2023-04-04 14:06:00.179000+03:00,YouTube,Not Music
5,Finland joins NATO in historic shift prompted ...,FRANCE 24 English,2023-04-04 14:02:41.452000+03:00,YouTube,Not Music
...,...,...,...,...,...
4391,Assassin's Creed Odyssey - Before You Buy,gameranx,2023-01-01 10:02:05.355000+02:00,YouTube,Not Music
4392,#Ukraine's spy chief tells ABC News there will...,ABC News,2023-01-01 09:44:58.841000+02:00,YouTube,Not Music
4393,Russian missile zooms over Kyiv before being s...,The Sun,2023-01-01 09:43:07.927000+02:00,YouTube,Not Music
4394,YARMAK FT. TOF - МОЯ КРАЇНА,Yarmak Music,2023-01-01 00:07:14.838000+02:00,YouTube,Music


In [12]:
df["category"].value_counts()

Not Music    2378
Music        2017
Name: category, dtype: int64

Well, with several rules (that is a Rule-based system or smt), I got 129 more videos classified as songs. However, there are game/movie soundtrack collections classified as 'Not Music'

Time to assign some labels

In [13]:
# after sampling, drop useless for classification 
# cols such as weekday and time
# subdf = df.sample(n=109, random_state=42).drop(["time", "weekday"], axis=1)
# subdf.head(5)

To assign labels, sample your df, and save that sample in .csv file. Then in the .csv file itself, manualy assign labels and import for further actions

In [14]:
# comented out, so I don't accidentadly override my labeled data with unlabeled one
# subdf.to_csv("labeled.csv", index=False)

Possible labels:
- music
- rec (short form of recreation)/entertainment - games, movies, fictional books, and other not so productive stuff
- studies (enlightment/learning/study) - english, math, cs, books, and other subjects
- politics - war, inner politics, news, and so on
- sport - workouts and so on

However, what if some videos combine different categories? For example, a history video can be both entertaining and enlightning

In [15]:
labeled_df = pd.read_csv('labeled.csv', header = 0)
labeled_df

Unnamed: 0,video_title,channel_name,app,category
0,Valhalla (Extended Mix),Miss Monique,YouTube Music,Music
1,Why You Should Read The Stormlight Archive - B...,Daniel Greene,YouTube,Rec
2,"Тихо прийшов, тихо пішов або пісня спеціальног...",Riffmaster,YouTube Music,Music
3,"🎙️ [SingingMarch] ♯23 MARIAH CAREY – ""Whenever...",Mioune,YouTube,Music
4,Ukraine frontline: the battle for Bakhmut - B...,BBC News,YouTube,Politics
...,...,...,...,...
104,Браття українці,Shablya,YouTube Music,Music
105,Tran,Miss Monique,YouTube Music,Music
106,How South Koreans got so much taller,Vox,YouTube,Studies
107,Deep Rock Galactic - Playstation Launch Traile...,PlayStation,YouTube,Rec


In [16]:
labeled_df["category"].value_counts()

Music       52
Rec         21
Politics    20
Studies     14
Sport        2
Name: category, dtype: int64

In [17]:
labeled_df["target_label"] = labeled_df["category"].map({
    "Music": 0,
    "Rec": 1,
    "Politics": 2,
    "Studies": 3,
    "Sport": 4
})
labeled_df

Unnamed: 0,video_title,channel_name,app,category,target_label
0,Valhalla (Extended Mix),Miss Monique,YouTube Music,Music,0
1,Why You Should Read The Stormlight Archive - B...,Daniel Greene,YouTube,Rec,1
2,"Тихо прийшов, тихо пішов або пісня спеціальног...",Riffmaster,YouTube Music,Music,0
3,"🎙️ [SingingMarch] ♯23 MARIAH CAREY – ""Whenever...",Mioune,YouTube,Music,0
4,Ukraine frontline: the battle for Bakhmut - B...,BBC News,YouTube,Politics,2
...,...,...,...,...,...
104,Браття українці,Shablya,YouTube Music,Music,0
105,Tran,Miss Monique,YouTube Music,Music,0
106,How South Koreans got so much taller,Vox,YouTube,Studies,3
107,Deep Rock Galactic - Playstation Launch Traile...,PlayStation,YouTube,Rec,1


In [18]:
def preprocess(text):
    sentences = re.split("[\n\t]", text)
    # remove empty lines
    sentences = [sentence for sentence in sentences if sentence]
    # further cleaning
    sentences = [re.sub(r"[^0-9a-zA-Z\s]", "", sentence, re.I|re.A).lower() for sentence in sentences]
    sentences = [sentence.lower().strip() for sentence in sentences]
    wpt = nltk.WordPunctTokenizer()
    stop_words = nltk.corpus.stopwords.words("english")
    output = []
    for sentence in sentences:
        tokens = wpt.tokenize(sentence)
        filtered_tokens = [token for token in tokens if token not in stop_words]
        output.append(" ".join(filtered_tokens))
    return " ".join(output)

df["prepped"] = df["video_title"].apply(preprocess)
df.head(5)

Unnamed: 0,video_title,channel_name,time,app,category,prepped
1,Жбурляю,Харцизи,2023-04-04 14:59:28.805000+03:00,YouTube Music,Music,
2,Ліхтар,Rohata Zhaba,2023-04-04 14:25:11.177000+03:00,YouTube,Not Music,
3,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,2023-04-04 14:12:17.193000+03:00,YouTube,Not Music,drinkers chasers another rey skywalker movie
4,Test Your English Vocabulary: SHAPES & PATTERNS,Learn English with Gill · engVid,2023-04-04 14:06:00.179000+03:00,YouTube,Not Music,test english vocabulary shapes patterns
5,Finland joins NATO in historic shift prompted ...,FRANCE 24 English,2023-04-04 14:02:41.452000+03:00,YouTube,Not Music,finland joins nato historic shift prompted ukr...


From above, we can see that non-english text is not handled properly

Time for some language detection

In [19]:
from langdetect import detect
detect("Тихо прийшов, тихо пішов або пісня спеціального...")

'uk'

In [20]:
from langdetect import detect_langs
detect_langs("Тихо прийшов, тихо пішов або пісня спеціального...")

[uk:0.9999963383809275]

In [21]:
# from langdetect import detect

# def detect_language(text):
#     try:
#         return detect(text)
#     except:
#         return 'unknown'

# df['language'] = df['video_title'].apply(detect_language)
# df["language"].value_counts().head(10)

Well, this library mislabaled a lot of rows. I need to either read the library documentation or treat empty cell in 'prepped' as ukrainian video and again preprocess it but this time using ukrainian preprocessing func

In [22]:
# df.drop(["prepped"], axis=1).to_csv("languages_detected.csv", index=False)

I believe language detection is a dead end. I should try first preprocessing English text. If it returns empty string, I should preprocess the text with ukrainian preprocessing func. If it fails again, I should just drop the row

In [23]:
df.head()

Unnamed: 0,video_title,channel_name,time,app,category,prepped
1,Жбурляю,Харцизи,2023-04-04 14:59:28.805000+03:00,YouTube Music,Music,
2,Ліхтар,Rohata Zhaba,2023-04-04 14:25:11.177000+03:00,YouTube,Not Music,
3,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,2023-04-04 14:12:17.193000+03:00,YouTube,Not Music,drinkers chasers another rey skywalker movie
4,Test Your English Vocabulary: SHAPES & PATTERNS,Learn English with Gill · engVid,2023-04-04 14:06:00.179000+03:00,YouTube,Not Music,test english vocabulary shapes patterns
5,Finland joins NATO in historic shift prompted ...,FRANCE 24 English,2023-04-04 14:02:41.452000+03:00,YouTube,Not Music,finland joins nato historic shift prompted ukr...


In [24]:
len(df)

4395

In [25]:
count = sum(df['prepped'].str.len() < 4)
print(count)

991


We can see that around quarter of all rows are probably ukrainian videos

In [26]:
import nltk
# nltk.download('stopwords')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('punkt')
# nltk.download('tagsets')
# nltk.download('wordnet')
# nltk.download('omw')
# nltk.download('words')
nltk.download('ukrainian')

[nltk_data] Error loading ukrainian: Package 'ukrainian' not found in
[nltk_data]     index


False

In [27]:
from nltk.corpus import stopwords
print(stopwords.fileids())

['arabic', 'azerbaijani', 'basque', 'bengali', 'catalan', 'chinese', 'danish', 'dutch', 'english', 'finnish', 'french', 'german', 'greek', 'hebrew', 'hinglish', 'hungarian', 'indonesian', 'italian', 'kazakh', 'nepali', 'norwegian', 'portuguese', 'romanian', 'russian', 'slovene', 'spanish', 'swedish', 'tajik', 'turkish']


In [28]:
def preprocess_ukr(text):
    sentences = re.split("[\n\t]", text)
    # remove empty lines
    sentences = [sentence for sentence in sentences if sentence]
    # further cleaning
    sentences = [re.sub(r"[^0-9а-яА-Я\s]", "", sentence, re.I|re.A).lower() for sentence in sentences]
    sentences = [sentence.lower().strip() for sentence in sentences]
    wpt = nltk.WordPunctTokenizer()
    # stop_words = nltk.corpus.stopwords.words("ukrainian")
    output = []
    for sentence in sentences:
        tokens = wpt.tokenize(sentence)
        filtered_tokens = [token for token in tokens]
        output.append(" ".join(filtered_tokens))
    return " ".join(output) if len(output) >= 4 else ""

# filter DataFrame to only include rows where 'prepped' has length < 4
empty_rows = df[df['prepped'].str.len() < 4]

# apply ukrainian preprocessing to the 'video_title' column for these rows
empty_rows['prepped_ukr'] = empty_rows['video_title'].apply(preprocess_ukr)

# update the original DataFrame with the preprocessed values for the empty rows
df.update(empty_rows)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_rows['prepped_ukr'] = empty_rows['video_title'].apply(preprocess_ukr)


In [29]:
empty_rows

Unnamed: 0,video_title,channel_name,time,app,category,prepped,prepped_ukr
1,Жбурляю,Харцизи,2023-04-04 14:59:28.805000+03:00,YouTube Music,Music,,
2,Ліхтар,Rohata Zhaba,2023-04-04 14:25:11.177000+03:00,YouTube,Not Music,,
9,Мавка,Authentix,2023-04-04 12:57:54.594000+03:00,YouTube,Not Music,,
10,Харцизи - Забуті боги,Харцизи,2023-04-04 12:50:32.692000+03:00,YouTube,Music,,
13,Злива,Харцизи,2023-04-04 12:44:35.400000+03:00,YouTube,Music,,
...,...,...,...,...,...,...,...
4257,"Тихо прийшов, тихо пішов або пісня спеціальног...",Riffmaster,2023-01-03 17:29:21.578000+02:00,YouTube Music,Music,,
4310,Сухпай Збройних сил Республіки Корея 한국군 배급,ХЛОПЦІ З ЛІСУ,2023-01-02 21:39:10.283000+02:00,YouTube,Not Music,,
4335,"D.M.G. - Ненавижу , блять , цыган!!!",Сметанин Василий,2023-01-02 13:01:31.968000+02:00,YouTube,Not Music,dmg,
4346,Як військові відреагували на звернення Зеленсь...,Ukrainian Witness,2023-01-02 08:53:54.148000+02:00,YouTube,Not Music,,


Come to think of it, let's keep it simple and select only english rows for now

In [30]:
prepped_df = df[df['prepped'].str.len() >= 4]
prepped_df

Unnamed: 0,video_title,channel_name,time,app,category,prepped
3,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,2023-04-04 14:12:17.193000+03:00,YouTube,Not Music,drinkers chasers another rey skywalker movie
4,Test Your English Vocabulary: SHAPES & PATTERNS,Learn English with Gill · engVid,2023-04-04 14:06:00.179000+03:00,YouTube,Not Music,test english vocabulary shapes patterns
5,Finland joins NATO in historic shift prompted ...,FRANCE 24 English,2023-04-04 14:02:41.452000+03:00,YouTube,Not Music,finland joins nato historic shift prompted ukr...
6,Finland joins NATO in the alliance's fastest-e...,euronews,2023-04-04 14:01:33.369000+03:00,YouTube,Not Music,finland joins nato alliances fastestever acces...
7,Finland's Election Results Explained: How Sann...,TLDR News EU,2023-04-04 14:01:00.878000+03:00,YouTube,Not Music,finlands election results explained sanna mari...
...,...,...,...,...,...,...
4390,Pathfinder: Kingmaker Review,MandaloreGaming,2023-01-01 10:15:11.845000+02:00,YouTube,Not Music,pathfinder kingmaker review
4391,Assassin's Creed Odyssey - Before You Buy,gameranx,2023-01-01 10:02:05.355000+02:00,YouTube,Not Music,assassins creed odyssey buy
4392,#Ukraine's spy chief tells ABC News there will...,ABC News,2023-01-01 09:44:58.841000+02:00,YouTube,Not Music,ukraines spy chief tells abc news likely attac...
4393,Russian missile zooms over Kyiv before being s...,The Sun,2023-01-01 09:43:07.927000+02:00,YouTube,Not Music,russian missile zooms kyiv shot time


# CLASSIFICATION WITH ALREADY PREPARED SAMPLE

In [149]:
df = pd.read_csv('sample_1000.csv', index_col=0)
df.tail(3)

Unnamed: 0,video_title,channel_name,app,category,prepped,time
997,Would I Like to Visit Ukraine? - April Q&A,Jake Broe,YouTube,Politics,would like visit ukraine april qa,2023-04-04 10:47:03.150000+03:00
998,Deep Rock Galactic - 5th Anniversary Space Rig...,Thai,YouTube,Music,deep rock galactic 5th anniversary space rig m...,2023-04-04 12:47:36.411000+03:00
999,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,YouTube,Rec,drinkers chasers another rey skywalker movie,2023-04-04 14:12:17.193000+03:00


In [141]:
df["category"].value_counts()

Music       497
Rec         195
Politics    158
Studies     120
Sport        30
Name: category, dtype: int64

In [148]:
# df['target_label'] = pd.factorize(df['category'])[0]
df["target_label"] = df["category"].map({
    "Music": 0,
    "Rec": 1,
    "Politics": 2,
    "Studies": 3,
    "Sport": 4
})
df.tail(3)

Unnamed: 0,video_title,channel_name,app,category,prepped,time,target_label
997,Would I Like to Visit Ukraine? - April Q&A,Jake Broe,YouTube,Politics,would like visit ukraine april qa,2023-04-04 10:47:03.150000+03:00,2
998,Deep Rock Galactic - 5th Anniversary Space Rig...,Thai,YouTube,Music,deep rock galactic 5th anniversary space rig m...,2023-04-04 12:47:36.411000+03:00,0
999,Drinker's Chasers - ANOTHER Rey Skywalker Movie?!,Critical Drinker After Hours,YouTube,Rec,drinkers chasers another rey skywalker movie,2023-04-04 14:12:17.193000+03:00,1


In [143]:
df["target_label"].value_counts()

0    497
1    195
2    158
3    120
4     30
Name: target_label, dtype: int64

At last, time to try classification

In [144]:
from sklearn.model_selection import train_test_split

train_corpus, test_corpus, train_label_nums, test_label_nums, train_label_names, test_label_names = train_test_split(
    np.array(df['prepped']), np.array(df['target_label']), np.array(df['category']), test_size=0.2, random_state=0)
train_corpus.shape, test_corpus.shape

((800,), (200,))

In [145]:
from sklearn.model_selection import cross_val_score

from sklearn.feature_extraction.text import TfidfVectorizer
tv = TfidfVectorizer(min_df=0., max_df=1., norm='l2', use_idf=True, smooth_idf=True)
tv_train_features = tv.fit_transform(train_corpus)
tv_test_features = tv.transform(test_corpus)

In [146]:
from sklearn.svm import LinearSVC
svm = LinearSVC(penalty='l2', C=1, random_state=0)
svm.fit(tv_train_features, train_label_names)
svm_bow_tv_scores = cross_val_score(svm, tv_train_features, train_label_names, cv=5)
svm_bow_tv_mean_score = np.mean(svm_bow_tv_scores)

print('CV Accuracy (5-fold):', svm_bow_tv_scores)
print('Mean CV Accuracy:', svm_bow_tv_mean_score)
svm_bow_test_score = svm.score(tv_test_features, test_label_names)
print('Test Accuracy:', svm_bow_test_score)

CV Accuracy (5-fold): [0.84375 0.81875 0.80625 0.7875  0.79375]
Mean CV Accuracy: 0.8099999999999999
Test Accuracy: 0.795


In [147]:
from sklearn.metrics import classification_report
# predict labels for the test set
svm_predictions = svm.predict(tv_test_features)
# get the unique classes
unique_classes = list(set(test_label_names))
# print the classification report
print(classification_report(test_label_names, svm_predictions, labels=unique_classes))

              precision    recall  f1-score   support

     Studies       0.69      0.48      0.56        23
    Politics       0.80      0.67      0.73        48
       Sport       1.00      0.67      0.80         6
         Rec       0.80      0.88      0.84        42
       Music       0.80      0.93      0.86        81

    accuracy                           0.80       200
   macro avg       0.82      0.72      0.76       200
weighted avg       0.79      0.80      0.79       200



In [52]:
from sklearn.metrics import classification_report
# predict labels for the test set
svm_predictions = svm.predict(tv_test_features)
# get the unique classes
unique_classes = list(set(test_label_names))
# print the classification report
print(classification_report(test_label_names, svm_predictions, labels=unique_classes))

              precision    recall  f1-score   support

     Studies       0.82      0.90      0.86        20
    Politics       0.88      0.67      0.76        21
       Sport       1.00      1.00      1.00         4
         Rec       0.89      0.83      0.86        29
       Music       0.93      1.00      0.96        66

    accuracy                           0.90       140
   macro avg       0.90      0.88      0.89       140
weighted avg       0.90      0.90      0.90       140



Now let's see what labels were assigned in df format

In [53]:
# Assuming you have already trained your model and obtained the predicted labels
predicted_labels = svm.predict(tv_test_features)

# Create a new dataframe that contains the test data and the predicted labels
test_df = pd.DataFrame({'video_title': test_corpus, 'category': test_label_names, 'actual_label': test_label_nums, 'predicted_label': predicted_labels})

# Print the first 10 rows of the new dataframe
test_df.head(10)

Unnamed: 0,video_title,category,actual_label,predicted_label
0,happens arthur sawedoff shotgun instead revolver,Rec,1,Music
1,hero ages brandon sanderson stick landing part,Rec,1,Rec
2,let go frozensoundtrack version,Music,0,Music
3,kids song,Music,0,Music
4,one final effort,Music,0,Music
5,bad boys theme cops,Music,0,Music
6,sector,Music,0,Music
7,day 4 best full body yoga stretch 30 days yoga,Sport,4,Sport
8,containers vs vms whats difference,Studies,3,Studies
9,sonne,Music,0,Music


Select 100 rows from the big df of like 3-4k rows to test how the model performs in production or whatever it's called

In [59]:
# new rows were gotten with df.sample(n=600, random_state=42)
# random_state=42 because I want to get the same sample
# get the features for the new videos using the trained vectorizer
new_features = tv.transform(new_rows["prepped"])
# predict the category of the new videos using the trained LinearSVC
predicted_labels = svm.predict(new_features)
# add the predicted labels to the new_videos dataframe
new_rows['category'] = predicted_labels
new_rows

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_rows['category'] = predicted_labels


Unnamed: 0,video_title,channel_name,app,category,prepped
600,Sector,Daniel Deluxe,YouTube Music,Music,sector
601,"Let's talk about the tech layoffs, objectively.",Karolina Sowinska,YouTube,Politics,lets talk tech layoffs objectively
602,Defence strategy for small nations - force des...,Perun,YouTube,Politics,defence strategy small nations force design fr...
603,Berserkir,Danheim,YouTube Music,Music,berserkir
604,SadSvit - Силуети (feat. СТРУКТУРА ЩАСТЯ) Lyri...,SadSvit,YouTube Music,Music,sadsvit feat lyric video
...,...,...,...,...,...
695,Berserkir,Danheim,YouTube Music,Music,berserkir
696,Let's talk about Abrams approval and timelines...,Beau of the Fifth Column,YouTube,Politics,lets talk abrams approval timelines
697,Covert Operations,Adam Schneider,YouTube Music,Music,covert operations
698,What is NLP (Natural Language Processing)?,IBM Technology,YouTube,Studies,nlp natural language processing


In [60]:
# new_rows.to_csv("tried_600-700.csv")