In [64]:
import pandas as pd
import numpy as np
import joblib
from nltk.corpus import stopwords
import re

In [94]:
df = pd.read_json('subjects.json')

In [95]:
df['keywords'] = df['keywords'].apply(lambda x: x.lower())

In [96]:
df

Unnamed: 0,subject,keywords
0,pf,c language
1,chem1,chemistry hard water analysis alkalinity phase...
2,chem2,chemistry electrochemistry electrode battery f...
3,chem3,chemistry dry corrosion wet
4,chem4,chemistry fuel calorimeter petroleum combustion
...,...,...
95,dett2,differential equation equations transform tech...
96,dett3,differential equation equations transform tech...
97,dett4,differential equation equations transform tech...
98,dett5,differential equation equations transform tech...


In [83]:
def remove_stop_words(text):
    new_text = [word for word in text.split() if word not in stopwords.words('english')]
    s = set(new_text)
    text = " ".join(s)
    return text

In [97]:
df['keywords'] = df['keywords'].apply(lambda x: remove_stop_words(x))

In [98]:
new_corpus = df['keywords'].astype(str).tolist()

In [99]:
new_corpus

['language c',
 'water chemistry phase alkalinity analysis rule hard',
 'cell chemistry battery fuel electrochemistry electrode',
 'chemistry wet dry corrosion',
 'petroleum chemistry calorimeter fuel combustion',
 'polymer polymerization chemistry thermosetting addition thermoplastic monomer',
 'woodward-fieser chemistry absorption uv spectroscopy',
 'topology network modulation architecture communicatoin networks computer transmission osi tcp/ip networking analog digital',
 'aloha hamming code network link window networks computer layer sliding csma data networking crc checksum',
 'routing classless network ipv6 networks computer layer subnet ipv4 classful networking addressing',
 'service socket quality network silly congestion window networks computer tcp/ip flow rtp networking udp multiplex control',
 'http snmp smtp dns network telnet networks computer pop3 imap ftp networking dhcp',
 'er-diagram tables relational database management',
 'normalization dependency selection query i

In [100]:
def make_corpus():
    corpus = ""
    for each in df['keywords']:
        corpus = corpus + " " + each.lower()
    return corpus.strip()

In [101]:
main_corpus = make_corpus()

In [102]:
main_corpus = set(main_corpus.split())

## TF-IDF Vectorization

In [107]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [108]:
tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 3))

In [109]:
vectors = tfidf.fit_transform(df['keywords']).toarray()

In [110]:
vectors.shape

(100, 3836)

In [111]:
new_vec = tfidf.transform(['Understanding electrode potential'])

In [112]:
joblib.dump(tfidf, 'tfidf.joblib')

['tfidf.joblib']

## Multinomial Naive Bayes

In [123]:
from sklearn.naive_bayes import MultinomialNB

In [124]:
nb = MultinomialNB()

In [125]:
nb.fit(vectors, df['subject'])

In [126]:
nb.predict(new_vec)

array(['chem2'], dtype='<U6')

In [127]:
nb.predict(tfidf.transform(['What is Z Transform?'.lower()]))

array(['dett5'], dtype='<U6')

In [128]:
nb.predict(tfidf.transform(['z transform learn about z']))

array(['dett5'], dtype='<U6')

In [129]:
joblib.dump(nb, 'mnb_model.joblib')

['mnb_model.joblib']

# Trying Out YouTube

In [2]:
yt = pd.read_json('youtube.video_info.json')

In [4]:
yt = yt.drop('_id', axis=1)

In [5]:
yt

Unnamed: 0,videoId,title,description,views,likes,tag,url,channel_title,duration,publisedAt,Dislikes
0,mBoX_JCKZTE,Scrapy Course – Python Web Scraping for Beginners,The Scrapy Beginners Course will teach you eve...,436714,9886,,https://www.youtube.com/watch?v=mBoX_JCKZTE,freeCodeCamp.org,PT4H37M9S,2023-04-27T14:38:08Z,0
1,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,# DISCORD (NEW): https://discord.gg/C4J2uckpbR...,267940,5091,"[scrapy python tutorial, scrapy crawlspider, s...",https://www.youtube.com/watch?v=s4jtkzHhLzY,John Watson Rooney,PT23M22S,2020-12-09T19:00:09Z,0
2,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,Try Atlas: https://www.mongodb.com/cloud/atlas...,46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,CodeWithHarry,PT50M3S,2023-06-21T11:30:04Z,0
3,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,Today we learn how to build a professional web...,111544,2287,"[python, web crawler, web crawling, python web...",https://www.youtube.com/watch?v=m_3gjHGxIJc,NeuralNine,PT34M31S,2022-11-23T13:30:12Z,0
4,41opDqo1im8,Python Scrapy Tutorial for Beginners,This Python Scrapy tutorial for beginners that...,102,5,"[python scrapy tutorial for beginners, python ...",https://www.youtube.com/watch?v=41opDqo1im8,jGeek,PT1H59M,2023-03-22T06:07:19Z,0
...,...,...,...,...,...,...,...,...,...,...,...
595,fmKNmoZxiVU,Standard Template Library (STL) in c++ | Intr...,Standard Template Library (STL) mainly compose...,52898,644,"[stl in c++, stl in c++ tutorial, stl in c++ t...",https://www.youtube.com/watch?v=fmKNmoZxiVU,Education 4u,PT7M4S,2018-01-23T16:25:49Z,0
596,c9iREsYpayk,The C++ Standard Template Library (STL) | C++ ...,The Standard Template Library (STL) is a set o...,252714,9881,,https://www.youtube.com/watch?v=c9iREsYpayk,CodeWithHarry,PT14M49S,2020-09-12T07:39:51Z,0
597,-Svq5IYPWbc,STL Templates in C++ - Generic Functions and C...,"In this video, we will learn about STL templat...",16903,643,"[c++ stl templates, what are c++ templates, c+...",https://www.youtube.com/watch?v=-Svq5IYPWbc,CodeBeauty,PT19M7S,2024-02-27T15:02:12Z,0
598,kKJeekDKU30,C++ Templates: Must for Competitive Programmin...,"Download the best IDE for C, C# and C++: https...",364110,11354,,https://www.youtube.com/watch?v=kKJeekDKU30,CodeWithHarry,PT13M16S,2020-09-07T14:36:06Z,0


In [6]:
comments = pd.read_json('youtube.comments.json')

In [10]:
comments.rename(columns={'_id': 'videoId'}, inplace=True)

In [11]:
comments

Unnamed: 0,videoId,comments
0,s4jtkzHhLzY,[{'commentText': 'I struggle to understand all...
1,GogxAQ2JP4A,[{'commentText': 'bhai mera to connect hi nhi ...
2,m_3gjHGxIJc,[{'commentText': 'Limited Offer with Coupon Co...
3,41opDqo1im8,[{'commentText': 'I really wish you could spea...
4,irqbmMNs2Bo,[{'commentText': 'Thank you for made this vide...
...,...,...
849,XRcC7bAtL3c,"[{'commentText': 'Thank you sir', 'publishedAt..."
850,WLvU5EQVZqY,[{'commentText': '🟣 JOIN our 𝐋𝐈𝐕𝐄 𝐢𝐧𝐭𝐞𝐫𝐯𝐢𝐞𝐰 𝐭𝐫...
851,-b2lciNd2L4,[{'commentText': '- The mnemonic of Root's pos...
852,g_S5WuasWUE,[{'commentText': 'I understood what is happeni...


In [13]:
temp = pd.merge(yt, comments, on='videoId', how='left')

In [29]:
temp = temp.dropna(subset=['comments']).reset_index().drop(['index', 'level_0'], axis=1)

In [30]:
temp

Unnamed: 0,videoId,title,description,views,likes,tag,url,channel_title,duration,publisedAt,Dislikes,comments
0,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,# DISCORD (NEW): https://discord.gg/C4J2uckpbR...,267940,5091,"[scrapy python tutorial, scrapy crawlspider, s...",https://www.youtube.com/watch?v=s4jtkzHhLzY,John Watson Rooney,PT23M22S,2020-12-09T19:00:09Z,0,[{'commentText': 'I struggle to understand all...
1,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,Try Atlas: https://www.mongodb.com/cloud/atlas...,46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,CodeWithHarry,PT50M3S,2023-06-21T11:30:04Z,0,[{'commentText': 'bhai mera to connect hi nhi ...
2,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,Today we learn how to build a professional web...,111544,2287,"[python, web crawler, web crawling, python web...",https://www.youtube.com/watch?v=m_3gjHGxIJc,NeuralNine,PT34M31S,2022-11-23T13:30:12Z,0,[{'commentText': 'Limited Offer with Coupon Co...
3,41opDqo1im8,Python Scrapy Tutorial for Beginners,This Python Scrapy tutorial for beginners that...,102,5,"[python scrapy tutorial for beginners, python ...",https://www.youtube.com/watch?v=41opDqo1im8,jGeek,PT1H59M,2023-03-22T06:07:19Z,0,[{'commentText': 'I really wish you could spea...
4,irqbmMNs2Bo,C Language Tutorial for Beginners (with Notes ...,You can join the NEW Web Development batch us...,32553624,499466,"[C++, C++ coding, C++ full course, C++ placeme...",https://www.youtube.com/watch?v=irqbmMNs2Bo,Apna College,PT10H32M7S,2022-03-06T18:16:41Z,0,[{'commentText': 'Thank you for made this vide...
...,...,...,...,...,...,...,...,...,...,...,...,...
580,fmKNmoZxiVU,Standard Template Library (STL) in c++ | Intr...,Standard Template Library (STL) mainly compose...,52898,644,"[stl in c++, stl in c++ tutorial, stl in c++ t...",https://www.youtube.com/watch?v=fmKNmoZxiVU,Education 4u,PT7M4S,2018-01-23T16:25:49Z,0,"[{'commentText': 'Hindi me explain kar do', 'p..."
581,c9iREsYpayk,The C++ Standard Template Library (STL) | C++ ...,The Standard Template Library (STL) is a set o...,252714,9881,,https://www.youtube.com/watch?v=c9iREsYpayk,CodeWithHarry,PT14M49S,2020-09-12T07:39:51Z,0,[{'commentText': 'White ink on black board VS ...
582,-Svq5IYPWbc,STL Templates in C++ - Generic Functions and C...,"In this video, we will learn about STL templat...",16903,643,"[c++ stl templates, what are c++ templates, c+...",https://www.youtube.com/watch?v=-Svq5IYPWbc,CodeBeauty,PT19M7S,2024-02-27T15:02:12Z,0,[{'commentText': '🚀📈💻🔥 My Practical Programmin...
583,kKJeekDKU30,C++ Templates: Must for Competitive Programmin...,"Download the best IDE for C, C# and C++: https...",364110,11354,,https://www.youtube.com/watch?v=kKJeekDKU30,CodeWithHarry,PT13M16S,2020-09-07T14:36:06Z,0,[{'commentText': 'Competitive programming kon ...


In [48]:
def extract_comments(comments: list):
    comment_list = []
    for each in comments:
        comment_list.append(each['commentText'])

    return comment_list

In [49]:
extract_comments(temp['comments'][0])

['I struggle to understand all commands in Python, however John has opened the door to me with his videos on scraping, Thank you John',
 'in the yield command since we are accessing each product through for loop can we not just use getall or get for the task since it is anyway one product?\nThanks, for the video . Really helped me',
 'Thanks for all the videos, would you be able to do an update video/series for Scrapy?',
 'can you do this using Xpath',
 'This is too difficult... my python terminal doesnt even recognize the first few commands. Once you get stuck as a beginner youre pretty much screwed if you dont have someone to help you.',
 'Excellent tutorial.',
 "I'm getting a 403 error after pasting the url",
 'Thanks, this was really useful',
 "Sir, I need your help, how can I contact you, it's related a project, I am trying for so many days tomorrow is last day 🙂",
 'can scrapy work on websites which generate pages using scripts.',
 "Hi John, thanks for share your knowledge! I wan

In [51]:
temp['comments'] = temp['comments'].apply(extract_comments)

In [52]:
temp

Unnamed: 0,videoId,title,description,views,likes,tag,url,channel_title,duration,publisedAt,Dislikes,comments
0,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,# DISCORD (NEW): https://discord.gg/C4J2uckpbR...,267940,5091,"[scrapy python tutorial, scrapy crawlspider, s...",https://www.youtube.com/watch?v=s4jtkzHhLzY,John Watson Rooney,PT23M22S,2020-12-09T19:00:09Z,0,[I struggle to understand all commands in Pyth...
1,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,Try Atlas: https://www.mongodb.com/cloud/atlas...,46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,CodeWithHarry,PT50M3S,2023-06-21T11:30:04Z,0,[bhai mera to connect hi nhi ho rha cluster ke...
2,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,Today we learn how to build a professional web...,111544,2287,"[python, web crawler, web crawling, python web...",https://www.youtube.com/watch?v=m_3gjHGxIJc,NeuralNine,PT34M31S,2022-11-23T13:30:12Z,0,[Limited Offer with Coupon Code: NEURALNINE\n5...
3,41opDqo1im8,Python Scrapy Tutorial for Beginners,This Python Scrapy tutorial for beginners that...,102,5,"[python scrapy tutorial for beginners, python ...",https://www.youtube.com/watch?v=41opDqo1im8,jGeek,PT1H59M,2023-03-22T06:07:19Z,0,[I really wish you could speak better English.]
4,irqbmMNs2Bo,C Language Tutorial for Beginners (with Notes ...,You can join the NEW Web Development batch us...,32553624,499466,"[C++, C++ coding, C++ full course, C++ placeme...",https://www.youtube.com/watch?v=irqbmMNs2Bo,Apna College,PT10H32M7S,2022-03-06T18:16:41Z,0,"[Thank you for made this video ✨, Maa college ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
580,fmKNmoZxiVU,Standard Template Library (STL) in c++ | Intr...,Standard Template Library (STL) mainly compose...,52898,644,"[stl in c++, stl in c++ tutorial, stl in c++ t...",https://www.youtube.com/watch?v=fmKNmoZxiVU,Education 4u,PT7M4S,2018-01-23T16:25:49Z,0,"[Hindi me explain kar do, THRGPRB19KN95414, Te..."
581,c9iREsYpayk,The C++ Standard Template Library (STL) | C++ ...,The Standard Template Library (STL) is a set o...,252714,9881,,https://www.youtube.com/watch?v=c9iREsYpayk,CodeWithHarry,PT14M49S,2020-09-12T07:39:51Z,0,[White ink on black board VS Black ink on whit...
582,-Svq5IYPWbc,STL Templates in C++ - Generic Functions and C...,"In this video, we will learn about STL templat...",16903,643,"[c++ stl templates, what are c++ templates, c+...",https://www.youtube.com/watch?v=-Svq5IYPWbc,CodeBeauty,PT19M7S,2024-02-27T15:02:12Z,0,[🚀📈💻🔥 My Practical Programming Course: https:/...
583,kKJeekDKU30,C++ Templates: Must for Competitive Programmin...,"Download the best IDE for C, C# and C++: https...",364110,11354,,https://www.youtube.com/watch?v=kKJeekDKU30,CodeWithHarry,PT13M16S,2020-09-07T14:36:06Z,0,[Competitive programming kon kon karta hai? (y...


In [53]:
temp = temp.drop(['Dislikes', 'channel_title'], axis=1)

In [55]:
temp.head()

Unnamed: 0,videoId,title,description,views,likes,tag,url,duration,publisedAt,comments
0,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,# DISCORD (NEW): https://discord.gg/C4J2uckpbR...,267940,5091,"[scrapy python tutorial, scrapy crawlspider, s...",https://www.youtube.com/watch?v=s4jtkzHhLzY,PT23M22S,2020-12-09T19:00:09Z,[I struggle to understand all commands in Pyth...
1,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,Try Atlas: https://www.mongodb.com/cloud/atlas...,46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,PT50M3S,2023-06-21T11:30:04Z,[bhai mera to connect hi nhi ho rha cluster ke...
2,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,Today we learn how to build a professional web...,111544,2287,"[python, web crawler, web crawling, python web...",https://www.youtube.com/watch?v=m_3gjHGxIJc,PT34M31S,2022-11-23T13:30:12Z,[Limited Offer with Coupon Code: NEURALNINE\n5...
3,41opDqo1im8,Python Scrapy Tutorial for Beginners,This Python Scrapy tutorial for beginners that...,102,5,"[python scrapy tutorial for beginners, python ...",https://www.youtube.com/watch?v=41opDqo1im8,PT1H59M,2023-03-22T06:07:19Z,[I really wish you could speak better English.]
4,irqbmMNs2Bo,C Language Tutorial for Beginners (with Notes ...,You can join the NEW Web Development batch us...,32553624,499466,"[C++, C++ coding, C++ full course, C++ placeme...",https://www.youtube.com/watch?v=irqbmMNs2Bo,PT10H32M7S,2022-03-06T18:16:41Z,"[Thank you for made this video ✨, Maa college ..."


In [59]:
temp.isna().sum()

videoId        0
title          0
description    0
views          0
likes          0
tag            0
url            0
duration       0
publisedAt     0
comments       0
dtype: int64

In [58]:
temp['tag'] = temp['tag'].fillna("")

In [60]:
temp['tag'] = temp['tag'].apply(lambda x: " ".join(x))

In [63]:
temp.head()

Unnamed: 0,videoId,title,description,views,likes,tag,url,duration,publisedAt,comments
0,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,# DISCORD (NEW): https://discord.gg/C4J2uckpbR...,267940,5091,scrapy python tutorial scrapy crawlspider scra...,https://www.youtube.com/watch?v=s4jtkzHhLzY,PT23M22S,2020-12-09T19:00:09Z,[I struggle to understand all commands in Pyth...
1,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,Try Atlas: https://www.mongodb.com/cloud/atlas...,46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,PT50M3S,2023-06-21T11:30:04Z,[bhai mera to connect hi nhi ho rha cluster ke...
2,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,Today we learn how to build a professional web...,111544,2287,python web crawler web crawling python web cra...,https://www.youtube.com/watch?v=m_3gjHGxIJc,PT34M31S,2022-11-23T13:30:12Z,[Limited Offer with Coupon Code: NEURALNINE\n5...
3,41opDqo1im8,Python Scrapy Tutorial for Beginners,This Python Scrapy tutorial for beginners that...,102,5,python scrapy tutorial for beginners python sc...,https://www.youtube.com/watch?v=41opDqo1im8,PT1H59M,2023-03-22T06:07:19Z,[I really wish you could speak better English.]
4,irqbmMNs2Bo,C Language Tutorial for Beginners (with Notes ...,You can join the NEW Web Development batch us...,32553624,499466,C++ C++ coding C++ full course C++ placement c...,https://www.youtube.com/watch?v=irqbmMNs2Bo,PT10H32M7S,2022-03-06T18:16:41Z,"[Thank you for made this video ✨, Maa college ..."


In [70]:
from nltk.corpus import stopwords

In [71]:
import string

In [72]:
def remove_punc(text):
    return text.translate(str.maketrans("", "", string.punctuation))

In [73]:
def remove_url(text):
    pattern = re.compile(r'https?://\S+|www\.\S+')
    return pattern.sub(r'', text)

In [74]:
def remove_newline(text):
    pattern = re.compile(r'\n')
    return pattern.sub(r'', text)

In [75]:
def remove_emojis(text):
    pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"  # dingbats
                           u"\U000024C2-\U0001F251"  # enclosed characters
                           "]+", flags=re.UNICODE)
    return pattern.sub(r'', text)

In [76]:
def remove_timestamps(text):
    pattern = re.compile(r'\(.*?\)')
    return pattern.sub(r'', text)

In [77]:
def remove_non_alpha(text):
    pattern = re.compile(r'[^\x00-\x7F]+')
    return pattern.sub(r'', text)

In [78]:
from nltk.tokenize import sent_tokenize

In [79]:
from rake_nltk import Rake

In [80]:
rake = Rake()

In [84]:
def process_desc(desc):
    text = remove_non_alpha(remove_timestamps(remove_emojis(remove_newline(remove_url(desc)))))
    tokens = sent_tokenize(text)
    top_3 = tokens[:3]
    final = []
    for each in top_3:
        sentence = remove_stop_words(each)
        rake.extract_keywords_from_text(sentence)
        keywords = rake.get_ranked_phrases()
        for key in keywords:
            rake.extract_keywords_from_text(key)
            new_keywords = rake.get_ranked_phrases()
            final.append(new_keywords[0])
    return " ".join(final[:3])

In [86]:
temp['description'] = temp['description'].apply(process_desc)

In [87]:
temp['combo'] = temp['title'] + ' ' + temp['description'] + ' ' + temp['tag']

In [90]:
temp['combo'] = temp['combo'].apply(lambda x: x.lower())

In [91]:
temp.head()

Unnamed: 0,videoId,title,description,views,likes,tag,url,duration,publisedAt,comments,combo
0,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,discord beginners scrapy new python aimed tuto...,267940,5091,scrapy python tutorial scrapy crawlspider scra...,https://www.youtube.com/watch?v=s4jtkzHhLzY,PT23M22S,2020-12-09T19:00:09Z,[I struggle to understand all commands in Pyth...,scrapy for beginners - a complete how to examp...
1,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,"1 javascript student course c ++, min ]: css 4...",46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,PT50M3S,2023-06-21T11:30:04Z,[bhai mera to connect hi nhi ho rha cluster ke...,web scraping using scrapy | scrapy tutorial + ...
2,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,algorithm outro books proxy learn intro websit...,111544,2287,python web crawler web crawling python web cra...,https://www.youtube.com/watch?v=m_3gjHGxIJc,PT34M31S,2022-11-23T13:30:12Z,[Limited Offer with Coupon Code: NEURALNINE\n5...,coding web crawler in python with scrapy algor...
3,41opDqo1im8,Python Scrapy Tutorial for Beginners,beginners try introduction also newbies toward...,102,5,python scrapy tutorial for beginners python sc...,https://www.youtube.com/watch?v=41opDqo1im8,PT1H59M,2023-03-22T06:07:19Z,[I really wish you could speak better English.],python scrapy tutorial for beginners beginners...
4,irqbmMNs2Bo,C Language Tutorial for Beginners (with Notes ...,00java live day -- complete class 5000 life in...,32553624,499466,C++ C++ coding C++ full course C++ placement c...,https://www.youtube.com/watch?v=irqbmMNs2Bo,PT10H32M7S,2022-03-06T18:16:41Z,"[Thank you for made this video ✨, Maa college ...",c language tutorial for beginners (with notes ...


In [92]:
temp['combo'].isna().sum()

0

In [93]:
temp['comments'].isna().sum()

0

In [103]:
def keep_words(text):
    return " ".join(main_corpus.intersection(set(text.split())))

In [105]:
temp['combo'] = temp['combo'].apply(keep_words)

In [106]:
temp.head()

Unnamed: 0,videoId,title,description,views,likes,tag,url,duration,publisedAt,comments,combo
0,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,discord beginners scrapy new python aimed tuto...,267940,5091,scrapy python tutorial scrapy crawlspider scra...,https://www.youtube.com/watch?v=s4jtkzHhLzY,PT23M22S,2020-12-09T19:00:09Z,[I struggle to understand all commands in Pyth...,page scraping python scrapy web multiple shell...
1,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,"1 javascript student course c ++, min ]: css 4...",46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,PT50M3S,2023-06-21T11:30:04Z,[bhai mera to connect hi nhi ho rha cluster ke...,scraping python 1 c web mongodb css min data j...
2,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,algorithm outro books proxy learn intro websit...,111544,2287,python web crawler web crawling python web cra...,https://www.youtube.com/watch?v=m_3gjHGxIJc,PT34M31S,2022-11-23T13:30:12Z,[Limited Offer with Coupon Code: NEURALNINE\n5...,scraping python web algorithm proxy programmin...
3,41opDqo1im8,Python Scrapy Tutorial for Beginners,beginners try introduction also newbies toward...,102,5,python scrapy tutorial for beginners python sc...,https://www.youtube.com/watch?v=41opDqo1im8,PT1H59M,2023-03-22T06:07:19Z,[I really wish you could speak better English.],scraping python web try learning scrapy
4,irqbmMNs2Bo,C Language Tutorial for Beginners (with Notes ...,00java live day -- complete class 5000 life in...,32553624,499466,C++ C++ coding C++ full course C++ placement c...,https://www.youtube.com/watch?v=irqbmMNs2Bo,PT10H32M7S,2022-03-06T18:16:41Z,"[Thank you for made this video ✨, Maa college ...",code c full c++ language types alternate progr...


In [113]:
temp['vectors'] = temp['combo'].apply(lambda x: tfidf.transform([x]).toarray())

In [116]:
temp['vectors'] = temp['vectors'].apply(lambda x: x.reshape(1, -1))

In [117]:
temp.head()

Unnamed: 0,videoId,title,description,views,likes,tag,url,duration,publisedAt,comments,combo,vectors
0,s4jtkzHhLzY,Scrapy for Beginners - A Complete How To Examp...,discord beginners scrapy new python aimed tuto...,267940,5091,scrapy python tutorial scrapy crawlspider scra...,https://www.youtube.com/watch?v=s4jtkzHhLzY,PT23M22S,2020-12-09T19:00:09Z,[I struggle to understand all commands in Pyth...,page scraping python scrapy web multiple shell...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,GogxAQ2JP4A,Web Scraping using Scrapy | Scrapy Tutorial + ...,"1 javascript student course c ++, min ]: css 4...",46067,1012,,https://www.youtube.com/watch?v=GogxAQ2JP4A,PT50M3S,2023-06-21T11:30:04Z,[bhai mera to connect hi nhi ho rha cluster ke...,scraping python 1 c web mongodb css min data j...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,m_3gjHGxIJc,Coding Web Crawler in Python with Scrapy,algorithm outro books proxy learn intro websit...,111544,2287,python web crawler web crawling python web cra...,https://www.youtube.com/watch?v=m_3gjHGxIJc,PT34M31S,2022-11-23T13:30:12Z,[Limited Offer with Coupon Code: NEURALNINE\n5...,scraping python web algorithm proxy programmin...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
3,41opDqo1im8,Python Scrapy Tutorial for Beginners,beginners try introduction also newbies toward...,102,5,python scrapy tutorial for beginners python sc...,https://www.youtube.com/watch?v=41opDqo1im8,PT1H59M,2023-03-22T06:07:19Z,[I really wish you could speak better English.],scraping python web try learning scrapy,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,irqbmMNs2Bo,C Language Tutorial for Beginners (with Notes ...,00java live day -- complete class 5000 life in...,32553624,499466,C++ C++ coding C++ full course C++ placement c...,https://www.youtube.com/watch?v=irqbmMNs2Bo,PT10H32M7S,2022-03-06T18:16:41Z,"[Thank you for made this video ✨, Maa college ...",code c full c++ language types alternate progr...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


In [119]:
temp.to_pickle('pickling.pickle')

In [122]:
temp.to_json('pickling.json')

In [120]:
temp.to_csv('pickling.csv')