# Youtube Video Title Predictor for Dhruv Rathee
A **Machine Learning** `Model` to Predict Title of Youtube Videos of Specifically Dhruv Rathee With Input of `Description` and `No. of Views`.

## Fetch The Data

In [1]:
from fetch_data import fetch
fetch()

## Vectorize The Data

In [2]:
from vectorize_data import vectorize_video_text
vectorize_video_text()

## Load The Data

In [3]:
import pandas as pd
videos = pd.read_csv("vectorized_dhruv_rathee_videos.csv")
videos.head()

Unnamed: 0.1,Unnamed: 0,Title,Description,Views,Likes
0,0,[[0.33333333 0.33333333 0.33333333 0.33333333 ...,[[0.02941176 0.02941176 0.02941176 0.05882353 ...,2793372,145552
1,1,[[0.40824829 0.40824829 0.40824829 0.40824829 ...,[[0.0298275 0.0298275 0.0298275 0.0298275 ...,3314998,167680
2,2,[[0.37796447 0.37796447 0.37796447 0.37796447 ...,[[0.03286204 0.03286204 0.03286204 0.06572408 ...,5896099,328429
3,3,[[0.37796447 0.37796447 0.37796447 0.37796447 ...,[[0.03041495 0.03041495 0.03041495 0.03041495 ...,4141493,198121
4,4,[[0.37796447 0.37796447 0.37796447 0.37796447 ...,[[0.0302337 0.0302337 0.0302337 0.0302337 ...,3296577,131908


## Info of the DataSet

In [4]:
videos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 371 entries, 0 to 370
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Unnamed: 0   371 non-null    int64 
 1   Title        371 non-null    object
 2   Description  371 non-null    object
 3   Views        371 non-null    int64 
 4   Likes        371 non-null    int64 
dtypes: int64(3), object(2)
memory usage: 14.6+ KB


In [5]:
videos.describe()

Unnamed: 0.1,Unnamed: 0,Views,Likes
count,371.0,371.0,371.0
mean,185.0,3335111.0,183855.299191
std,107.242715,2364448.0,113729.54529
min,0.0,54558.0,1424.0
25%,92.5,1612798.0,101329.0
50%,185.0,2826966.0,166028.0
75%,277.5,4581922.0,239584.5
max,370.0,12864340.0,718797.0


## Split Input and Output from the DataSet

In [6]:
# Store the remaining columns in 'X'
X = videos.drop('Title', axis=1)

# Split the 'Title' column into a separate variable 'y'
y = videos['Title']

In [7]:
X.head()

Unnamed: 0.1,Unnamed: 0,Description,Views,Likes
0,0,[[0.02941176 0.02941176 0.02941176 0.05882353 ...,2793372,145552
1,1,[[0.0298275 0.0298275 0.0298275 0.0298275 ...,3314998,167680
2,2,[[0.03286204 0.03286204 0.03286204 0.06572408 ...,5896099,328429
3,3,[[0.03041495 0.03041495 0.03041495 0.03041495 ...,4141493,198121
4,4,[[0.0302337 0.0302337 0.0302337 0.0302337 ...,3296577,131908


In [8]:
y.head()

0    [[0.33333333 0.33333333 0.33333333 0.33333333 ...
1    [[0.40824829 0.40824829 0.40824829 0.40824829 ...
2    [[0.37796447 0.37796447 0.37796447 0.37796447 ...
3    [[0.37796447 0.37796447 0.37796447 0.37796447 ...
4    [[0.37796447 0.37796447 0.37796447 0.37796447 ...
Name: Title, dtype: object

# Split `Train` and `Test` Set

In [9]:
from sklearn.model_selection import train_test_split

# Split Dataset Into Train and Test Set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Take Info of Both `Train` and `Test` Set

In [10]:
X_train.head()

Unnamed: 0.1,Unnamed: 0,Description,Views,Likes
192,192,[[0.06119901 0.0305995 0.0305995 0.0305995 ...,1695648,117018
75,75,[[0.03077287 0.03077287 0.03077287 0.03077287 ...,6060899,325096
84,84,[[0.02786391 0.02786391 0.05572782 0.05572782 ...,2860754,137997
359,359,[[0.01933834 0.01933834 0.01933834 0.01933834 ...,208711,13237
16,16,[[0.03307706 0.06615412 0.03307706 0.03307706 ...,2035775,110762


In [12]:
y_train.head()

192    [[0.33333333 0.66666667 0.33333333 0.33333333 ...
75     [[0.35355339 0.35355339 0.35355339 0.35355339 ...
84     [[0.31622777 0.31622777 0.31622777 0.31622777 ...
359    [[0.30151134 0.30151134 0.30151134 0.30151134 ...
16     [[0.33333333 0.33333333 0.33333333 0.33333333 ...
Name: Title, dtype: object

In [14]:
X_test.head()

Unnamed: 0.1,Unnamed: 0,Description,Views,Likes
327,327,[[0.02448511 0.02448511 0.02448511 0.02448511 ...,1138893,57851
33,33,[[0.03205853 0.09617558 0.03205853 0.03205853 ...,5666776,213287
15,15,[[0.03175003 0.03175003 0.03175003 0.06350006 ...,5914905,239606
314,314,[[0.02423219 0.02423219 0.02423219 0.07269657 ...,816234,50920
57,57,[[0.03196014 0.03196014 0.03196014 0.03196014 ...,7154900,365901


In [15]:
y_test.head()

327    [[0.33333333 0.33333333 0.33333333 0.33333333 ...
33     [[0.33333333 0.33333333 0.33333333 0.33333333 ...
15     [[0.37796447 0.37796447 0.37796447 0.37796447 ...
314    [[0.37796447 0.37796447 0.37796447 0.37796447 ...
57     [[0.33333333 0.33333333 0.33333333 0.33333333 ...
Name: Title, dtype: object