In [1]:
import numpy as np
import pandas as pd
import sklearn
import json
from pprint import pprint
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from collections import Counter
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics

from sklearn.model_selection import KFold 
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import FeatureUnion
from scipy.sparse import coo_matrix, hstack, vstack
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import OneHotEncoder, LabelBinarizer
from sklearn.metrics import f1_score

## Importando DataSet

In [2]:
filename = 'FRvideos.csv'
category_filename = 'FR_category_id.json'

videos = pd.read_csv(filename)
videos = videos.dropna(axis=0)
print("DataSet null inputs: \n"+str(videos.isnull().sum())+"\n")
print(filename+" shape: "+str(videos.shape))
videos.head()

DataSet null inputs: 
video_id                  0
trending_date             0
title                     0
channel_title             0
category_id               0
publish_time              0
tags                      0
views                     0
likes                     0
dislikes                  0
comment_count             0
thumbnail_link            0
comments_disabled         0
ratings_disabled          0
video_error_or_removed    0
description               0
dtype: int64

FRvideos.csv shape: (37812, 16)


Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description
0,Ro6eob0LrCY,17.14.11,Malika LePen : Femme de Gauche - Trailer,Le Raptor Dissident,24,2017-11-13T17:32:55.000Z,"Raptor""|""Dissident""|""Expliquez""|""moi""|""cette""|...",212702,29282,1108,3817,https://i.ytimg.com/vi/Ro6eob0LrCY/default.jpg,False,False,False,Dimanche.\n18h30.\nSoyez présents pour la vidé...
1,Yo84eqYwP98,17.14.11,"LA PIRE PARTIE ft Le Rire Jaune, Pierre Croce,...",Le Labo,24,2017-11-12T15:00:02.000Z,[none],432721,14053,576,1161,https://i.ytimg.com/vi/Yo84eqYwP98/default.jpg,False,False,False,Le jeu de société: https://goo.gl/hhG1Ta\n\nGa...
2,ceqntSXE-10,17.14.11,DESSINS ANIMÉS FRANÇAIS VS RUSSES 2 - Daniil...,Daniil le Russe,23,2017-11-13T17:00:38.000Z,"cartoon""|""pokémon""|""école""|""ours""|""мультфильм",482153,76203,477,9580,https://i.ytimg.com/vi/ceqntSXE-10/default.jpg,False,False,False,Une nouvelle dose de dessins animés français e...
3,WuTFI5qftCE,17.14.11,PAPY GRENIER - METAL GEAR SOLID,Joueur Du Grenier,20,2017-11-12T17:00:02.000Z,"Papy grenier""|""Metal Gear Solid""|""PS1""|""Tirage...",925222,85016,550,4303,https://i.ytimg.com/vi/WuTFI5qftCE/default.jpg,False,False,False,"Nouvel ,épisode de Papy Grenier ! Ce mois-ci o..."
4,ee6OFs8TdEg,17.14.11,QUI SAUTERA LE PLUS HAUT ? (VÉLO SKATE ROLLER ...,Aurelien Fontenoy,17,2017-11-13T16:30:03.000Z,"vélo""|""vtt""|""bmx""|""freestyle""|""bike""|""mtb""|""di...",141695,8091,72,481,https://i.ytimg.com/vi/ee6OFs8TdEg/default.jpg,False,False,False,Sauts à plus de 4 mètres de haut dans un tramp...


# Tratamento do DataSet

## Correção de formato de data-hora para padrão Unix

In [3]:
videos['trending_date'] = pd.to_datetime(videos['trending_date'], format='%y.%d.%m')
videos['publish_time'] = pd.to_datetime(videos['publish_time'], format='%Y-%m-%dT%H:%M:%S.%fZ')

# separates date and time into two columns from 'publish_time' column

videos.insert(4, 'publish_date', videos['publish_time'].dt.date)
videos['publish_time'] = videos['publish_time'].dt.time
videos['publish_date']=pd.to_datetime(videos['publish_date'])

videos.head()

Unnamed: 0,video_id,trending_date,title,channel_title,publish_date,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description
0,Ro6eob0LrCY,2017-11-14,Malika LePen : Femme de Gauche - Trailer,Le Raptor Dissident,2017-11-13,24,17:32:55,"Raptor""|""Dissident""|""Expliquez""|""moi""|""cette""|...",212702,29282,1108,3817,https://i.ytimg.com/vi/Ro6eob0LrCY/default.jpg,False,False,False,Dimanche.\n18h30.\nSoyez présents pour la vidé...
1,Yo84eqYwP98,2017-11-14,"LA PIRE PARTIE ft Le Rire Jaune, Pierre Croce,...",Le Labo,2017-11-12,24,15:00:02,[none],432721,14053,576,1161,https://i.ytimg.com/vi/Yo84eqYwP98/default.jpg,False,False,False,Le jeu de société: https://goo.gl/hhG1Ta\n\nGa...
2,ceqntSXE-10,2017-11-14,DESSINS ANIMÉS FRANÇAIS VS RUSSES 2 - Daniil...,Daniil le Russe,2017-11-13,23,17:00:38,"cartoon""|""pokémon""|""école""|""ours""|""мультфильм",482153,76203,477,9580,https://i.ytimg.com/vi/ceqntSXE-10/default.jpg,False,False,False,Une nouvelle dose de dessins animés français e...
3,WuTFI5qftCE,2017-11-14,PAPY GRENIER - METAL GEAR SOLID,Joueur Du Grenier,2017-11-12,20,17:00:02,"Papy grenier""|""Metal Gear Solid""|""PS1""|""Tirage...",925222,85016,550,4303,https://i.ytimg.com/vi/WuTFI5qftCE/default.jpg,False,False,False,"Nouvel ,épisode de Papy Grenier ! Ce mois-ci o..."
4,ee6OFs8TdEg,2017-11-14,QUI SAUTERA LE PLUS HAUT ? (VÉLO SKATE ROLLER ...,Aurelien Fontenoy,2017-11-13,17,16:30:03,"vélo""|""vtt""|""bmx""|""freestyle""|""bike""|""mtb""|""di...",141695,8091,72,481,https://i.ytimg.com/vi/ee6OFs8TdEg/default.jpg,False,False,False,Sauts à plus de 4 mètres de haut dans un tramp...


## Tratando para pegar somente a ultima entrada para cada video (a entrada mais atualizada)

In [4]:
videos_lastentry = videos.sort_values('trending_date', ascending=False).drop_duplicates(['video_id'])
print(filename+" shape: "+str(videos_lastentry.shape))
videos_lastentry.head()

FRvideos.csv shape: (28112, 17)


Unnamed: 0,video_id,trending_date,title,channel_title,publish_date,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description
40722,NlxE_QQMRzg,2018-06-14,"Նռան հատիկ, Սերիա 192 / Pomegranate seed / Nra...",PanArmenian TV,2018-06-13,1,18:30:00,"Նռան հատիկ|""Սերիա 192""|""Pomegranate seed""|""Nra...",78117,244,74,46,https://i.ytimg.com/vi/NlxE_QQMRzg/default.jpg,False,False,False,Follow Armenia TV on social platforms:Instagra...
40597,d0mgnlt-sgY,2018-06-14,COUPE DU MONDE - MES PRONOS !,Bruce & les Guez !,2018-06-13,20,13:51:45,"bruce grannec|""fifa""|""fifa 18""|""fifa18""|""fut""|...",74063,4445,222,774,https://i.ytimg.com/vi/d0mgnlt-sgY/default.jpg,False,False,False,Twitch - http://www.twitch.tv/BruceGrannecTwit...
40593,HyqTJpG_JbE,2018-06-14,هذه هي الدول التي لم تصوت على المغرب لإستضافة ...,DailyProFootball,2018-06-13,17,12:05:22,"المغرب 2026|""morocco 2026""|""كأس العالم 2026""|""...",184156,504,217,1368,https://i.ytimg.com/vi/HyqTJpG_JbE/default.jpg,False,False,False,FACEBOOK انضم الى صفحتنا علىhttps://www.facebo...
40592,LR308Yr8tsg,2018-06-14,LIVE NOW ! - 68th FIFA Congress 2018,FIFATV,2018-06-13,17,12:24:25,[none],312470,2752,412,133,https://i.ytimg.com/vi/LR308Yr8tsg/default.jpg,False,False,False,Follow the congress LIVE on FIFA on YouTube ! ...
40591,gGJKg4RgA1k,2018-06-14,LA FAQ DE WAUQUIEZ : L'ANALYSE de MisterJDay,MisterJDay,2018-06-11,23,16:32:17,"Laurent Wauquiez|""FAQ""|""Foire aux questions""|""...",209862,25495,875,2479,https://i.ytimg.com/vi/gGJKg4RgA1k/default.jpg,False,False,False,Hier matin j'avais envie de faire une pause da...


## Buscando o nome da categoria pelo identificador e armazenando em nova coluna

In [5]:
with open(category_filename) as f:
    category = json.load(f)

In [6]:
def category_replace(c_id):
    for i in category["items"]:
        if int(c_id) == int(i["id"]):
             return i["snippet"]["title"]
    return "None"

In [7]:
videos_lastentry['category_name'] = videos_lastentry.apply(lambda row: category_replace(row['category_id']), axis=1)
print(filename+" shape: "+str(videos_lastentry.shape))
videos_lastentry.head()

FRvideos.csv shape: (28112, 18)


Unnamed: 0,video_id,trending_date,title,channel_title,publish_date,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,category_name
40722,NlxE_QQMRzg,2018-06-14,"Նռան հատիկ, Սերիա 192 / Pomegranate seed / Nra...",PanArmenian TV,2018-06-13,1,18:30:00,"Նռան հատիկ|""Սերիա 192""|""Pomegranate seed""|""Nra...",78117,244,74,46,https://i.ytimg.com/vi/NlxE_QQMRzg/default.jpg,False,False,False,Follow Armenia TV on social platforms:Instagra...,Film & Animation
40597,d0mgnlt-sgY,2018-06-14,COUPE DU MONDE - MES PRONOS !,Bruce & les Guez !,2018-06-13,20,13:51:45,"bruce grannec|""fifa""|""fifa 18""|""fifa18""|""fut""|...",74063,4445,222,774,https://i.ytimg.com/vi/d0mgnlt-sgY/default.jpg,False,False,False,Twitch - http://www.twitch.tv/BruceGrannecTwit...,Gaming
40593,HyqTJpG_JbE,2018-06-14,هذه هي الدول التي لم تصوت على المغرب لإستضافة ...,DailyProFootball,2018-06-13,17,12:05:22,"المغرب 2026|""morocco 2026""|""كأس العالم 2026""|""...",184156,504,217,1368,https://i.ytimg.com/vi/HyqTJpG_JbE/default.jpg,False,False,False,FACEBOOK انضم الى صفحتنا علىhttps://www.facebo...,Sports
40592,LR308Yr8tsg,2018-06-14,LIVE NOW ! - 68th FIFA Congress 2018,FIFATV,2018-06-13,17,12:24:25,[none],312470,2752,412,133,https://i.ytimg.com/vi/LR308Yr8tsg/default.jpg,False,False,False,Follow the congress LIVE on FIFA on YouTube ! ...,Sports
40591,gGJKg4RgA1k,2018-06-14,LA FAQ DE WAUQUIEZ : L'ANALYSE de MisterJDay,MisterJDay,2018-06-11,23,16:32:17,"Laurent Wauquiez|""FAQ""|""Foire aux questions""|""...",209862,25495,875,2479,https://i.ytimg.com/vi/gGJKg4RgA1k/default.jpg,False,False,False,Hier matin j'avais envie de faire une pause da...,Comedy


## Transformando categorias em OneHotEncoder

In [8]:
# DESNECESSARIO - UTILIZADO ONEHOTENCODER() NO MAPPER
#videos_lastentry = videos_lastentry.join(pd.get_dummies(videos_lastentry['category_name']))
#videos_lastentry.head()

## Definindo sentimento por video

In [9]:
#positive sentiment = 60% da quantidade de likes+dislikes
videos_lastentry['sentiment'] = videos_lastentry['likes'] >= 0.6*(videos_lastentry['likes']+videos_lastentry['dislikes']) 
videos_lastentry.head()

Unnamed: 0,video_id,trending_date,title,channel_title,publish_date,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,category_name,sentiment
40722,NlxE_QQMRzg,2018-06-14,"Նռան հատիկ, Սերիա 192 / Pomegranate seed / Nra...",PanArmenian TV,2018-06-13,1,18:30:00,"Նռան հատիկ|""Սերիա 192""|""Pomegranate seed""|""Nra...",78117,244,74,46,https://i.ytimg.com/vi/NlxE_QQMRzg/default.jpg,False,False,False,Follow Armenia TV on social platforms:Instagra...,Film & Animation,True
40597,d0mgnlt-sgY,2018-06-14,COUPE DU MONDE - MES PRONOS !,Bruce & les Guez !,2018-06-13,20,13:51:45,"bruce grannec|""fifa""|""fifa 18""|""fifa18""|""fut""|...",74063,4445,222,774,https://i.ytimg.com/vi/d0mgnlt-sgY/default.jpg,False,False,False,Twitch - http://www.twitch.tv/BruceGrannecTwit...,Gaming,True
40593,HyqTJpG_JbE,2018-06-14,هذه هي الدول التي لم تصوت على المغرب لإستضافة ...,DailyProFootball,2018-06-13,17,12:05:22,"المغرب 2026|""morocco 2026""|""كأس العالم 2026""|""...",184156,504,217,1368,https://i.ytimg.com/vi/HyqTJpG_JbE/default.jpg,False,False,False,FACEBOOK انضم الى صفحتنا علىhttps://www.facebo...,Sports,True
40592,LR308Yr8tsg,2018-06-14,LIVE NOW ! - 68th FIFA Congress 2018,FIFATV,2018-06-13,17,12:24:25,[none],312470,2752,412,133,https://i.ytimg.com/vi/LR308Yr8tsg/default.jpg,False,False,False,Follow the congress LIVE on FIFA on YouTube ! ...,Sports,True
40591,gGJKg4RgA1k,2018-06-14,LA FAQ DE WAUQUIEZ : L'ANALYSE de MisterJDay,MisterJDay,2018-06-11,23,16:32:17,"Laurent Wauquiez|""FAQ""|""Foire aux questions""|""...",209862,25495,875,2479,https://i.ytimg.com/vi/gGJKg4RgA1k/default.jpg,False,False,False,Hier matin j'avais envie de faire une pause da...,Comedy,True


## Normalização das features numéricas

In [10]:
videos_lastentry.describe()

Unnamed: 0,category_id,views,likes,dislikes,comment_count
count,28112.0,28112.0,28112.0,28112.0,28112.0
mean,20.359882,363792.3,12965.44,626.7208,1405.451
std,6.937157,1597467.0,73098.7,9330.435,11280.82
min,1.0,284.0,0.0,0.0,0.0
25%,17.0,12381.5,278.0,13.0,44.0
50%,23.0,56511.5,1449.5,62.0,187.0
75%,24.0,231758.0,5488.0,262.25,653.0
max,44.0,100911600.0,4750254.0,1353661.0,1040912.0


In [11]:
# Normalização
videos_lastentry['views_nonnormal'] = videos_lastentry['views']
videos_lastentry['views'] = (videos_lastentry['views']-videos_lastentry['views'].min())/(videos_lastentry['views'].max()-videos_lastentry['views'].min())
#videos_lastentry['likes'] = (videos_lastentry['likes']-videos_lastentry['likes'].min())/(videos_lastentry['likes'].max()-videos_lastentry['likes'].min())
#videos_lastentry['dislikes'] = (videos_lastentry['dislikes']-videos_lastentry['dislikes'].min())/(videos_lastentry['dislikes'].max()-videos_lastentry['dislikes'].min())
videos_lastentry['comment_count'] = (videos_lastentry['comment_count']-videos_lastentry['comment_count'].min())/(videos_lastentry['comment_count'].max()-videos_lastentry['comment_count'].min())
videos_lastentry.describe()

Unnamed: 0,category_id,views,likes,dislikes,comment_count,views_nonnormal
count,28112.0,28112.0,28112.0,28112.0,28112.0,28112.0
mean,20.359882,0.003602,12965.44,626.7208,0.00135,363792.3
std,6.937157,0.01583,73098.7,9330.435,0.010837,1597467.0
min,1.0,0.0,0.0,0.0,0.0,284.0
25%,17.0,0.00012,278.0,13.0,4.2e-05,12381.5
50%,23.0,0.000557,1449.5,62.0,0.00018,56511.5
75%,24.0,0.002294,5488.0,262.25,0.000627,231758.0
max,44.0,1.0,4750254.0,1353661.0,1.0,100911600.0


## Separando conjuntos de treino, validação e teste

In [12]:
train_data = videos_lastentry.sample(frac=0.8, random_state=200)
test_data = videos_lastentry.drop(train_data.index)
print(train_data.shape, test_data.shape)

(22490, 20) (5622, 20)


# Classificador

## União de features

In [13]:
mapper = DataFrameMapper([
    ('title', CountVectorizer(max_features = 2000)),
    ('description', CountVectorizer(max_features = 2000)),
    ('tags', CountVectorizer(max_features = 2000)),
    ('category_name', LabelBinarizer()),
    ('comments_disabled', None),
    ('views', None),
    ('comment_count', None)
])
x_train = mapper.fit_transform(train_data)
y_train = train_data['sentiment'].values

In [14]:
x_train.shape

(22490, 6021)

In [15]:
classifier = LogisticRegression(penalty = 'l1')
classifier.fit(x_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l1', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [16]:
x_test = mapper.transform(test_data)
y_test = test_data['sentiment'].values
predicted = classifier.predict(x_test)

In [17]:
probs = classifier.predict_proba(x_test)
print(probs)

[[2.60106707e-04 9.99739893e-01]
 [2.42692805e-04 9.99757307e-01]
 [9.51846931e-04 9.99048153e-01]
 ...
 [1.12662454e-04 9.99887338e-01]
 [5.56761016e-02 9.44323898e-01]
 [9.67094074e-05 9.99903291e-01]]


In [18]:
print("Acuracia: "+str(metrics.accuracy_score(y_test, predicted)))
print("ROC AUC score: "+str(metrics.roc_auc_score(y_test, probs[:, 1])))

Acuracia: 0.9766986837424404
ROC AUC score: 0.8863000712989371


In [19]:
predicted = classifier.predict(mapper.transform(videos_lastentry))

In [20]:
videos_output = videos_lastentry
videos_output['predicted_sentiment'] = predicted

videos_output = videos_output.sort_values('predicted_sentiment', ascending=False)
videos_output.head()

Unnamed: 0,video_id,trending_date,title,channel_title,publish_date,category_id,publish_time,tags,views,likes,...,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,category_name,sentiment,views_nonnormal,predicted_sentiment
40722,NlxE_QQMRzg,2018-06-14,"Նռան հատիկ, Սերիա 192 / Pomegranate seed / Nra...",PanArmenian TV,2018-06-13,1,18:30:00,"Նռան հատիկ|""Սերիա 192""|""Pomegranate seed""|""Nra...",0.000771,244,...,4.4e-05,https://i.ytimg.com/vi/NlxE_QQMRzg/default.jpg,False,False,False,Follow Armenia TV on social platforms:Instagra...,Film & Animation,True,78117,True
13819,rUc9sPSKCuQ,2018-01-24,LE JOURNAL DU 23 JANVIER 2018,Le Média,2018-01-23,25,19:46:02,"Le Média""|""Indépendant""|""Télévision""|""Actualit...",0.000183,3162,...,0.0004,https://i.ytimg.com/vi/rUc9sPSKCuQ/default.jpg,False,False,False,Le Journal est en accès libre grâce aux Socios...,News & Politics,True,18762,True
13958,m_LGx0z2D4A,2018-01-24,Kisabac Lusamutner eter 23.01.18 Krak Tesatsnere,KisabacLusamutner,2018-01-23,29,17:58:10,"Kisabac""|""Lusamutner""|""hrach""|""muradyan""|""qesh...",0.000291,118,...,0.0,https://i.ytimg.com/vi/m_LGx0z2D4A/default.jpg,True,False,False,ԿՐԱԿ ՏԵՍԱԾՆԵՐԸ - 2017 թվականի նոյեմբերի 15-ին ...,,True,29663,True
13959,yJKrlI0GWWs,2018-01-24,Donald Trump Didn't Get the Women's March,Jimmy Kimmel Live,2018-01-23,24,08:30:01,"jimmy""|""kimmel""|""live""|""late""|""night""|""talk""|""...",0.005056,8499,...,0.001556,https://i.ytimg.com/vi/yJKrlI0GWWs/default.jpg,False,False,False,"Due to the government shutdown this weekend, D...",Entertainment,True,510479,True
13960,jQj6AVvc-FA,2018-01-24,LES MEILLEURS PACKS TOTY FRANÇAIS ! FIFA 18,PUN,2018-01-23,20,18:14:31,"pun""|""MEILLEURS PACKS TOTY FR""|""MEILLEURS PACK...",0.000663,4415,...,0.000175,https://i.ytimg.com/vi/jQj6AVvc-FA/default.jpg,False,False,False,Vendre ou acheter des Crédits sécurisés sur FI...,Gaming,True,67187,True


In [21]:
f1_score(videos_output['sentiment'].astype(int), videos_output['predicted_sentiment'].astype(int), average='macro')

0.7597548173701074

In [22]:
get_n_features = 10
print("The "+str(get_n_features)+" most positive-weighted words are: ")
for feature_id in sorted(range(len(classifier.coef_[0])), key=lambda i: classifier.coef_[0][i], reverse=True)[:get_n_features]:
    print("%s (%f)" % (mapper.transformed_names_[feature_id], classifier.coef_[0][feature_id]))
print()
print("The "+str(get_n_features)+" most negative-weighted words are: ")
for feature_id in sorted(range(len(classifier.coef_[0])), key=lambda i: classifier.coef_[0][i], reverse=False)[:get_n_features]:
    print("%s (%f)" % (mapper.transformed_names_[feature_id], classifier.coef_[0][feature_id]))

The 10 most positive-weighted words are: 
title_nouvelles (2.098522)
description_good (1.884774)
description_enfin (1.363805)
description_date (1.355599)
tags_couple (1.349098)
category_name_Education (1.328256)
description_sera (1.310016)
description_près (1.272631)
description_mes (1.267992)
tags_new (1.261071)

The 10 most negative-weighted words are: 
title_logan (-2.875551)
title_eurovision (-2.615731)
title_nutella (-2.510123)
title_selon (-2.482656)
title_ماكرون (-2.329565)
title_سلمى (-2.259705)
title_bayern (-2.200695)
description_directeur (-1.955435)
title_42 (-1.922950)
title_met (-1.863603)


# K-Fold

In [23]:
def run_kfold(clf,X,Y):
    kf = KFold(n_splits=10)
    outcomes = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        outcomes.append(accuracy)
    mean_outcome = np.mean(outcomes)
    print("Mean Accuracy: {0}".format(mean_outcome)) 

In [24]:
run_kfold(classifier,x_train,y_train)



Mean Accuracy: 0.9722543352601157


In [25]:
videos_lastentry['like_ratio'] = videos_lastentry['likes']/(videos_lastentry['likes']+videos_lastentry['dislikes'])

In [26]:
from IPython.display import HTML, display

# We choose the 10 most trending videos
selected_columns = ['title', 'channel_title', 'thumbnail_link', 'publish_date', 'category_name', 'likes', 'dislikes', 'views_nonnormal', 'like_ratio']

most_frequent = videos_lastentry.groupby(selected_columns)['video_id'].agg(
    {"code_count": len}).sort_values(by=['like_ratio']
).head(10).reset_index()

# Construction of HTML table with miniature photos assigned to the most popular movies
table_content = ''
max_title_length = 50

for date, row in most_frequent.T.iteritems():
    HTML_row = '<tr>'
    HTML_row += '<td><img src="' + str(row[2]) + '"style="width:100px;height:100px;"></td>'
    HTML_row += '<td>' + str(row[1]) + '</td>'
    HTML_row += '<td>' + str(row[0])  + '</td>'
    HTML_row += '<td>' + str(row[4]) + '</td>'
    HTML_row += '<td>' + str(row[3]) + '</td>'
    HTML_row += '<td>' + str(row[5]) + '</td>'
    HTML_row += '<td>' + str(row[6]) + '</td>'
    HTML_row += '<td>' + str(row[7]) + '</td>'
    
    table_content += HTML_row + '</tr>'

display(HTML(
    '<table><tr><th>Photo</th><th>Channel Name</th><th style="width:250px;">Title</th><th>Category</th><th>Publish Date</th><th>Likes</th><th>Dislikes</th><th>Views</th></tr>{}</table>'.format(table_content))
)

is deprecated and will be removed in a future version
  import sys


Photo,Channel Name,Title,Category,Publish Date,Likes,Dislikes,Views
,France 3 Nouvelle-Aquitaine,Le rorqual échoué en Gironde disséqué par les scientifiques du Pelagis de La Rochelle,News & Politics,2017-12-30 00:00:00,0,1,3080
,RL Football,Spanish Manager ruben de la barrera got sent off for stopping albacete attack,Sports,2018-03-26 00:00:00,0,1,4962
,France 3 Auvergne-Rhône-Alpes,"Interview Mathieu Faivre, 7ème du slalom géant",News & Politics,2018-02-18 00:00:00,0,7,6886
,MHD TUBE,قناة النهار ترد على فيديو أنس تينا ديزاد جوكر راني زعفان Anes Tina Rani za3fane,People & Blogs,2017-11-19 00:00:00,27,497,24142
,MONSIEUR FRANTZ,Bad buz Lille : fellation à la discothèque Le Network,People & Blogs,2017-12-20 00:00:00,18,304,78801
,Happy Life Channel,"وزير الإتصال يقصف أنس تينا راني زعفان , Anes Tina Rani za3fane",People & Blogs,2017-11-20 00:00:00,19,290,17054
,Youtube dans le monde,Champigny-sur-Marne (94) : une policière lynchée par la foule à la soirée « faceblack » 4/4,News & Politics,2018-01-01 00:00:00,69,941,155953
,sangoyacongoactu,Marche du 21 Janvier 2018 : Bruno Tshibala et Général Kasongo dans les rues de Kinshasa,Music,2018-01-21 00:00:00,8,104,8720
,Nouvelles Chaudes,Anémone : l’ac­trice regrette d’avoir eu des enfants,People & Blogs,2017-12-27 00:00:00,24,272,20299
,Demo Sophie,5 MAI CNT L'après,News & Politics,2018-04-19 00:00:00,24,240,4066


In [27]:
videos_lastentry.loc[videos_lastentry['description'].str.contains("andretti")]

Unnamed: 0,video_id,trending_date,title,channel_title,publish_date,category_id,publish_time,tags,views,likes,...,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,category_name,sentiment,views_nonnormal,predicted_sentiment,like_ratio
