In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import offline, iplot
from IPython.display import display, HTML
import warnings
# Hide FutureWarning messages whose originating module name starts with "plotly".
warnings.filterwarnings("ignore", category=FutureWarning, module="plotly")



# Render floats in pandas output with thousands separators and one decimal place.
pd.options.display.float_format = "{:,.1f}".format

In [None]:
def update_layout(title_font_size = 28, hover_font_size = 16, hover_bgcolor = "#45FFCA", showlegend = False, figure = None):
    """Apply the notebook's shared title and hover-label styling to a figure.

    Parameters
    ----------
    title_font_size : int
        Font size of the figure title.
    hover_font_size : int
        Font size of the hover label text.
    hover_bgcolor : str
        Background colour of the hover label.
    showlegend : bool
        Whether the legend is displayed.
    figure : object with an ``update_layout`` method, optional
        Figure to style. When omitted, the notebook-level ``fig`` variable is
        used, so existing ``update_layout()`` calls keep working.

    Returns
    -------
    None
        The figure is modified in place.

    Raises
    ------
    NameError
        If ``figure`` is omitted and no notebook-level ``fig`` exists yet.
    """
    if figure is None:
        # Backward-compatible fallback: style the most recently created global figure.
        try:
            figure = fig
        except NameError:
            raise NameError(
                "update_layout() needs a figure: pass figure=... or create `fig` first"
            ) from None

    figure.update_layout(
        showlegend = showlegend,
        title = {
            "font" : {
                "size" :title_font_size,
                "family" :"tahoma"
            }
        },
        hoverlabel={
            "bgcolor": hover_bgcolor,
            "font_size": hover_font_size,
            "font_family": "tahoma"
        }
    )

# Load The Dataset

In [None]:
# Location of the dataset inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/youtube-dataset-all-data-scienceanalyst-channels/Youtube_dataset_all_dataScience_channels.csv'

df = pd.read_csv(DATA_PATH)

# Preview ten random rows; the fixed seed keeps the preview identical on every run.
df.sample(n=10, random_state=42)

# Now, Let's Get a Quick Overview 🧐

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column dtypes and non-null counts before any cleaning.
df.info()

# Now, It's Time For Data Cleaning & Preprocessing🧐🔨

In [None]:
# The null values are in Views, Like_count and Comment_Count, so we can replace them with 0.
# Only those columns are filled: a blanket fillna(0) would also put a numeric 0
# into any text or date column that happens to contain a missing value.
count_columns = ['Views', 'Like_count', 'Comment_Count']
df = df.fillna({column: 0 for column in count_columns})

In [None]:
# Re-check the non-null counts after filling missing values.
df.info()

In [None]:
# Current dtype of every column, to decide which ones need converting.
df.dtypes

In [None]:
# Parse Published_date into datetimes so the .dt accessor can be used later.
# NOTE(review): assumes every value is formatted as YYYY-MM-DD — confirm against the CSV.
df['Published_date'] = pd.to_datetime(df['Published_date'], format='%Y-%m-%d')

In [None]:
# Store the three count columns as 64-bit integers.
for column in ('Views', 'Like_count', 'Comment_Count'):
    df[column] = df[column].astype('int64')

In [None]:
# Confirm the dtypes after the date and integer conversions.
df.info()

# Now, Let's Dive Deeper Into Each Important Column 🌊🧐

<h3 style = "background-color: #111;
             padding: 15px;
             font: bold 22px arial;
             color: lightgreen;
             border: 2px solid lime;
             border-radius: 8px">
♠ Channel Name Column 📺</h3>

In [None]:
# Number of videos per channel, most frequent first.
Channel = df["Channel_Name"].value_counts()

# The same counts expressed as a percentage of all rows, formatted for display.
(Channel / df.shape[0] * 100).map("{: 0.2f} %".format)

# Channel Name and Number of videos(%)

In [None]:
# Share of all videos contributed by each of the ten most frequent channels.
# The total is computed once (not once per row), and the data is laid out as an
# explicit frame so axis and legend names do not depend on plotly inferring them
# from a Series or its index.
total_videos = Channel.sum()
top_channels = (
    Channel.head(10)
    .rename_axis("Channel Name")
    .reset_index(name="video_count")
)
top_channels["Videos(%)"] = top_channels["video_count"] / total_videos * 100
top_channels["label"] = top_channels["Videos(%)"].map("{:0.2f}%".format)

# Create the bar plot
fig = px.bar(
    data_frame=top_channels,
    x="Channel Name",
    y="Videos(%)",
    color="Channel Name",
    color_discrete_sequence=["#FF0060", "#45FFCA", "#45FFCA", "#293462", "#FF55BB", "#293462"],
    title="Channel Name and Number of videos(%)",
    text="label",
    template="plotly_dark"
)

# Update the layout
fig.update_layout(
    hoverlabel=dict(bgcolor="#111"),
    title_font=dict(size=24, color='lightgreen'),
    xaxis=dict(title='Channel Name'),
    yaxis=dict(title='Videos(%)'),
)

# Update traces
fig.update_traces(
    textfont={
        "family": "tahoma",
        "size": 13,
    },
    hovertemplate="Channel Name: %{x}<br>Videos: %{y:0.2f}%"
)

# Display the plot
iplot(fig)

<h3 style = "background-color: #111;
             padding: 15px;
             font: bold 22px arial;
             color: gold;
             border: 3px solid tomato;
             border-radius: 8px">
► From This Graph We Can Say That Ws cube tech Uploads the Most Videos!!!</h3>

# Views and Number of Videos of Data Science Educators Through the Years

In [None]:
# Total views of all videos published in each calendar year.
published_year = df['Published_date'].dt.year
view_by_year = df.groupby(published_year)['Views'].sum()
view_by_year

In [None]:
# Lay the yearly totals out as an explicit two-column frame so the axis titles
# come from the column names rather than from whatever name plotly infers for a
# Series and its index (which differs between pandas versions).
views_per_year = view_by_year.rename_axis("Year").reset_index(name="View Counts")

fig = px.area(
    views_per_year,
    x="Year",
    y="View Counts",
    line_shape="spline",
    color_discrete_sequence=["#FF0060"],
    title="Views in Data Science Content Through Years ⌛",
    template="plotly_dark",
    markers=True,  # boolean flag; the original passed the truthy string "O"
)

update_layout()

fig.update_traces(
    textfont={
        "family": "tahoma",
        "size": 13,
    },
    hovertemplate="Year: %{x}<br>View Counts: %{y:0.2s}"
)
iplot(fig)

In [None]:
# Number of videos published in each calendar year, in chronological order.
publish_years = df['Published_date'].dt.year
year = publish_years.value_counts().sort_index()
year

In [None]:
# Explicit two-column frame: the axis titles come from the column names instead
# of names plotly infers from the Series/index, which vary with the pandas version.
videos_per_year = year.rename_axis("Year").reset_index(name="Videos Counts")

fig = px.area(
    videos_per_year,
    x="Year",
    y="Videos Counts",
    line_shape="spline",
    color_discrete_sequence=["#45FFCA"],
    title="Number of Videos of data Science educators Through Years 🗓️",
    template="plotly_dark",
)

update_layout()
iplot(fig)

<h3 style = "background-color: #111;
             padding: 15px;
             font: bold 22px arial;
             color: red;
             border: 3px solid tomato;
             border-radius: 8px">
  The number of videos from Data Science educators was highest in 2020–2022, probably because of COVID!!!</h3>

# Top 10 Data Science YouTube channels

In [None]:
# Per-channel view statistics. Named aggregation fixes both the column names and
# their order, so nothing depends on how a list-style agg combined with
# as_index=False lays out its result in the installed pandas version.
df1 = df.groupby('Channel_Name', as_index=False).agg(
    video_count=('Views', 'count'),
    view_mean=('Views', 'mean'),
    view_count=('Views', 'sum'),
)
df1

In [None]:
# Per-channel like statistics. Named aggregation fixes both the column names and
# their order, so nothing depends on how a list-style agg combined with
# as_index=False lays out its result in the installed pandas version.
df2 = df.groupby('Channel_Name', as_index=False).agg(
    Like_mean=('Like_count', 'mean'),
    video_count=('Like_count', 'count'),
    Like_count=('Like_count', 'sum'),
)
df2

In [None]:
# Per-channel comment statistics. Named aggregation fixes both the column names
# and their order, so nothing depends on how a list-style agg combined with
# as_index=False lays out its result in the installed pandas version.
df3 = df.groupby('Channel_Name', as_index=False).agg(
    Comment_mean=('Comment_Count', 'mean'),
    video_count=('Comment_Count', 'count'),
    Comment_count=('Comment_Count', 'sum'),
)
df3

In [None]:
# Put the view, like and comment statistics side by side (the frames are joined
# column-wise on their row index), then keep channels with more than 20 videos.
like_stats = df2[['Like_mean','Like_count']]
comment_stats = df3[['Comment_mean','Comment_count']]
all_channel_stats = pd.concat([df1, like_stats, comment_stats], axis=1)
Channel_comparison = all_channel_stats.query('video_count>20')
Channel_comparison

In [None]:
# Order channels by mean views, lowest first, so the top ten are the last ten rows.
channel_views = Channel_comparison.sort_values('view_mean')
channel_views.tail(10)

In [None]:
# Ten channels with the highest mean views. channel_views is sorted ascending, so
# they are the last ten rows; the slice is taken once instead of four times.
top_by_views = channel_views.tail(10)

# Columns are referenced by name so `labels` titles both axes and the colour bar
# explicitly instead of relying on names plotly infers for array arguments.
fig = px.bar(
    data_frame=top_by_views,
    orientation="h",
    y="Channel_Name",
    x="view_mean",
    color="view_mean",
    labels={
        "view_mean": "average views",
        "Channel_Name": "Data Science YouTube channels",
    },
    title="Top 10 Data Science YouTube channels with highest average views",
    text_auto=".2s",
    template="plotly_dark",
    color_continuous_scale=['#A084E8', '#00FFAB', '#00FFAB']
)

update_layout(hover_bgcolor="#222", hover_font_size=14)

fig.update_traces(
    textposition="outside",
    textfont={
        "family": "tahoma",
        "size": 13,
    },
    hovertemplate="Data Science YouTube channels: %{y}<br>average views: %{x:0.3s}"
)
iplot(fig)

In [None]:
# Order channels by mean likes, highest first, and preview the top ten.
channel_Like_mean = Channel_comparison.sort_values('Like_mean', ascending=False)
channel_Like_mean.head(10)

In [None]:
# Bar chart of the ten channels with the highest mean likes, one colour per channel.
top_liked = channel_Like_mean.head(10)
like_palette = ["#45FFCA","#FF0060","#FF55BB", "#FFFDAF"]

fig = px.bar(
    top_liked,
    x="Channel_Name",
    y="Like_mean",
    color="Channel_Name",
    title="Top 10 Data Science YouTube channels with highest average Likes",
    template="plotly_dark",
    opacity=0.89,
    color_discrete_sequence=like_palette,
)

update_layout(showlegend=True, hover_bgcolor="#222", hover_font_size=14)

like_text_font = {"family": "tahoma", "size": 13}
fig.update_traces(
    textfont=like_text_font,
    hovertemplate="Channel_Name: %{x}<br>average Likes: %{y}",
)
iplot(fig)

In [None]:
# Order channels by mean comments, lowest first, so the top ten are the last ten rows.
channel_Comment_mean = Channel_comparison.sort_values('Comment_mean')
channel_Comment_mean.tail(10)

In [None]:
# Bubble chart of the ten channels with the highest mean comments; bubble size
# and colour both encode Comment_mean.
top_commented = channel_Comment_mean.tail(10)

fig = px.scatter(
    top_commented,
    y="Channel_Name",
    x="Comment_mean",
    size="Comment_mean",
    color="Comment_mean",
    template="plotly_dark",
    # `labels` must be keyed by the columns actually plotted; the previous keys
    # ("name", "score") matched nothing, so the titles were never applied.
    labels={
        "Channel_Name": "Data Science YouTube channels",
        "Comment_mean": "average comments",
    },
    opacity=0.89,
    title="Top 10 Data Science YouTube channels with highest average comments",
    color_continuous_scale=['#A084E8', '#8BE8E5', '#00FFAB'],
)

update_layout(hover_bgcolor="#222", hover_font_size=14)

fig.update_traces(
    textfont={
        "family": "tahoma",
        "size": 13,
    },
    hovertemplate="YouTube channels: %{y}<br>average comments: %{x}"
)
iplot(fig)

<h3 style = "background-color: #010101;
             padding: 15px;
             font: bold 20px arial;
             color: gold;
             border: 2px solid tomato;
             border-radius: 8px">
♣ Now, with these 3 visuals, we can notice some YouTube channel names in common; we can say that they are probably the <span style = "color: tomato">Best Data Science Educators</span> on YouTube!! 👑🎬</h3>

#  Correlation

<h3 style = "background-color: #010101;
             padding: 18px;
             font: bold 24px arial;
             color: #45FFCA;
             border: 2px solid green;
             border-radius: 8px">
♣ Is There Any Correlation in the data set?!🎬</h3>

In [None]:
# Per-channel metrics whose pairwise correlation is examined below.
corr_features = ["video_count", "view_mean",  "Like_mean", "Comment_mean"]
df_corr = Channel_comparison[corr_features]

In [None]:
# Pairwise correlation matrix of the selected per-channel metrics.
df_corr.corr(numeric_only=True)

In [None]:
# Draw the correlation matrix as an annotated Plotly heat map.
corr_matrix = df_corr.corr(numeric_only = True)

fig = px.imshow(
    corr_matrix,
    title="Correlation",
    template="plotly_dark",
    aspect="auto",
    text_auto="0.2f",
    color_continuous_scale = "earth",
)

update_layout(showlegend=True, hover_bgcolor="#222", hover_font_size=15)

heatmap_font = {"family": "tahoma", "size": 16}
fig.update_traces(
    textfont = heatmap_font,
    hovertemplate= "Feature[1]: %{x}<br>Feature[2]: %{y}",
)
iplot(fig)

<h3 style = "background-color: #000;
             padding: 18px;
             font: bold 20px tahoma;
             color: gold;
             border: 3px solid tomato;
             border-radius: 8px">
    ► From The HeatMap, We Can Say 😉🧐
    <BR><HR style = "border: 1px solid #555">
        1) There Is a Very Strong Positive Correlation Between <U>view_mean</U> and <U>Like_mean</U> (1.0)🔗..
    <BR>
        2) There Is a Strong Positive Correlation Between <U>view_mean</U> and <U>Comment_mean</U> (0.89)🔗
    

</h3>

In [None]:
# Scatter of mean views against mean likes per channel, with an OLS trend line.
# The columns are passed as Series, exactly as before, so plotly names them the same way.
fig = px.scatter(
    Channel_comparison,
    x = Channel_comparison["view_mean"],
    y = Channel_comparison["Like_mean"],
    color = Channel_comparison["Like_mean"],
    color_discrete_sequence=["#45FFCA"],
    trendline="ols",
    title = "Relation Between Views & Likes",
    template="plotly_dark",
)

update_layout(showlegend=True, hover_bgcolor="#222", hover_font_size=15)

likes_scatter_font = {"family": "tahoma", "size": 16}
fig.update_traces(
    textfont = likes_scatter_font,
    hovertemplate= "Views: %{x:0.3s}<br>Likes: %{y:0.3s}",
)
iplot(fig)

In [None]:
# Scatter of mean views against mean comments per channel, with an OLS trend line.
# The columns are passed as Series, exactly as before, so plotly names them the same way.
fig = px.scatter(
    Channel_comparison,
    x = Channel_comparison["view_mean"],
    y = Channel_comparison["Comment_mean"],
    color = Channel_comparison["Comment_mean"],
    color_discrete_sequence=["#45FFCA"],
    trendline="ols",
    title = "Relation Between Views & Comments",
    template="plotly_dark",
)

update_layout(showlegend=True, hover_bgcolor="#222", hover_font_size=15)

comments_scatter_font = {"family": "tahoma", "size": 16}
fig.update_traces(
    textfont = comments_scatter_font,
    hovertemplate= "Views: %{x:0.4s}<br>Comments: %{y:0.4s}",
)
iplot(fig)

# Now let's try to find the best videos 📺

In [None]:
# Another look at ten random rows, now that the frame has been cleaned.
df.sample(10)

In [None]:
# Rank videos by views, using likes and then comments to break ties. The order is
# ascending, so the best-performing videos are the last rows.
ranking_keys = ['Views', 'Like_count', 'Comment_Count']
best_videos = df.sort_values(by=ranking_keys)
best_videos.tail(10)

In [None]:
# Ten best-ranked videos. best_videos is sorted ascending, so they are the last
# ten rows; the slice is taken once instead of four times.
top_videos = best_videos.tail(10)

# The bar length and colour are the raw view count of each video, so the axis,
# colour bar and hover text are labelled "Views" rather than an average or a
# combined views/likes/comments score.
fig = px.bar(
    data_frame=top_videos,
    orientation="h",
    y="Title",
    x="Views",
    color="Views",
    labels={"Views": "Views", "Title": "Videos"},
    title="Top 10 Videos on the basis of View,Likes and Comment",
    text_auto=".2s",
    template="plotly_dark",
    color_continuous_scale=['#FF5733', '#C70039', '#900C3F']
)
update_layout(hover_bgcolor="#222", hover_font_size=14)

fig.update_traces(
    textposition="outside",
    textfont={
        "family": "tahoma",
        "size": 13,
    },
    hovertemplate="Videos: %{y}<br>Views: %{x:0.3s}"
)
iplot(fig)

<h3 style = "background-color: #111;
             padding: 15px;
             font: bold 18px arial;
             color: lightgreen;
             border: 2px solid lime;
             border-radius: 8px">
► These are the best videos according to the audience.
<BR>
