In [105]:
import pandas as pd
from pathlib import Path
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Load csv
vaccine_data = Path("data/covid_vaccines_by_county_cleaned.csv")
video_data = Path("data/video_data_cleaned.csv")

# Read csv
vaccine_csv = pd.read_csv(vaccine_data)
video_csv = pd.read_csv(video_data)

In [106]:
channels = len(video_csv["Channel Title"].unique())
channels

50

In [107]:
video_csv[["View Count", "Like Count", "Tag Count"]].describe()


Unnamed: 0,View Count,Like Count,Tag Count
count,20743.0,20595.0,20743.0
mean,80733.46,2459.846,14.753941
std,402946.4,12563.89,11.085445
min,2.0,0.0,0.0
25%,1220.5,43.0,2.0
50%,4946.0,169.0,17.0
75%,28304.0,884.0,22.0
max,23999960.0,1005758.0,49.0


In [108]:
video_csv[["View Count", "Like Count", "Tag Count"]].median()

View Count    4946.0
Like Count     169.0
Tag Count       17.0
dtype: float64

In [109]:
channel_average_views = video_csv.groupby(["Channel Title"])["View Count"].mean().unique()
channel_average_views

array([119258.9375    ,  99233.92519685,  58114.69354839,   1632.36711712,
        42916.80487805,  33664.66366366,  52323.888     , 252267.28210117,
         3809.10743802,   1157.26530612,  43840.46376812,    926.10305958,
        82361.16055046,  52036.71392405,   2312.63865546,  20333.39722864,
       139788.27480916,   2736.48      , 183398.91447368, 358660.22368421,
         8034.85714286, 166002.19760479,  26419.54545455, 130077.14184397,
        86356.85542169, 206254.27868852, 100655.13625304,  13487.47482014,
         5374.42941176,  13398.97674419,  17260.55421687,   1703.13416988,
         2593.24390244,  15673.81355932,  83417.48474178,  45761.0935695 ,
        16621.93933266,  78849.95454545,   7165.0817757 ,  12362.87434555,
       308008.27565982, 457893.        ,  76584.72839506,  29826.5323475 ,
       183165.87234043,   4098.82840237,  46966.        , 679409.15048026,
       142305.02325581,   3575.32384342])

In [110]:
channel_highest_views = video_csv.groupby(["Channel Title"])["View Count"].max()
channel_highest_views

Channel Title
A Life Engineered                                  686097
Alex The Analyst                                  1491055
BABE ENGINEER                                     1671079
Black Heights - Advancing Your Career               58543
Boyd Clewis: The Six-Figure Tech Career Coach      940923
CareerFoundry                                      927976
Chris Sean                                         689375
Coding with Lewis                                 3037798
CompTIA                                            159311
Cut The Tech                                         7087
Data With Mo                                       637992
Degree Free                                        185109
Devslopes                                         5965689
Dorian Develops                                   1777326
Hustle With Drae                                   117695
IT Career Questions                               1196726
Internet Made Coder                               3464786


In [111]:
channel_name = video_csv["Channel Title"]

In [112]:
average_likes = video_csv.groupby(["Channel Title"])["Like Count"].mean()
average_likes

Channel Title
A Life Engineered                                 5196.875000
Alex The Analyst                                  2750.307087
BABE ENGINEER                                     2112.887097
Black Heights - Advancing Your Career               91.493243
Boyd Clewis: The Six-Figure Tech Career Coach     2196.829268
CareerFoundry                                     1008.285285
Chris Sean                                        2230.448000
Coding with Lewis                                15544.914397
CompTIA                                             98.254167
Cut The Tech                                        62.510204
Data With Mo                                      1782.797101
Degree Free                                         26.528180
Devslopes                                         2919.215596
Dorian Develops                                   2514.227848
Hustle With Drae                                   121.512821
IT Career Questions                                573.8

In [113]:
highest_likes = video_csv.groupby(["Channel Title"])["Like Count"].max()
highest_likes

Channel Title
A Life Engineered                                  23569.0
Alex The Analyst                                   28239.0
BABE ENGINEER                                      60630.0
Black Heights - Advancing Your Career               2444.0
Boyd Clewis: The Six-Figure Tech Career Coach      51909.0
CareerFoundry                                      24044.0
Chris Sean                                         36058.0
Coding with Lewis                                 193015.0
CompTIA                                             4941.0
Cut The Tech                                         518.0
Data With Mo                                       27978.0
Degree Free                                         8184.0
Devslopes                                         216486.0
Dorian Develops                                   101194.0
Hustle With Drae                                    2333.0
IT Career Questions                                18729.0
Internet Made Coder                       

In [118]:
average_comments = video_csv.groupby(["Channel Title"])["Comment Count"].mean()
average_comments

Channel Title
A Life Engineered                                 181.343750
Alex The Analyst                                  185.370079
BABE ENGINEER                                      86.500000
Black Heights - Advancing Your Career              13.418919
Boyd Clewis: The Six-Figure Tech Career Coach      89.479675
CareerFoundry                                      25.459459
Chris Sean                                        207.169355
Coding with Lewis                                 191.507782
CompTIA                                             7.661157
Cut The Tech                                       11.551020
Data With Mo                                      103.710145
Degree Free                                         1.309179
Devslopes                                          95.908257
Dorian Develops                                   230.197970
Hustle With Drae                                   11.042017
IT Career Questions                                70.196759
Internet M

In [119]:
highest_comments = video_csv.groupby(["Channel Title"])["Comment Count"].max()
highest_comments

Channel Title
A Life Engineered                                  759.0
Alex The Analyst                                  3809.0
BABE ENGINEER                                     2191.0
Black Heights - Advancing Your Career              267.0
Boyd Clewis: The Six-Figure Tech Career Coach     1510.0
CareerFoundry                                      850.0
Chris Sean                                        2378.0
Coding with Lewis                                 3100.0
CompTIA                                            359.0
Cut The Tech                                       107.0
Data With Mo                                      1194.0
Degree Free                                        320.0
Devslopes                                         4425.0
Dorian Develops                                   9672.0
Hustle With Drae                                   305.0
IT Career Questions                               1443.0
Internet Made Coder                               2825.0
Jack Ross        

In [120]:
video_summary_df = pd.DataFrame({
    "Average Views" : channel_views_average,
    "Average Likes" : average_likes,
    "Average Comments" : average_comments,
    "Most Views" : channel_highest_views,
    "Most Likes" : highest_likes,
    "Most Comments" : highest_comments
    
})
video_summary_df

Unnamed: 0_level_0,Average Views,Average Likes,Average Comments,Most Views,Most Likes,Most Comments
Channel Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A Life Engineered,119258.9375,5196.875,181.34375,686097,23569.0,759.0
Alex The Analyst,99233.925197,2750.307087,185.370079,1491055,28239.0,3809.0
BABE ENGINEER,58114.693548,2112.887097,86.5,1671079,60630.0,2191.0
Black Heights - Advancing Your Career,1632.367117,91.493243,13.418919,58543,2444.0,267.0
Boyd Clewis: The Six-Figure Tech Career Coach,42916.804878,2196.829268,89.479675,940923,51909.0,1510.0
CareerFoundry,33664.663664,1008.285285,25.459459,927976,24044.0,850.0
Chris Sean,52323.888,2230.448,207.169355,689375,36058.0,2378.0
Coding with Lewis,252267.282101,15544.914397,191.507782,3037798,193015.0,3100.0
CompTIA,3809.107438,98.254167,7.661157,159311,4941.0,359.0
Cut The Tech,1157.265306,62.510204,11.55102,7087,518.0,107.0
