In [1]:
import csv
import numpy as np
from collections import defaultdict, namedtuple
import pafy
import random

In [2]:
def merge_overlapping_intervals(intervals):
    intervals.sort(key=lambda x: x[0])
    merged = [intervals[0]]
    for current in intervals:
        previous = merged[-1]
        if current[0] <= previous[1]:
            previous[1] = max(previous[1], current[1])
        else:
            merged.append(current)

    intervals.clear()
    intervals.extend(merged)

In [3]:
Video = namedtuple("Video", ["video_id", "segments", "pafy"])

In [4]:
videos = dict()  # video_id -> Video 

with open("segments.csv", "r") as f:
    csv_reader = csv.reader(f, delimiter=',')
    for row in csv_reader:
        video_id = row[0]

        # Let's sample 1% of all video IDs:
        random.seed(video_id)
        if random.uniform(0, 1) > 0.01:
            continue
        
        start_timestamp = float(row[1])
        end_timestamp = float(row[2])
        
        try:
            if video_id not in videos:
                videos[video_id] = Video(video_id, [], pafy.new(video_id))
                print("Processed video #%d: %s" % (len(videos.items()), videos[video_id].pafy.title))
            videos[video_id].segments.append(np.array([start_timestamp, end_timestamp]))
        except:
            print("Could not process video %s" % video_id)
        


Processed video #1: Dating during the pandemic
Processed video #2: Minecraft, But The Mobs Spawn In Stacks...
Processed video #3: Top 10 CAMPER FRIENDLY Guns in COD HISTORY
Processed video #4: Der Anti-Webvideopreis 2020
Processed video #5: ROAD RAGE IN AMERICA 2019 | BAD DRIVERS USA, CANADA
Processed video #6: How bad is this $20 SSD??
Processed video #7: They SAID this would be EASY... - Gaming at 8K 60fps
Processed video #8: Red's Overpriced "Mini Mag" Cards - The Real Story
Processed video #9: The 787 day journey ends here [0% HCIM]
Processed video #10: DIY Smart Mirror (that doesn't steam up!)
Processed video #11: Body Jump-Start and BioCharger RECTIFICATION
Processed video #12: The Iodine Myth
Processed video #13: Can You Spot the FAKE Gamer?  - Jubilee React #2
Processed video #14: The Insane Engineering of the A-10 Warthog
Processed video #15: He payed $150 000 to look like BTS JIMIN
Processed video #16: Self quarantining with some old friends
Processed video #17: We drove thes

ERROR: tYRS0aXaBC8: YouTube said: Unable to extract video data


Could not process video tYRS0aXaBC8
Processed video #82: WILL MAKES A DAMASCUS DAGGER AND MORE!! PART 2
Processed video #83: H3 Podcast #12 - Steve-O
Processed video #84: $50 Minecraft Server Computer | Step by Step Guide 2019
Processed video #85: The African World War | Animated History
Processed video #86: Brownies
Processed video #87: Things You Can Only Do In Skyrim VR
Processed video #88: Racing In The Rain At Bathurst Is Amazing
Processed video #89: Johnny vs. Crash Team Racing Nitro-Fueled
Processed video #90: What are Geedis and The Land of Ta? - Tales From the Internet
Processed video #91: Legends of Runeterra.exe
Processed video #92: Is 64 cores too many cores?!? w/ Steve from Gamers Nexus
Processed video #93: I played CS:GO with randoms and this happened..
Processed video #94: Building our NEW Video Editing Workstation - Start to Finish
Processed video #95: HOLY $H!T: Ion Hints At TIME SKIP After Shadowlands?! Arthas! New Char Customisation, Boost + MORE!
Processed video #96

ERROR: 8_4wtVnK8gU: YouTube said: Unable to extract video data


Processed video #125: Trying Prison Food With An Ex-Convict
Could not process video 8_4wtVnK8gU
Processed video #126: La vérité sur 6 sociétés secrètes
Processed video #127: REACTING TO MY MOST POPULAR VIDEOS
Processed video #128: bat soup 🦇🍜
Processed video #129: GIANT MONSTERS - Terrible Writing Advice
Processed video #130: This Mini-DTX Motherboard is NO JOKE!
Processed video #131: Epic Captain Marvel 2 News! Director GONE & Brie Making Demands!
Processed video #132: f0.7 – Ultrafast Lenses – Legends, budget options, modding, and testing – Epic Episode #9
Processed video #133: Balancing Strandbeest
Processed video #134: AJS News - 2K Football is Back!, COD at 15 Million, Reggie to Gamestop, Cyberpunk News, Stadia Doom!
Processed video #135: I'M RANK 1 IN THIS OSRS BOSS
Processed video #136: Kingdom Hearts Timeline - Chronologically Confused - Angry Video Game Nerd (AVGN)
Processed video #137: A WHOLE Gaming PC Setup for just $75... Can it GAME at 720p!?
Processed video #138: How Gre

ERROR: iHj6eAJWp2w: YouTube said: Unable to extract video data


Could not process video iHj6eAJWp2w
Processed video #150: JE RAPPELLE ENFIN CDISCOUNT
Processed video #151: Iran's Ballistic Missile Capabilities
Processed video #152: The NEW Caledonian Sleeper - London to Inverness Review (Club Solo Ensuite)
Processed video #153: À quoi sert la fourrure des capuches ?
Processed video #154: BEST uses for the new Controller Remapping on Nintendo Switch
Processed video #155: OnePlus 7 Pro - Hidden Camera Durability Test! Will it Scratch?
Processed video #156: Tommy Chong - Sting Operation: When the DEA Is Onto You - This Is Not Happening
Processed video #157: s1mple Drops A GODLIKE 81 FRAGS Vs BIG... Huge CHOKE! | ESl Pro League 2020
Processed video #158: Best Knife Openings of February 2020
Processed video #159: Oxford Admissions Question (No Calculator)
Processed video #160: Tow truck tricks: Don't get scammed after an accident (CBC Marketplace)
Processed video #161: This printer prints on ANYTHING!
Processed video #162: Disney Transport is huge. But 

ERROR: tqrYJIVZYtY: YouTube said: Unable to extract video data


Could not process video tqrYJIVZYtY
Processed video #167: Whatever Happened to Tourettes Guy?
Processed video #168: UFO || KREALPLAY (ft. Dana Snyder)
Processed video #169: History of Space Travel - Looking to the Stars - Extra History - #1
Processed video #170: Project Binky - Episode 30 - Austin Mini GT-Four - Turbocharged 4WD Mini
Processed video #171: FAKE Guitarists in the Media... DO THEY THINK WE'RE IDIOTS!?
Processed video #172: Top tips for making passive income in 2019. (as a millionaire)
Processed video #173: Best of Buzzfeed Pizza Cake - Buzzfeed Test #159
Processed video #174: The best video on YouTube, by far. (JackAsk #89)
Processed video #175: Wheelies, workout-fails en vetrolmeters in DUMPERTREETEN (222)!
Processed video #176: How Are These Players Grandmaster?! - Overwatch Streamer Moments Ep. 50
Processed video #177: Is Meat Bad for You? Is Meat Unhealthy?
Processed video #178: Pro players reaction to ZywOo plays (Part 2)
Processed video #179: Qui des HOMMES♂️ ou des

ERROR: ndZRdw_AdTI: YouTube said: Unable to extract video data


Processed video #224: The Truth About The Dark Web
Could not process video ndZRdw_AdTI
Processed video #225: Bill Bert Podcast | Episode 10
Processed video #226: Quiet 12V NOCTUA Fan on 24V Creality Ender 3
Processed video #227: The Legend RETURNS!  SteelSeries Sensei Ten Review
Processed video #228: Awesomest Blade Throwing Lesson EVER!
Processed video #229: I FOUND THE 5 BEST BANK ACCOUNTS!
Processed video #230: UHOH! NEW Joker Controversy, Trump Impeachment Whistleblower Updates, & Greta Thunberg Mocked
Processed video #231: If Video Games Were Under Quarantine
Processed video #232: tik toks to stop you from simping😋🥵💦
Processed video #233: Bill Bert Podcast | Episode 16
Processed video #234: Impeachment BACKFIRE Gets WORSE As Trump now DEFEATS Every 2020 Democrat In Major Polling
Processed video #235: ASRock X570 Creator Motherboard Analysis: The New Buzzword
Processed video #236: Samsung Galaxy Buds for $129 ?! - REVIEW
Processed video #237: I watched this transgender short film s

ERROR: nOOZvuOEdlU: YouTube said: Unable to extract video data


Processed video #247: Yo They Killed Cripps
Could not process video nOOZvuOEdlU
Processed video #248: Building A Modern PING PONG TABLE (It's Also A Dining Table) 🏓 Woodworking
Processed video #249: MrBeast - H3 Podcast #111
Processed video #250: TOTALEMENT PERDUS EN ALLEMAGNE ! (les ennuis commencent..) - RedboxTrip2 #2
Processed video #251: (1643) Simple Dimple from Raymond
Processed video #252: Having Dinner in front of a Jet Engine/Leaf Blower
Processed video #253: 8 Things Faker Does That You Probably Don't - League of Legends Season 10
Processed video #254: Dave Ramsey | The Ben Shapiro Show Sunday Special Ep. 36
Processed video #255: I Meditated Every Day for a Month. Here's What Happened
Processed video #256: Why Nikita Dragun Is Under Fire, John Bolton "Resigns", College Athlete Compensation, & WSP Day
Processed video #257: my mii's future girlfriend...
Processed video #258: My Cheap Mercedes E63 Wagon Might Be the Worst Car I Have Ever Owned ***BROKEN AGAIN***
Processed video

ERROR: B2ypFcHQBwI: YouTube said: Unable to extract video data


Processed video #276: Huge Abandoned Coal Mine with Cart Roller Coaster!
Could not process video B2ypFcHQBwI
Processed video #277: Imaqtpie - NEW SUPPORT MAIN... SUPPANYA
Processed video #278: Dennis bestookt gemeentehuis met bijtende mieren
Processed video #279: Sophos vs Malware
Processed video #280: Ad Astra is Clickbait
Processed video #281: OBS UPDATE 25: Vulkan & UWP Game Captures, Scene Importers (Everything you need to know)
Processed video #282: Wireless CHARGING for Gaming Mice? -- Logitech G PowerPlay Review
Processed video #283: Aventador Defeated by First Renter!
Processed video #284: How Michael Thomas SHATTERED the NFL record books
Processed video #285: ESPER CONTROL to SHUT DOWN Lukka Agent
Processed video #286: 10 Best Butt Joint Methods | Woodworking Tips & Tricks
Processed video #287: Speedzone-használt teszt: Audi A3 1.6(1998):Prémiumnak készült, használtként csak simán jó választás
Processed video #288: Echo: Best Hero in the HISTORY of Overwatch - Overwatch Funny 

ERROR: jBCchveAB4I: YouTube said: Unable to extract video data


Could not process video jBCchveAB4I
Processed video #289: How the Swiss Protected Hitler's Gold
Processed video #290: Modern Warfare In Depth: PKM LMG Review (Best Weapon for Ground War)
Processed video #291: Things I learned from Gorgc's PA | NO DEATHS
Processed video #292: DIE BESTE GAMING TASTATUR FÜR UNTER 40 EURO! Sharkoon SGK5 im Test
Processed video #293: Гарик Мартиросян х Олег Майами | ЧТО БЫЛО ДАЛЬШЕ? (Сабуров, Щербаков, Рептилоид, Тамби, Детков)
Processed video #294: 6 "Necessities" I No Longer Need Since Moving To Europe | Making it Work
Processed video #295: WOKE Writers End Up Single... Surprising? | Ep 127
Processed video #296: Chris D'Elia & Toki the Dumdum | TigerBelly 80
Processed video #297: WOTC heard that I liked "Cats" ... | Ikoria Story Preview
Processed video #298: Path of Exile 3.9: METAMORPH DAY # 1 Highlights HEADHUNTER DAY 1, THE DAY OF THE RIPS AND BUGS
Processed video #299: I Waited MONTHS for This!
Processed video #300: UK Flat Earth Activist is Completel

ERROR: 73zAI7yAgKo: YouTube said: Unable to extract video data


Processed video #338: tmux - The Desktop Environment for your Terminal - Lunduke Show
Could not process video 73zAI7yAgKo
Processed video #339: What will the new NFL 2K game look like?
Processed video #340: Stories from inside Microsoft (1998 - 2003)
Processed video #341: Sex Toys Unboxing (please read description)
Processed video #342: Ranking All the YouTube Rewinds
Processed video #343: JE REFAIS VOS VIEUX LOGOS (gratuitement pour la visibilité)
Processed video #344: Master Chief VS Doomguy (Halo VS Doom) | DEATH BATTLE!
Processed video #345: WEEKEND PRODUCT LAUNCH vlog
Processed video #346: Someone Stole my Spare Quad Plate Clutch. Out of my Insight
Processed video #347: I Was WRONG. I Was SO WRONG. DO NOT BUY Walmart's Overpowered Gaming PCs!!!!
Processed video #348: EVGA RTX 2080 Ti Kingpin PCB Review & Samsung vs. Micron
Processed video #349: Les records du monde les plus Incroyables !
Processed video #350: What Happened to PSY - Much More Than Just Gangnam Style
Processed video

ERROR: 2uOG2-gwjyk: YouTube said: Unable to extract video data


Could not process video 2uOG2-gwjyk
Processed video #355: Here's how you play a HEALER (Destiny 2)
Processed video #356: GIVING my OPPONENT GOOD CARDS and WINNING? 🍊
Processed video #357: Реутов ТВ: понять Россию через юмор / вДудь
Processed video #358: Why Scottie Pippen Was As Important As Michael Jordan
Processed video #359: RTX 3080 CPU Benchmark, AMD Ryzen 9 3950X vs. Intel Core i9 10900K
Processed video #360: ИгроСториз: PS5 и Xbox Series X — успех или позор?
Processed video #361: EVE V Tablet - Crowdsourced Surface Competitor (NO LONGER AVAILABLE)
Processed video #362: Jeffree Star faked everything about... well, everything.
Processed video #363: Joe Burrow is a SAVAGE!
Processed video #364: I'm Fixing my C10 Fuel Cell and Giving Away a New Miller Welder! Finnegan's Garage Ep.100
Processed video #365: ULTIMATE TIK TOK CRINGE MEMES #69 (ayy)
Processed video #366: 3 Bizarre Love Triangles That Ended in Murder
Processed video #367: Dota 2 WTF Moments 367
Processed video #368: Minec

ERROR: sAAtOn-c3DU: YouTube said: Unable to extract video data


Processed video #388: America’s War On Tumbleweeds
Could not process video sAAtOn-c3DU
Processed video #389: Werkbank und Shelter - Prepper Camp #009 | Fritz Meinecke
Processed video #390: Are these systems REALLY worth the effort?


In [5]:
# Merge overlapping segments.
for video in videos.values():
    merge_overlapping_intervals(video.segments)

In [19]:
# Let's take a look at some stats here.
def compute_stats(v):
    length = v.pafy.length
    
    segments = np.array(v.segments)
    total_segment_length = np.sum(segments[:, 1] - segments[:, 0])
    
    segment_ratio = total_segment_length / length
    
    return v, length, total_segment_length, segment_ratio
    
stats = [compute_stats(v) for v in videos.values()]
# Sort by decreasing segment ratio.
stats.sort(key=lambda x: -x[2])

_, video_lengths, total_segment_lengths, segment_ratios = zip(*stats)

In [20]:
from scipy.stats import describe

In [21]:
print(describe(video_lengths))

print(describe(total_segment_lengths))

print(describe(segment_ratios))

DescribeResult(nobs=390, minmax=(92, 26133), mean=1249.3333333333333, variance=3486314.5107112247, skewness=7.863723252680748, kurtosis=86.76541929196647)
DescribeResult(nobs=390, minmax=(1.1509940000000003, 1381.4160960000008), mean=59.29610081371311, variance=8354.080342361867, skewness=8.47331157579276, kurtosis=111.78246878455114)
DescribeResult(nobs=390, minmax=(0.0016372603129445238, 0.5357235919732442), mean=0.06340330732256481, variance=0.0044048296201444175, skewness=2.3455079722704464, kurtosis=8.639189319499005)


In [23]:
# What video has the worst ratio?
stats[2]

(Video(video_id='t3pI3JLsdpA', segments=[array([1343.728749, 1703.497484])], pafy=Title: MrBeast - H3 Podcast #111
 Author: H3 Podcast
 ID: t3pI3JLsdpA
 Duration: 01:15:02
 Rating: 4.8653631
 Views: 1514758
 Thumbnail: http://i.ytimg.com/vi/t3pI3JLsdpA/default.jpg),
 4502,
 359.7687349999999,
 0.07991309084851174)