## Data Analysis and Cleaning

In [1]:
import numpy as np
import pandas as pd

In [10]:
all_titles = pd.read_csv('data/title.basics.tsv', sep='\t')
all_ratings = pd.read_csv('data/title.ratings.tsv', sep='\t')

# Filter titles to only include movies and TV series
movies_and_series = all_titles[all_titles['titleType'].isin(['movie', 'tvSeries'])]

# Convert columns to correct data types
all_ratings['averageRating'] = pd.to_numeric(all_ratings['averageRating'], errors='coerce')
all_ratings['numVotes'] = pd.to_numeric(all_ratings['numVotes'], errors='coerce')

# Merge titles and ratings, keeping only movies and TV series
merged_data = movies_and_series.merge(all_ratings, on='tconst', how='inner')

# Filter movies/series with at least 1000 votes
df_filtered = merged_data[merged_data['numVotes'] >= 1000]

# Sort by rating (descending), then by numVotes (descending for tie-breaking)
df_sorted = df_filtered.sort_values(by=['averageRating', 'numVotes'], ascending=[False, False])

# Save to new TSV file
df_sorted.to_csv('data/filtered_sorted_with_ratings.tsv', sep='\t', index=False)

In [11]:
# IMDb-style weighted rating system for top 10,000 movies and series

# Calculate overall statistics for the weighted rating formula
overall_mean_rating = df_filtered['averageRating'].mean()
min_votes_required = df_filtered['numVotes'].quantile(0.75)  # Use 75th percentile as minimum

print(f"Overall mean rating: {overall_mean_rating:.2f}")
print(f"Minimum votes threshold (75th percentile): {min_votes_required:.0f}")

# Apply Bayesian weighted rating formula
# Weighted Rating = (v / (v + m)) * R + (m / (v + m)) * C
# Where: v = votes, m = min votes, R = average rating, C = overall mean
def calculate_weighted_rating(row):
    v = row['numVotes']
    R = row['averageRating']
    m = min_votes_required
    C = overall_mean_rating
    
    weighted_rating = (v / (v + m)) * R + (m / (v + m)) * C
    return weighted_rating

# Add weighted rating column
df_filtered['weightedRating'] = df_filtered.apply(calculate_weighted_rating, axis=1)

# Sort by weighted rating (descending), then by numVotes (descending for tie-breaking)
df_weighted_sorted = df_filtered.sort_values(by=['weightedRating', 'numVotes'], ascending=[False, False])

# Get top 10,000 based on weighted ratings
top_10000_weighted = df_weighted_sorted.head(10000)

# Save the weighted top 10,000 to file
top_10000_weighted.to_csv('data/top_10000_weighted_ratings.tsv', sep='\t', index=False)

print(f"\nTop 10 movies/series by weighted rating:")
print(top_10000_weighted[['primaryTitle', 'averageRating', 'numVotes', 'weightedRating']].head(10))

Overall mean rating: 6.39
Minimum votes threshold (75th percentile): 10202

Top 10 movies/series by weighted rating:
                      primaryTitle  averageRating  numVotes  weightedRating
175388                Breaking Bad            9.5   2405005        9.486874
67193     The Shawshank Redemption            9.3   3104334        9.290476
153336  Avatar: The Last Airbender            9.3    417720        9.230683
129412                    The Wire            9.3    413512        9.229994
176712             Game of Thrones            9.2   2485498        9.188523
39197                The Godfather            9.2   2163561        9.186824
80066                 The Sopranos            9.2    545115        9.148422
162488             The Dark Knight            9.1   3079402        9.091060
308327             Attack on Titan            9.1    649760        9.058146
234194                   Aspirants            9.1    316623        9.015484


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['weightedRating'] = df_filtered.apply(calculate_weighted_rating, axis=1)


## Adding Cast to data we already have

### Clean title.principals.tsv to only have movies/series in top 10000

In [33]:
# clean title.principals.tsv to only have movies/series in top 10000
all_principals = pd.read_csv('data/title.principals.tsv', sep='\t')
top_10000_df = pd.read_csv('data/top_10000_weighted_ratings.tsv', sep='\t')

# Filter all_principals to only include titles in top_10000_df
filtered_principals = all_principals[all_principals['tconst'].isin(top_10000_df['tconst'])]
filtered_principals.to_csv('data/filtered_title_principals_top_10000.tsv', sep='\t', index=False)

### Getting top 3 actor/actress names and their respective characters

In [3]:
principals = pd.read_csv('data/cast_data/filtered_title_principals_top_10000.tsv', sep='\t')
names = pd.read_csv('data/cast_data/name.basics.tsv', sep='\t')
top10000 = pd.read_csv('data/top_10000_weighted_ratings.tsv', sep='\t')

# Create a sample of top 10 entries for testing
top10_sample = top10000.head(10)

# Get each tconst from top10_sample and find the top 3 UNIQUE actors/actresses nconst in principals and their respective characters
def get_top_3_cast(tconst):
    cast = principals[principals['tconst'] == tconst]
    cast = cast[cast['category'].isin(['actor', 'actress'])]
    
    result = []
    seen_nconst = set()  # Track unique nconst values
    
    for _, row in cast.iterrows():
        nconst = row['nconst']
        
        # Skip if we've already seen this nconst
        if nconst in seen_nconst:
            continue
            
        character = row['characters']
        
        # Clean the character field - remove brackets and extra quotes
        if pd.notna(character) and character != '\\N':
            import json
            try:
                # Try to parse as JSON first
                character_list = json.loads(character)
                if isinstance(character_list, list) and len(character_list) > 0:
                    clean_character = character_list[0]  # Get first character name
                else:
                    clean_character = str(character)
            except (json.JSONDecodeError, ValueError):
                # If JSON parsing fails, try manual cleaning
                clean_character = character.strip('[]"').replace('""', '"')
        else:
            clean_character = "Unknown Character"
        
        name_row = names[names['nconst'] == nconst]
        
        if not name_row.empty:
            actor_name = name_row.iloc[0]['primaryName']
            result.append((actor_name, clean_character))
            seen_nconst.add(nconst)
            
            # Stop when we have 3 unique cast members
            if len(result) >= 3:
                break
    
    print(f"Processed {tconst}: Found {len(result)} unique cast members.")
    print(result)
    
    return result

# Apply function with progress bar
from tqdm import tqdm
tqdm.pandas(desc="Processing cast data")

print("Extracting top 3 unique cast members for each title (top 10 sample)...")
top10_sample['top_3_cast'] = top10_sample['tconst'].head(10).progress_apply(get_top_3_cast) # Remove .head(10) to process all 10,000 entries

# Save results
top10_sample.to_csv('data/cast_data/top_10_with_cast.tsv', sep='\t', index=False)
print(f"\nProcessing completed! Results saved to 'data/cast_data/top_10_with_cast.tsv'")

# Display sample results
print("\nSample results:")
print(top10_sample[['primaryTitle', 'top_3_cast']])

Extracting top 3 unique cast members for each title (top 10 sample)...


Processing cast data:  20%|██        | 2/10 [00:01<00:07,  1.09it/s]

Processed tt0903747: Found 3 unique cast members.
[('Bryan Cranston', 'Walter White'), ('Aaron Paul', 'Jesse Pinkman'), ('Anna Gunn', 'Skyler White')]


Processing cast data:  30%|███       | 3/10 [00:03<00:08,  1.25s/it]

Processed tt0111161: Found 3 unique cast members.
[('Tim Robbins', 'Andy Dufresne'), ('Morgan Freeman', "Ellis Boyd 'Red' Redding"), ('Bob Gunton', 'Warden Norton')]


Processing cast data:  40%|████      | 4/10 [00:05<00:08,  1.43s/it]

Processed tt0417299: Found 3 unique cast members.
[('Dee Bradley Baker', 'Appa'), ('Zach Tyler Eisen', 'Aang'), ('Mae Whitman', 'Katara')]


Processing cast data:  50%|█████     | 5/10 [00:06<00:07,  1.53s/it]

Processed tt0306414: Found 3 unique cast members.
[('Dominic West', "Detective James 'Jimmy' McNulty"), ('Lance Reddick', 'Lieutenant Cedric Daniels'), ('Sonja Sohn', "Detective Shakima 'Kima' Greggs")]


Processing cast data:  60%|██████    | 6/10 [00:08<00:06,  1.58s/it]

Processed tt0944947: Found 3 unique cast members.
[('Emilia Clarke', 'Daenerys Targaryen'), ('Peter Dinklage', 'Tyrion Lannister'), ('Kit Harington', 'Jon Snow')]


Processing cast data:  70%|███████   | 7/10 [00:10<00:04,  1.62s/it]

Processed tt0068646: Found 3 unique cast members.
[('Marlon Brando', 'Don Vito Corleone'), ('Al Pacino', 'Michael'), ('James Caan', 'Sonny')]


Processing cast data:  80%|████████  | 8/10 [00:12<00:03,  1.65s/it]

Processed tt0141842: Found 3 unique cast members.
[('James Gandolfini', 'Tony Soprano'), ('Lorraine Bracco', 'Dr. Jennifer Melfi'), ('Edie Falco', 'Carmela Soprano')]


Processing cast data:  90%|█████████ | 9/10 [00:13<00:01,  1.67s/it]

Processed tt0468569: Found 3 unique cast members.
[('Christian Bale', 'Bruce Wayne'), ('Heath Ledger', 'Joker'), ('Aaron Eckhart', 'Harvey Dent')]


Processing cast data: 100%|██████████| 10/10 [00:15<00:00,  1.68s/it]

Processed tt2560140: Found 3 unique cast members.
[('Jessie James Grelle', 'Armin Arlert'), ('Bryce Papenbrook', 'Eren Jaeger'), ('Trina Nishimura', 'Mikasa Ackermann')]


Processing cast data: 100%|██████████| 10/10 [00:17<00:00,  1.72s/it]

Processed tt14392248: Found 3 unique cast members.
[('Naveen Kasturia', 'Abhilash Sharma'), ('Shivankit Singh Parihar', 'Guri'), ('Abhilash Thapliyal', 'SK')]

Processing completed! Results saved to 'data/cast_data/top_10_with_cast.tsv'

Sample results:
                 primaryTitle  \
0                Breaking Bad   
1    The Shawshank Redemption   
2  Avatar: The Last Airbender   
3                    The Wire   
4             Game of Thrones   
5               The Godfather   
6                The Sopranos   
7             The Dark Knight   
8             Attack on Titan   
9                   Aspirants   

                                          top_3_cast  
0  [(Bryan Cranston, Walter White), (Aaron Paul, ...  
1  [(Tim Robbins, Andy Dufresne), (Morgan Freeman...  
2  [(Dee Bradley Baker, Appa), (Zach Tyler Eisen,...  
3  [(Dominic West, Detective James 'Jimmy' McNult...  
4  [(Emilia Clarke, Daenerys Targaryen), (Peter D...  
5  [(Marlon Brando, Don Vito Corleone), (Al Pacin...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top10_sample['top_3_cast'] = top10_sample['tconst'].head(10).progress_apply(get_top_3_cast) # Remove .head(10) to process all 10,000 entries


## Web Scraping for Movie/Series Description

In [32]:
# Web scraping script for StreamWithVPN data
%pip install beautifulsoup4
import requests
from bs4 import BeautifulSoup
import time
import re
from urllib.parse import quote
import pandas as pd
from tqdm import tqdm

# Load the top 10,000 weighted ratings data
top_10000_df = pd.read_csv('data/top_10000_weighted_ratings.tsv', sep='\t')

def clean_title_for_url(title):
    """
    Clean and format movie/series title for URL generation
    """
    # Remove special characters and replace spaces with hyphens
    cleaned = re.sub(r'[^\w\s-]', '', title)
    cleaned = re.sub(r'\s+', '-', cleaned.strip())
    return cleaned.lower()

def generate_streamwithvpn_url(title, year):
    """
    Generate StreamWithVPN URL based on title and year
    Example: "The Wolf's Call" (2019) -> "https://www.streamwithvpn.com/the-wolfs-call-2019"
    """
    clean_title = clean_title_for_url(title)
    # Handle cases where year might be NaN or missing
    if pd.isna(year):
        return f"https://www.streamwithvpn.com/{clean_title}"
    else:
        return f"https://www.streamwithvpn.com/{clean_title}-{int(year)}"

def scrape_movie_data(url, tconst, title, year, endYear, titleType, isAdult, runtime, genres, rating, numVotes):
    """
    Scrape movie/series data from StreamWithVPN
    Returns dictionary with description, cast, and streaming platforms
    """
    try:
        # Add delay to be respectful to the server
        time.sleep(1)
        
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        
        # Try the original URL first
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            print(f"✓ Success with original URL: {url}")
        except requests.exceptions.HTTPError as e:
            if e.response.status_code in [404, 403]:  # Page not found or forbidden
                # Try without year
                url_without_year = generate_streamwithvpn_url(title, None)
                print(f"⚠ Original URL failed ({e.response.status_code}), trying without year: {url_without_year}")
                
                try:
                    response = requests.get(url_without_year, headers=headers, timeout=10)
                    response.raise_for_status()
                    print(f"✓ Success with URL without year: {url_without_year}")
                    # Update the URL in the data dictionary for accuracy
                    url = url_without_year
                except requests.exceptions.HTTPError:
                    print(f"✗ Both URLs failed for {title}")
                    raise  # Re-raise the exception to be caught by outer try-catch
            else:
                raise  # Re-raise non-404/403 errors
        
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Initialize data dictionary
        movie_data = {
            'tconst': tconst,
            'titleType': titleType,
            'title': title,
            'year': year,
            'endYear': endYear,
            'isAdult': isAdult,
            'runtime': runtime,
            'genres': genres,
            'rating': rating,
            'numVotes': numVotes,
            'description': None,
            'cast': None,
            'streaming_platforms': None,
            'url': url,
            'scrape_status': 'success'
        }
        
        # Extract DESCRIPTION - multiple approaches
        description_element = soup.find('span', class_='rt-Text EntryDetailDescription_contentDescription__tXYGO EntryDetailDescription_expanded__3a0Gs')
        
        if not description_element:
            description_element = soup.find('span', class_=re.compile('EntryDetailDescription_contentDescription'))
        
        if not description_element:
            description_element = soup.find('span', class_=re.compile('contentDescription'))
        
        if not description_element:
            description_element = soup.select_one('span[class*="EntryDetailDescription_contentDescription"]')
        
        if description_element:
            movie_data['description'] = description_element.get_text(strip=True)
            print(f"✓ Found description for {title}: {movie_data['description'][:100]}...")
        else:
            print(f"✗ No description found for {title}")
        
        """
        # Extract CAST information
        cast_list = []
        # Target container div
        container_div = soup.find('div', class_='rt-Flex rt-r-fd-column rt-r-gap rt-r-px rt-r-pt rt-r-w', style='--gap: 2px; --pl: 16px; --pr: 16px; --pt: 8px; --width: 100%;')
        if container_div:
            cast_spans = container_div.find_all('span', {'data-accent-color': 'gray', 'class_': 'rt-Text rt-r-size-3 rt-r-weight-medium', 'style': 'min-width: 0px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;'})
            if cast_spans:
                for span in cast_spans:
                    cast_list.append(span.get_text(strip=True))
                movie_data['cast'] = ', '.join(cast_list)
                print(f"✓ Found cast for {title}: {movie_data['cast'][:100]}...")
            else:
                print(f"✗ Span not found for {title}")
        else:
            print(f"✗ Div not found for {title}")

        # Extract STREAMING PLATFORMS information
        platform_elements = soup.find_all('h2', class_='rt-Heading rt-r-size-5 rt-r-weight-medium rt-r-ta-left')
        if platform_elements:
            platforms = [elem.get_text(strip=True) for elem in platform_elements]
            movie_data['streaming_platforms'] = ', '.join(platforms)
        else:
            print(f"✗ No streaming platforms found for {title}")
        """
        
        return movie_data
        
    except requests.RequestException as e:
        print(f"Request error for {title}: {e}")
        return {
            'tconst': tconst,
            'title': title,
            'url': url,
            'description': None,
            'cast': None,
            'streaming_platforms': None,
            'scrape_status': f'request_error: {str(e)}'
        }
    except Exception as e:
        print(f"Parsing error for {title}: {e}")
        return {
            'tconst': tconst,
            'title': title,
            'url': url,
            'description': None,
            'cast': None,
            'streaming_platforms': None,
            'scrape_status': f'parsing_error: {str(e)}'
        }

# Initialize list to store scraped data
scraped_data = []

# Sample scraping for first 2 entries (for faster debugging)
print("Starting web scraping")
sample_df = top_10000_df.head(50) # Change to 10000 for full run (2 for testing)

for index, row in tqdm(sample_df.iterrows(), total=len(sample_df), desc="Scraping movies"):
    tconst = row['tconst']
    title = row['primaryTitle']
    year = row['startYear']
    endYear = row['endYear']
    titleType = row['titleType']
    isAdult = row['isAdult']
    runtime = row['runtimeMinutes']
    genres = row['genres']
    rating = row['averageRating']
    numVotes = row['numVotes']
    
    
    # Generate URL
    url = generate_streamwithvpn_url(title, year)
    print(f"\nScraping: {title} ({year}) - {url}")
    
    # Scrape data
    movie_data = scrape_movie_data(url, tconst, title, year, endYear, titleType, isAdult, runtime, genres, rating, numVotes)
    scraped_data.append(movie_data)

# Convert to DataFrame
scraped_df = pd.DataFrame(scraped_data)

# Display results
print(f"\nScraping completed! Found data for {len(scraped_df)} entries")
print(f"Success rate: {len(scraped_df[scraped_df['scrape_status'] == 'success'])} / {len(scraped_df)}")

# Show sample results
print("\nSample scraped data:")
print(scraped_df[['title', 'url', 'scrape_status', 'description', 'cast']].head())

# Save scraped data
scraped_df.to_csv('data/top10000_final.tsv', sep='\t', index=False)
print("\nSample data saved to 'data/top10000_final.tsv'")


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.
Starting web scraping


Scraping movies:   0%|          | 0/50 [00:00<?, ?it/s]


Scraping: Breaking Bad (2008) - https://www.streamwithvpn.com/breaking-bad-2008


Scraping movies:   2%|▏         | 1/50 [00:01<01:24,  1.73s/it]

✓ Success with original URL: https://www.streamwithvpn.com/breaking-bad-2008
✓ Found description for Breaking Bad: Walter White, a New Mexico chemistry teacher, is diagnosed with Stage III cancer and given a prognos...

Scraping: The Shawshank Redemption (1994) - https://www.streamwithvpn.com/the-shawshank-redemption-1994


Scraping movies:   4%|▍         | 2/50 [00:03<01:21,  1.69s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-shawshank-redemption-1994
✓ Found description for The Shawshank Redemption: Imprisoned in the 1940s for the double murder of his wife and her lover, upstanding banker Andy Dufr...

Scraping: Avatar: The Last Airbender (2005) - https://www.streamwithvpn.com/avatar-the-last-airbender-2005


Scraping movies:   6%|▌         | 3/50 [00:05<01:20,  1.72s/it]

✓ Success with original URL: https://www.streamwithvpn.com/avatar-the-last-airbender-2005
✓ Found description for Avatar: The Last Airbender: In a war-torn world of elemental magic, a young boy reawakens to undertake a dangerous mystic quest ...

Scraping: The Wire (2002) - https://www.streamwithvpn.com/the-wire-2002


Scraping movies:   8%|▊         | 4/50 [00:06<01:19,  1.72s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-wire-2002
✓ Found description for The Wire: Told from the points of view of both the Baltimore homicide and narcotics detectives and their targe...

Scraping: Game of Thrones (2011) - https://www.streamwithvpn.com/game-of-thrones-2011


Scraping movies:  10%|█         | 5/50 [00:08<01:20,  1.80s/it]

✓ Success with original URL: https://www.streamwithvpn.com/game-of-thrones-2011
✓ Found description for Game of Thrones: Seven noble families fight for control of the mythical land of Westeros. Friction between the houses...

Scraping: The Godfather (1972) - https://www.streamwithvpn.com/the-godfather-1972


Scraping movies:  12%|█▏        | 6/50 [00:10<01:17,  1.77s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-godfather-1972
✓ Found description for The Godfather: Spanning the years 1945 to 1955, a chronicle of the fictional Italian-American Corleone crime family...

Scraping: The Sopranos (1999) - https://www.streamwithvpn.com/the-sopranos-1999


Scraping movies:  14%|█▍        | 7/50 [00:12<01:20,  1.88s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-sopranos-1999
✓ Found description for The Sopranos: The story of New Jersey-based Italian-American mobster Tony Soprano and the difficulties he faces as...

Scraping: The Dark Knight (2008) - https://www.streamwithvpn.com/the-dark-knight-2008


Scraping movies:  16%|█▌        | 8/50 [00:14<01:16,  1.83s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-dark-knight-2008
✓ Found description for The Dark Knight: Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney ...

Scraping: Attack on Titan (2013) - https://www.streamwithvpn.com/attack-on-titan-2013


Scraping movies:  18%|█▊        | 9/50 [00:16<01:16,  1.87s/it]

✓ Success with original URL: https://www.streamwithvpn.com/attack-on-titan-2013
✓ Found description for Attack on Titan: 100 years ago, the last remnants of humanity were forced to retreat behind the towering walls of a f...

Scraping: Aspirants (2021) - https://www.streamwithvpn.com/aspirants-2021


Scraping movies:  20%|██        | 10/50 [00:18<01:12,  1.82s/it]

✓ Success with original URL: https://www.streamwithvpn.com/aspirants-2021
✓ Found description for Aspirants: Aspirants is a story of 3 friends - Abhilash, SK, and Guri. The story takes place in the past and th...

Scraping: The Lord of the Rings: The Return of the King (2003) - https://www.streamwithvpn.com/the-lord-of-the-rings-the-return-of-the-king-2003


Scraping movies:  22%|██▏       | 11/50 [00:19<01:09,  1.78s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-lord-of-the-rings-the-return-of-the-king-2003
✓ Found description for The Lord of the Rings: The Return of the King: As armies mass for a final battle that will decide the fate of the world--and powerful, ancient forc...

Scraping: Fullmetal Alchemist: Brotherhood (2009) - https://www.streamwithvpn.com/fullmetal-alchemist-brotherhood-2009


Scraping movies:  24%|██▍       | 12/50 [00:21<01:09,  1.82s/it]

✓ Success with original URL: https://www.streamwithvpn.com/fullmetal-alchemist-brotherhood-2009
✓ Found description for Fullmetal Alchemist: Brotherhood: Disregard for alchemy’s laws ripped half of Edward Elric’s limbs from his body and left his brother ...

Scraping: Schindler's List (1993) - https://www.streamwithvpn.com/schindlers-list-1993


Scraping movies:  26%|██▌       | 13/50 [00:23<01:06,  1.81s/it]

✓ Success with original URL: https://www.streamwithvpn.com/schindlers-list-1993
✓ Found description for Schindler's List: The true story of how businessman Oskar Schindler saved over a thousand Jewish lives from the Nazis ...

Scraping: The Godfather Part II (1974) - https://www.streamwithvpn.com/the-godfather-part-ii-1974


Scraping movies:  28%|██▊       | 14/50 [00:25<01:03,  1.76s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-godfather-part-ii-1974
✓ Found description for The Godfather Part II: In the continuing saga of the Corleone crime family, a young Vito Corleone grows up in Sicily and in...

Scraping: Sherlock (2010) - https://www.streamwithvpn.com/sherlock-2010


Scraping movies:  30%|███       | 15/50 [00:26<01:01,  1.76s/it]

✓ Success with original URL: https://www.streamwithvpn.com/sherlock-2010
✓ Found description for Sherlock: A modern update finds the famous sleuth and his doctor partner solving crime in 21st century London....

Scraping: 12 Angry Men (1957) - https://www.streamwithvpn.com/12-angry-men-1957


Scraping movies:  32%|███▏      | 16/50 [00:28<00:58,  1.73s/it]

✓ Success with original URL: https://www.streamwithvpn.com/12-angry-men-1957
✓ Found description for 12 Angry Men: The defense and the prosecution have rested and the jury is filing into the jury room to decide if a...

Scraping: The Office (2005) - https://www.streamwithvpn.com/the-office-2005


Scraping movies:  34%|███▍      | 17/50 [00:30<00:58,  1.77s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-office-2005
✓ Found description for The Office: The everyday lives of office employees in the Scranton, Pennsylvania branch of the fictional Dunder ...

Scraping: Better Call Saul (2015) - https://www.streamwithvpn.com/better-call-saul-2015


Scraping movies:  36%|███▌      | 18/50 [00:32<00:56,  1.77s/it]

✓ Success with original URL: https://www.streamwithvpn.com/better-call-saul-2015
✓ Found description for Better Call Saul: Six years before Saul Goodman meets Walter White. We meet him when the man who will become Saul Good...

Scraping: Rick and Morty (2013) - https://www.streamwithvpn.com/rick-and-morty-2013


Scraping movies:  38%|███▊      | 19/50 [00:33<00:55,  1.78s/it]

✓ Success with original URL: https://www.streamwithvpn.com/rick-and-morty-2013
✓ Found description for Rick and Morty: Rick is a mentally-unbalanced but scientifically gifted old man who has recently reconnected with hi...

Scraping: Arcane (2021) - https://www.streamwithvpn.com/arcane-2021


Scraping movies:  40%|████      | 20/50 [00:35<00:52,  1.73s/it]

✓ Success with original URL: https://www.streamwithvpn.com/arcane-2021
✓ Found description for Arcane: Amid the stark discord of twin cities Piltover and Zaun, two sisters fight on rival sides of a war b...

Scraping: One Piece (1999) - https://www.streamwithvpn.com/one-piece-1999


Scraping movies:  42%|████▏     | 21/50 [00:37<00:51,  1.79s/it]

✓ Success with original URL: https://www.streamwithvpn.com/one-piece-1999
✓ Found description for One Piece: Years ago, the fearsome Pirate King, Gol D. Roger was executed leaving a huge pile of treasure and t...

Scraping: The Lord of the Rings: The Fellowship of the Ring (2001) - https://www.streamwithvpn.com/the-lord-of-the-rings-the-fellowship-of-the-ring-2001


Scraping movies:  44%|████▍     | 22/50 [00:39<00:49,  1.75s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-lord-of-the-rings-the-fellowship-of-the-ring-2001
✓ Found description for The Lord of the Rings: The Fellowship of the Ring: Young hobbit Frodo Baggins, after inheriting a mysterious ring from his uncle Bilbo, must leave his ...

Scraping: Friends (1994) - https://www.streamwithvpn.com/friends-1994


Scraping movies:  46%|████▌     | 23/50 [00:40<00:48,  1.79s/it]

✓ Success with original URL: https://www.streamwithvpn.com/friends-1994
✓ Found description for Friends: Six young people from New York City, on their own and struggling to survive in the real world, find ...

Scraping: True Detective (2014) - https://www.streamwithvpn.com/true-detective-2014


Scraping movies:  48%|████▊     | 24/50 [00:42<00:45,  1.74s/it]

✓ Success with original URL: https://www.streamwithvpn.com/true-detective-2014
✓ Found description for True Detective: An American anthology police detective series utilizing multiple timelines in which investigations s...

Scraping: Sapne Vs Everyone (2023) - https://www.streamwithvpn.com/sapne-vs-everyone-2023


Scraping movies:  50%|█████     | 25/50 [00:44<00:42,  1.70s/it]

✓ Success with original URL: https://www.streamwithvpn.com/sapne-vs-everyone-2023
✓ Found description for Sapne Vs Everyone: Two obsessive dreamers collide with the resistance of expectations, morality and each other....

Scraping: Hunter x Hunter (2011) - https://www.streamwithvpn.com/hunter-x-hunter-2011


Scraping movies:  52%|█████▏    | 26/50 [00:46<00:41,  1.73s/it]

✓ Success with original URL: https://www.streamwithvpn.com/hunter-x-hunter-2011
✓ Found description for Hunter x Hunter: To fulfill his dreams of becoming a legendary Hunter like his dad, a young boy must pass a rigorous ...

Scraping: Death Note (2006) - https://www.streamwithvpn.com/death-note-2006


Scraping movies:  54%|█████▍    | 27/50 [00:47<00:39,  1.71s/it]

✓ Success with original URL: https://www.streamwithvpn.com/death-note-2006
✓ Found description for Death Note: Light Yagami is an ace student with great prospects—and he’s bored out of his mind. But all that cha...

Scraping: Dexter: Resurrection (2025) - https://www.streamwithvpn.com/dexter-resurrection-2025
⚠ Original URL failed (404), trying without year: https://www.streamwithvpn.com/dexter-resurrection
⚠ Original URL failed (404), trying without year: https://www.streamwithvpn.com/dexter-resurrection


Scraping movies:  56%|█████▌    | 28/50 [00:49<00:39,  1.81s/it]

✓ Success with URL without year: https://www.streamwithvpn.com/dexter-resurrection
✓ Found description for Dexter: Resurrection: Dexter Morgan awakens from a coma to find Harrison gone without a trace. Realizing the weight of wha...

Scraping: Seinfeld (1989) - https://www.streamwithvpn.com/seinfeld-1989


Scraping movies:  58%|█████▊    | 29/50 [00:51<00:38,  1.83s/it]

✓ Success with original URL: https://www.streamwithvpn.com/seinfeld-1989
✓ Found description for Seinfeld: A stand-up comedian and his three offbeat friends weather the pitfalls and payoffs of life in New Yo...

Scraping: Firefly (2002) - https://www.streamwithvpn.com/firefly-2002


Scraping movies:  60%|██████    | 30/50 [00:53<00:36,  1.82s/it]

✓ Success with original URL: https://www.streamwithvpn.com/firefly-2002
✓ Found description for Firefly: In the year 2517, after the arrival of humans in a new star system, follow the adventures of the ren...

Scraping: Batman: The Animated Series (1992) - https://www.streamwithvpn.com/batman-the-animated-series-1992


Scraping movies:  62%|██████▏   | 31/50 [00:55<00:33,  1.78s/it]

✓ Success with original URL: https://www.streamwithvpn.com/batman-the-animated-series-1992
✓ Found description for Batman: The Animated Series: Vowing to avenge the murder of his parents, Bruce Wayne devotes his life to wiping out crime in Goth...

Scraping: Inception (2010) - https://www.streamwithvpn.com/inception-2010


Scraping movies:  64%|██████▍   | 32/50 [00:56<00:32,  1.81s/it]

✓ Success with original URL: https://www.streamwithvpn.com/inception-2010
✓ Found description for Inception: Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his target...

Scraping: Fight Club (1999) - https://www.streamwithvpn.com/fight-club-1999


Scraping movies:  66%|██████▌   | 33/50 [00:58<00:30,  1.78s/it]

✓ Success with original URL: https://www.streamwithvpn.com/fight-club-1999
✓ Found description for Fight Club: A ticking-time-bomb insomniac and a slippery soap salesman channel primal male aggression into a sho...

Scraping: Forrest Gump (1994) - https://www.streamwithvpn.com/forrest-gump-1994


Scraping movies:  68%|██████▊   | 34/50 [01:00<00:28,  1.75s/it]

✓ Success with original URL: https://www.streamwithvpn.com/forrest-gump-1994
✓ Found description for Forrest Gump: A man with a low IQ has accomplished great things in his life and been present during significant hi...

Scraping: Pulp Fiction (1994) - https://www.streamwithvpn.com/pulp-fiction-1994


Scraping movies:  70%|███████   | 35/50 [01:02<00:26,  1.76s/it]

✓ Success with original URL: https://www.streamwithvpn.com/pulp-fiction-1994
✓ Found description for Pulp Fiction: A burger-loving hit man, his philosophical partner, a drug-addled gangster's moll and a washed-up bo...

Scraping: The Lord of the Rings: The Two Towers (2002) - https://www.streamwithvpn.com/the-lord-of-the-rings-the-two-towers-2002


Scraping movies:  72%|███████▏  | 36/50 [01:03<00:24,  1.73s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-lord-of-the-rings-the-two-towers-2002
✓ Found description for The Lord of the Rings: The Two Towers: Frodo Baggins and the other members of the Fellowship continue on their sacred quest to destroy the ...

Scraping: Our Planet (2019) - https://www.streamwithvpn.com/our-planet-2019


Scraping movies:  74%|███████▍  | 37/50 [01:05<00:21,  1.68s/it]

✓ Success with original URL: https://www.streamwithvpn.com/our-planet-2019
✓ Found description for Our Planet: Experience our planet's natural beauty and examine how climate change impacts all living creatures i...

Scraping: Sandeep Bhaiya (2023) - https://www.streamwithvpn.com/sandeep-bhaiya-2023


Scraping movies:  76%|███████▌  | 38/50 [01:06<00:19,  1.63s/it]

✓ Success with original URL: https://www.streamwithvpn.com/sandeep-bhaiya-2023
✓ Found description for Sandeep Bhaiya: ‘Sandeep Bhaiya’ is a spinoff of the blockbuster webshow TVF Aspirants. It will have a total of eigh...

Scraping: TVF Pitchers (2015) - https://www.streamwithvpn.com/tvf-pitchers-2015


Scraping movies:  78%|███████▊  | 39/50 [01:08<00:17,  1.63s/it]

✓ Success with original URL: https://www.streamwithvpn.com/tvf-pitchers-2015
✓ Found description for TVF Pitchers: A story of trials and tribulations of four young entrepreneurs who quit their day jobs in order to p...

Scraping: Panchayat (2020) - https://www.streamwithvpn.com/panchayat-2020


Scraping movies:  80%|████████  | 40/50 [01:10<00:16,  1.63s/it]

✓ Success with original URL: https://www.streamwithvpn.com/panchayat-2020
✓ Found description for Panchayat: Panchayat is a comedy-drama, which captures the journey of an engineering graduate Abhishek, who for...

Scraping: The Good, the Bad and the Ugly (1966) - https://www.streamwithvpn.com/the-good-the-bad-and-the-ugly-1966


Scraping movies:  82%|████████▏ | 41/50 [01:11<00:14,  1.62s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-good-the-bad-and-the-ugly-1966
✓ Found description for The Good, the Bad and the Ugly: While the Civil War rages on between the Union and the Confederacy, three men – a quiet loner, a rut...

Scraping: The Twilight Zone (1959) - https://www.streamwithvpn.com/the-twilight-zone-1959


Scraping movies:  84%|████████▍ | 42/50 [01:13<00:13,  1.69s/it]

✓ Success with original URL: https://www.streamwithvpn.com/the-twilight-zone-1959
✓ Found description for The Twilight Zone: An anthology series containing drama, psychological thriller, fantasy, science fiction, suspense, an...

Scraping: Leyla and Mecnun (2011) - https://www.streamwithvpn.com/leyla-and-mecnun-2011


Scraping movies:  86%|████████▌ | 43/50 [01:15<00:11,  1.69s/it]

✓ Success with original URL: https://www.streamwithvpn.com/leyla-and-mecnun-2011
✓ Found description for Leyla and Mecnun: Leyla ile Mecnun is a Turkish television comedy series. The show is set in Istanbul, Turkey and prem...

Scraping: Gravity Falls (2012) - https://www.streamwithvpn.com/gravity-falls-2012


Scraping movies:  88%|████████▊ | 44/50 [01:16<00:10,  1.69s/it]

✓ Success with original URL: https://www.streamwithvpn.com/gravity-falls-2012
✓ Found description for Gravity Falls: Twin brother and sister Dipper and Mabel Pines are in for an unexpected adventure when they spend th...

Scraping: Cowboy Bebop (1998) - https://www.streamwithvpn.com/cowboy-bebop-1998


Scraping movies:  90%|█████████ | 45/50 [01:18<00:08,  1.72s/it]

✓ Success with original URL: https://www.streamwithvpn.com/cowboy-bebop-1998
✓ Found description for Cowboy Bebop: In 2071, roughly fifty years after an accident with a hyperspace gateway made the Earth almost uninh...

Scraping: Fargo (2014) - https://www.streamwithvpn.com/fargo-2014


Scraping movies:  92%|█████████▏| 46/50 [01:20<00:06,  1.73s/it]

✓ Success with original URL: https://www.streamwithvpn.com/fargo-2014
✓ Found description for Fargo: A close-knit anthology series dealing with stories involving malice, violence and murder based in an...

Scraping: Ted Lasso (2020) - https://www.streamwithvpn.com/ted-lasso-2020


Scraping movies:  94%|█████████▍| 47/50 [01:22<00:05,  1.75s/it]

✓ Success with original URL: https://www.streamwithvpn.com/ted-lasso-2020
✓ Found description for Ted Lasso: Ted Lasso, an American football coach, moves to England when he's hired to manage a soccer team—desp...

Scraping: Kota Factory (2019) - https://www.streamwithvpn.com/kota-factory-2019


Scraping movies:  96%|█████████▌| 48/50 [01:23<00:03,  1.71s/it]

✓ Success with original URL: https://www.streamwithvpn.com/kota-factory-2019
✓ Found description for Kota Factory: In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexcep...

Scraping: Succession (2018) - https://www.streamwithvpn.com/succession-2018


Scraping movies:  98%|█████████▊| 49/50 [01:25<00:01,  1.72s/it]

✓ Success with original URL: https://www.streamwithvpn.com/succession-2018
✓ Found description for Succession: Follow the lives of the Roy family as they contemplate their future once their aging father begins t...

Scraping: Bluey (2018) - https://www.streamwithvpn.com/bluey-2018


Scraping movies: 100%|██████████| 50/50 [01:27<00:00,  1.75s/it]

✓ Success with original URL: https://www.streamwithvpn.com/bluey-2018
✓ Found description for Bluey: Bluey is an inexhaustible six year-old Blue Heeler dog, who loves to play and turns everyday family ...

Scraping completed! Found data for 50 entries
Success rate: 50 / 50

Sample scraped data:
                        title  \
0                Breaking Bad   
1    The Shawshank Redemption   
2  Avatar: The Last Airbender   
3                    The Wire   
4             Game of Thrones   

                                                 url scrape_status  \
0    https://www.streamwithvpn.com/breaking-bad-2008       success   
1  https://www.streamwithvpn.com/the-shawshank-re...       success   
2  https://www.streamwithvpn.com/avatar-the-last-...       success   
3        https://www.streamwithvpn.com/the-wire-2002       success   
4  https://www.streamwithvpn.com/game-of-thrones-...       success   

                                         description  cast  
0  Walter White, a New 


