Import the data

In [None]:
import pandas as pd
# Load the content-analysis export; later cells read its "description",
# "video_id" and "creator_id" columns.
df = pd.read_csv("content_analysis.csv")
# Preview the first rows to sanity-check the load.
df.head()

Create the list of political entities according to their ideologies

In [None]:
# Parties and European Parliament groups counted as "left" by the classifier.
# NOTE(review): "S&D", "Renew Europe" and "EFA" are broad groups; confirm they
# are meant to be labelled left for this analysis.
# NOTE(review): several entries are very short acronyms ("SI", "BE", "SP",
# "IU") that also occur inside ordinary words; make sure the matcher treats
# them as whole tokens.
left = [
    "Die Linke",
    "IU",
    "Podemos",
    "PCE",
    "PCF",
    "LFI",
    "PRC",
    "SI",
    "Syriza",
    "BE",
    "Vänsterpartiet",
    "Vasemmistoliitto",
    "AKEL",
    "PTB/PVDA",
    "KPÖ",
    "SP",
    "Enhedslisten",
    "Rødt",
    "PST/POP",
    "PIE",
    "The Left",
    "Razem",
    "EFA",
    "S&D",
    "Renew Europe",
]

# Parties and European Parliament groups counted as "right" by the classifier.
# TODO: Add more right parties to balance it with the left ones
# NOTE(review): "SPD" is ambiguous (it is also the acronym of the German
# Social Democrats); presumably the Czech party is meant here, please confirm.
right = [
    "EPP",
    "ECR",
    "PiS",
    "VOX",
    "ID",
    "RN",
    "Lega",
    "FPÖ",
    "Fidesz",
    "Patriots",
    "ESN",
    "AfD",
    "SPD",
    "Republika",
    "Reconquête",
    "NOWA NADZIEJA",
    "Mi Hazánk",
]

Classify entries according to their ideology

In [None]:
import numpy as np

def _mentions_any_party(text, parties):
    """Return True if lowercase ``text`` names any of ``parties`` as a whole token.

    Party names are lowercased and escaped before matching, so names that
    contain punctuation such as "S&D" or "PTB/PVDA" are matched literally.
    Empty names are ignored and an empty ``parties`` collection never matches.
    """
    # Local import so this cell does not depend on a later cell importing `re`.
    import re

    names = [re.escape(party.lower()) for party in parties if party]
    if not names:
        return False

    # The lookarounds require a non-word character (or the string edge) on
    # both sides of the name. Plain substring search would find "id" inside
    # "video", "si" inside "music" or "sp" inside "spd".
    pattern = r"(?<!\w)(?:" + "|".join(names) + r")(?!\w)"
    return re.search(pattern, text) is not None


def classify_ideology(description, left_parties=None, right_parties=None):
    """Label a description as "left" or "right" from the parties it mentions.

    Matching is case-insensitive and only counts whole tokens.

    Parameters
    ----------
    description : object
        Text to inspect. Anything that is not a ``str`` yields NaN.
    left_parties, right_parties : iterable of str, optional
        Party names for each side. When omitted, the notebook-level ``left``
        and ``right`` lists are used.

    Returns
    -------
    "left" if only left parties are mentioned, "right" if only right parties
    are mentioned, and ``np.nan`` when neither or both sides are mentioned.
    """
    if not isinstance(description, str):
        return np.nan

    if left_parties is None:
        left_parties = left
    if right_parties is None:
        right_parties = right

    # Lowercase once; the helper lowercases the party names to match.
    # NOTE(review): acronyms that are also everyday words in some language
    # (e.g. "be", "si") can still match when they appear as standalone words.
    text = description.lower()
    found_left = _mentions_any_party(text, left_parties)
    found_right = _mentions_any_party(text, right_parties)

    if found_left and not found_right:
        return "left"
    if found_right and not found_left:
        return "right"
    # Neither side, or both sides: ambiguous, leave unlabeled.
    return np.nan


In [None]:
# Label each row from its description text; rows for which classify_ideology
# returns NaN stay unlabeled.
df["ideology"] = df["description"].apply(classify_ideology)

Verify that some videos have the ideology column filled

In [None]:
# Tally how many rows ended up in each ideology bucket (NaN = unlabeled)
left_count = df["ideology"].eq("left").sum()
right_count = df["ideology"].eq("right").sum()
nan_count = df["ideology"].isna().sum()

# Report the three tallies, one per line
print(
    f"Left count: {left_count}",
    f"Right count: {right_count}",
    f"NaN count: {nan_count}",
    sep="\n",
)


Fetch the like count of each video and the follower count of its creator

TODO: Number of views and likes

In [None]:
import requests
import re

def _fetch_html(url, headers, timeout):
    """GET ``url`` and return the body as text; raises RequestException on failure."""
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text


def _extract_count(html, field):
    """Return the first integer found after the JSON key ``field`` in ``html``, else None."""
    match = re.search(rf'"{re.escape(field)}":(\d+)', html)
    return int(match.group(1)) if match else None


def get_video_likes_and_creator_followers(video_id: str, creator_id: str, timeout: float = 10.0):
    """Scrape a TikTok video's like count and its creator's follower count.

    Two pages are requested: the video page (for likes) and the creator's
    profile page (for followers). The numbers are pulled out of the returned
    HTML with regular expressions.

    Parameters
    ----------
    video_id : str
        Identifier used in the video URL.
    creator_id : str
        Creator handle used in both URLs (without the leading "@").
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up. Without a
        timeout a stalled connection would block the whole run.

    Returns
    -------
    (likes, followers) : tuple
        Each element is an ``int`` or ``None`` when it could not be obtained.
        ``(None, None)`` is returned if the video page cannot be fetched. If
        only the creator page fails, the likes already extracted are kept and
        followers is ``None``.
    """
    # Construct video URL and creator URL
    video_url = f"https://www.tiktok.com/@{creator_id}/video/{video_id}"
    creator_url = f"https://www.tiktok.com/@{creator_id}"

    # Set up headers to mimic a real browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0",
        "Referer": "https://www.tiktok.com/",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Upgrade-Insecure-Requests": "1"
    }

    # Fetch video page content
    try:
        video_html = _fetch_html(video_url, headers, timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching video page: {e}")
        return None, None
    print(f"Fetched video page HTML for video {video_id}")

    # Search for likes (TODO: regex must be verified)
    likes = _extract_count(video_html, "likeCount")
    if likes is None:
        print(f"Likes not found for video {video_id}")

    # Fetch creator page content to get followers count. A failure here must
    # not discard the likes that were already extracted above.
    try:
        creator_html = _fetch_html(creator_url, headers, timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching creator page for creator {creator_id}: {e}")
        return likes, None

    followers = _extract_count(creator_html, "followerCount")
    if followers is None:
        print(f"Followers not found for creator {creator_id}")

    return likes, followers


def add_likes_and_followers_to_dataframe(df):
    """Add "likes" and "followers" columns by scraping every row's video.

    For each row, ``get_video_likes_and_creator_followers`` is called with the
    row's "video_id" and "creator_id". The results are written to two new
    object-dtype columns (``int`` values, or ``None`` where nothing could be
    fetched).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "video_id" and "creator_id" columns. It is modified in
        place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience.
    """
    likes_values = []
    followers_values = []

    # Iterate the two columns directly instead of using iterrows(): iterrows
    # builds one Series per row and may upcast large integer ids to float.
    for video_id, creator_id in zip(df["video_id"], df["creator_id"]):
        likes, followers = get_video_likes_and_creator_followers(video_id, creator_id)
        likes_values.append(likes)
        followers_values.append(followers)

    # Assign each column once, by position. Writing row by row through
    # df.at[label, ...] would overwrite every row sharing a duplicated index
    # label. dtype=object keeps ints next to None, as the row-wise version did.
    df["likes"] = pd.Series(likes_values, index=df.index, dtype=object)
    df["followers"] = pd.Series(followers_values, index=df.index, dtype=object)

    return df


In [None]:
# example_df = df.head(5)

In [None]:
# Scrape likes and followers for every row. Each row triggers up to two HTTP
# requests (video page, then creator page), so this cell can take a while.
df = add_likes_and_followers_to_dataframe(df)

# Preview the enriched frame.
df.head()


Store the result

In [None]:
# Persist the labeled and enriched dataset; index=False leaves the row index
# out of the CSV.
df.to_csv("final_dataset.csv", index=False)