# TikTok Video Popularity Prediction

This notebook is a concise prototype for predicting whether a TikTok video will be popular. It loads a tabular dataset of TikTok videos, defines a binary target variable (a video counts as popular when its likes exceed the dataset's median likes), engineers simple features (comment, share, and view counts plus the number of hashtags), trains a Random Forest classifier, and evaluates its performance on a held-out 20% test split. Comments throughout explain each step.

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [None]:
# Load dataset
# DATA_FILE is passed to pandas as-is, so a relative path is resolved against the
# notebook's current working directory.
DATA_FILE = 'tiktok_video_performance.csv'  # Update if needed
df = pd.read_csv(DATA_FILE)

# Preview the first rows. Leaving the frame as the cell's last expression lets
# Jupyter render it as a formatted table instead of a plain-text dump.
df.head()


In [None]:
# Build the binary label: 1 when a video's likes are strictly above the dataset
# median, 0 otherwise. Videos sitting exactly on the median are labelled 0, and a
# missing like count compares as False, so it is labelled 0 as well.
median_likes = df['likes'].median()
df['is_popular'] = df['likes'].gt(median_likes).astype(int)
print('Median likes threshold:', median_likes)


In [None]:
# Feature engineering and model training

# Tunable settings, named so the split and the forest can be adjusted in one place.
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 200
FEATURE_COLUMNS = ['comments', 'shares', 'views', 'num_hashtags']

# Count the whitespace-separated tokens in each hashtag string; a missing value
# counts as 0. The vectorised .str accessors replace a per-row Python lambda, and
# astype(str) keeps the count from failing on a stray non-string cell.
# NOTE(review): assumes hashtags are separated by whitespace -- confirm against the data.
df['num_hashtags'] = df['hashtags'].fillna('').astype(str).str.split().str.len()

# Missing feature values are imputed with 0.
# NOTE(review): comments, shares and views look like engagement counts recorded
# together with likes. If they are only known after a video has been published,
# this model cannot be used to predict popularity ahead of time -- confirm that
# these columns are available at prediction time.
X = df[FEATURE_COLUMNS].fillna(0)
y = df['is_popular']

# Stratify on the label so train and test keep the same popular/not-popular ratio
# (ties at the median can make the classes uneven). Stratification needs at least
# two rows per class and a test set at least as large as the number of classes;
# when that does not hold, fall back to a plain random split.
class_counts = y.value_counts()
can_stratify = class_counts.min() >= 2 and len(y) * TEST_SIZE >= len(class_counts)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=y if can_stratify else None,
)

# Fit the forest on the training split and report precision/recall/F1 per class
# on the held-out split.
model = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
