# Import Statements

In [21]:
import re
import sklearn
import pandas as pd
import gensim
import numpy as np
import nltk
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
from gensim import corpora, models
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA
from sklearn.naive_bayes import MultinomialNB
from sklearn.decomposition import TruncatedSVD, NMF
from sklearn.metrics import classification_report
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Data Preperation


In [2]:
# Importing all related Datasets, cleaning, pre-processing and creating a balanced dataset 
# We know that our dataset is balanced across 3 categories of sports, movie and books
# So each category will be assigned 1/3 of the dataset...

fifa_df = pd.read_csv('fifa.csv')
fifa_df.dropna(inplace=True)
fifa_df = fifa_df.drop(['Unnamed: 0', 'Date Created', 'Number of Likes', 'Source of Tweet', 'Sentiment'],axis = 1)
fifa_df.rename(columns={'Tweet':'text'}, inplace=True)
fifa_df['topic'] = 'sports'
fifa = fifa_df.sample(n=1000, random_state=1)

In [3]:
fifa

Unnamed: 0,text,topic
21402,a fan chant jsjzhdhd #Dreamers2022 #WorldCup20...,sports
3075,Argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,sports
1428,"It is not a political message, it is our count...",sports
4286,Fuck Boudiaf! \n@EnnerValencia14 is is inviola...,sports
12262,Turning 30 and Unmarried Felt Like I Underachi...,sports
...,...,...
3094,@JohnMcMah0n Thanks for your #WorldCup2022 #Re...,sports
19,@ALosersLament @Majstar7 @piersmorgan racist u...,sports
10012,Plot Twist: The refs were the ones really paid...,sports
2220,The Ministry announces facilities to attract f...,sports


In [4]:
nba = pd.read_csv('NBA.csv')
nba.dropna(inplace=True)
nba = nba[['text']]
nba['topic'] = 'sports'
nba = nba.sample(n=1000, random_state=1)

In [5]:
nba

Unnamed: 0,text,topic
4546,it's a wrap! #DenverNuggets #NBAWesternFinals,sports
5860,#BostonCeltics need to stop playing son I’m ge...,sports
13820,"Great season, ain’t no complaints #HEATCulture...",sports
16767,🔥 Tyler Herro Debut RC LOT 🔥 Bid @ $16.50 👀📈📈📈...,sports
3679,"Vibe Czech 1, 2... Buena Vibras coming soon......",sports
...,...,...
7286,"Igoudala has passed up 2 chances now, he had t...",sports
15208,I'm I'm I'm gonna keep it together but but wha...,sports
16945,On to the Next 💪🏾🏀 #miamiheat https://t.co/r2w...,sports
18499,Iggy Iggy can’t you see... Sometimes your mo...,sports


In [6]:
nfl = pd.read_csv('NFL.csv')
nfl.dropna(inplace=True)
nfl = nfl[['text']]
nfl['topic'] = 'sports'
nfl = nfl.sample(n=1000, random_state=1)

In [7]:
nfl

Unnamed: 0,text,topic
13830,Yeah if we had picked Fulton instead Id be thr...,sports
18025,So you would rather have Troy Dye? Hall couldv...,sports
69758,KJ please and this is an A draft,sports
84305,Yall got anymore of them LBs? Giants,sports
21698,For real. If we get 2 or 3 long starters thats...,sports
...,...,...
94383,Every GM in the league would take 78 2nd round...,sports
69898,I really want every WR we picked in this draft...,sports
88073,You are not wrong and I totally agree with you...,sports
23047,Considering how rare it is for a team with an ...,sports


Total number of sport related text is 3000, so we need 3000 for movie and 3000 for books...

In [8]:
movies_df = pd.read_csv('Movies3.csv')
movies_df = movies_df[['text']]
movies_df.dropna(inplace=True)
movies_df['topic'] = 'movie'
movies = movies_df.sample(n=3000, random_state=1)

In [9]:
movies

Unnamed: 0,text,topic
98387,They were huge with my skater group in the lat...,movie
1156777,What the heck Weren t Jackie Chan Bruce Lee ...,movie
570010,Thief 1981 To Live and Die in LA 1985,movie
451819,Live action the Imam from the Riddick movies ...,movie
954418,Isn t it great It s in my top three of Altman s,movie
...,...,...
325355,Hey Mr Balfe first of all a big giant thank y...,movie
568285,Map of Tiny Perfect Things is my favorite Rea...,movie
1490155,Meh They are rich and insured They ll be fin...,movie
2279316,shrek from the movie shrek also rowley from t...,movie


In [10]:
books_df = pd.read_csv('Books_rating.csv')
books_df.dropna(inplace=True)
books_df = books_df[['review/text']]
books_df.rename(columns={'review/text': 'text'}, inplace=True)
books_df['topic'] = 'book'
books = books_df.sample(n=3000, random_state=1)

In [11]:
books

Unnamed: 0,text,topic
1898915,"This is an great topical reference, I use it q...",book
286070,I re-read this book every year at the start of...,book
885833,I don't know how I stumbled on this book but I...,book
2075898,I was very disappointed in this book. The plot...,book
2821735,Beautiufl coloring book. I bought these for my...,book
...,...,...
1694666,This book was a joy to read! It shows how rewa...,book
2957488,While I'm sure wrestling fans will be enthrall...,book
2133252,My book arrived in perfect shape and in a spee...,book
131723,"Christopher Phillips, in the true tradition es...",book


In [12]:
# Concatenating all the df's into one...
df = pd.concat([fifa,movies,nba,nfl,books], ignore_index=True)

In [13]:
df

Unnamed: 0,text,topic
0,a fan chant jsjzhdhd #Dreamers2022 #WorldCup20...,sports
1,Argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,sports
2,"It is not a political message, it is our count...",sports
3,Fuck Boudiaf! \n@EnnerValencia14 is is inviola...,sports
4,Turning 30 and Unmarried Felt Like I Underachi...,sports
...,...,...
8995,This book was a joy to read! It shows how rewa...,book
8996,While I'm sure wrestling fans will be enthrall...,book
8997,My book arrived in perfect shape and in a spee...,book
8998,"Christopher Phillips, in the true tradition es...",book


# BERT (Sentiment)

In [145]:
analyzer = SentimentIntensityAnalyzer()

df['text_cleaned'] = df['text'].str.lower()
df['score'] = df['text_cleaned'].apply(lambda review: analyzer.polarity_scores(review))
df['compound'] = df['text_cleaned'].apply(lambda review: analyzer.polarity_scores(review.lower())['compound'])
df['Sentiment'] = df['compound'].apply(lambda x: 'positive' if x >= 0.05 else 'negative' if x < -0.05 else 'neutral')

In [146]:
df

Unnamed: 0,text,topic,text_cleaned,score,compound,Sentiment
0,a fan chant jsjzhdhd #Dreamers2022 #WorldCup20...,sports,a fan chant jsjzhdhd #dreamers2022 #worldcup20...,"{'neg': 0.0, 'neu': 0.723, 'pos': 0.277, 'comp...",0.3182,positive
1,Argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,sports,argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,"{'neg': 0.0, 'neu': 0.756, 'pos': 0.244, 'comp...",0.6369,positive
2,"It is not a political message, it is our count...",sports,"it is not a political message, it is our count...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral
3,Fuck Boudiaf! \n@EnnerValencia14 is is inviola...,sports,fuck boudiaf! \n@ennervalencia14 is is inviola...,"{'neg': 0.387, 'neu': 0.613, 'pos': 0.0, 'comp...",-0.5848,negative
4,Turning 30 and Unmarried Felt Like I Underachi...,sports,turning 30 and unmarried felt like i underachi...,"{'neg': 0.0, 'neu': 0.82, 'pos': 0.18, 'compou...",0.6908,positive
...,...,...,...,...,...,...
8995,This book was a joy to read! It shows how rewa...,book,this book was a joy to read! it shows how rewa...,"{'neg': 0.016, 'neu': 0.728, 'pos': 0.256, 'co...",0.9894,positive
8996,While I'm sure wrestling fans will be enthrall...,book,while i'm sure wrestling fans will be enthrall...,"{'neg': 0.15, 'neu': 0.674, 'pos': 0.175, 'com...",0.5939,positive
8997,My book arrived in perfect shape and in a spee...,book,my book arrived in perfect shape and in a spee...,"{'neg': 0.0, 'neu': 0.699, 'pos': 0.301, 'comp...",0.7841,positive
8998,"Christopher Phillips, in the true tradition es...",book,"christopher phillips, in the true tradition es...","{'neg': 0.0, 'neu': 0.807, 'pos': 0.193, 'comp...",0.7845,positive


In [147]:
value_counts = df['Sentiment'].value_counts()
print("Occurrences of each distinct value in the column:")
print(value_counts)

Occurrences of each distinct value in the column:
Sentiment
positive    5375
negative    1833
neutral     1792
Name: count, dtype: int64


In [148]:
mapping = {'negative': 0, 'positive': 1, 'neutral': 2}
for index, row in df.iterrows():
    if row['Sentiment'] in mapping:
        df.at[index, 'Sentiment_Index'] = int(mapping[row['Sentiment']])

In [149]:
df

Unnamed: 0,text,topic,text_cleaned,score,compound,Sentiment,Sentiment_Index
0,a fan chant jsjzhdhd #Dreamers2022 #WorldCup20...,sports,a fan chant jsjzhdhd #dreamers2022 #worldcup20...,"{'neg': 0.0, 'neu': 0.723, 'pos': 0.277, 'comp...",0.3182,positive,1.0
1,Argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,sports,argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,"{'neg': 0.0, 'neu': 0.756, 'pos': 0.244, 'comp...",0.6369,positive,1.0
2,"It is not a political message, it is our count...",sports,"it is not a political message, it is our count...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
3,Fuck Boudiaf! \n@EnnerValencia14 is is inviola...,sports,fuck boudiaf! \n@ennervalencia14 is is inviola...,"{'neg': 0.387, 'neu': 0.613, 'pos': 0.0, 'comp...",-0.5848,negative,0.0
4,Turning 30 and Unmarried Felt Like I Underachi...,sports,turning 30 and unmarried felt like i underachi...,"{'neg': 0.0, 'neu': 0.82, 'pos': 0.18, 'compou...",0.6908,positive,1.0
...,...,...,...,...,...,...,...
8995,This book was a joy to read! It shows how rewa...,book,this book was a joy to read! it shows how rewa...,"{'neg': 0.016, 'neu': 0.728, 'pos': 0.256, 'co...",0.9894,positive,1.0
8996,While I'm sure wrestling fans will be enthrall...,book,while i'm sure wrestling fans will be enthrall...,"{'neg': 0.15, 'neu': 0.674, 'pos': 0.175, 'com...",0.5939,positive,1.0
8997,My book arrived in perfect shape and in a spee...,book,my book arrived in perfect shape and in a spee...,"{'neg': 0.0, 'neu': 0.699, 'pos': 0.301, 'comp...",0.7841,positive,1.0
8998,"Christopher Phillips, in the true tradition es...",book,"christopher phillips, in the true tradition es...","{'neg': 0.0, 'neu': 0.807, 'pos': 0.193, 'comp...",0.7845,positive,1.0


In [150]:
sentiment_groups = [df[df['Sentiment_Index'] == sentiment] for sentiment in [0, 1, 2]]

In [151]:
balanced_groups = [group.sample(n=1750) for group in sentiment_groups]

In [152]:
balanced_groups

[                                                   text   topic  \
 2280  Sloth was an establish criminal  notice how th...   movie   
 51    Now that the Qatar World Cup has begun, I take...  sports   
 230              What the hell!!! #rigged #WorldCup2022  sports   
 5641  idk Veach pulled salary cap out of his asshole...  sports   
 8961  Tremendously comprehensive. Having been a vege...    book   
 ...                                                 ...     ...   
 2196                                         Star Wars    movie   
 6363  Do not waste your money on this book! I was ve...    book   
 1405  Taps was the movie that helped make Tom Cruise...   movie   
 1876         I think worse is Depp and Marion Cotillard   movie   
 3530  Yep  and what d we get for all of that waiting...   movie   
 
                                            text_cleaned  \
 2280  sloth was an establish criminal  notice how th...   
 51    now that the qatar world cup has begun, i take...   
 2

In [153]:
df

Unnamed: 0,text,topic,text_cleaned,score,compound,Sentiment,Sentiment_Index
0,a fan chant jsjzhdhd #Dreamers2022 #WorldCup20...,sports,a fan chant jsjzhdhd #dreamers2022 #worldcup20...,"{'neg': 0.0, 'neu': 0.723, 'pos': 0.277, 'comp...",0.3182,positive,1.0
1,Argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,sports,argentina since day 1🇦🇷🤍💙 \n#worldcup2022 #leo...,"{'neg': 0.0, 'neu': 0.756, 'pos': 0.244, 'comp...",0.6369,positive,1.0
2,"It is not a political message, it is our count...",sports,"it is not a political message, it is our count...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
3,Fuck Boudiaf! \n@EnnerValencia14 is is inviola...,sports,fuck boudiaf! \n@ennervalencia14 is is inviola...,"{'neg': 0.387, 'neu': 0.613, 'pos': 0.0, 'comp...",-0.5848,negative,0.0
4,Turning 30 and Unmarried Felt Like I Underachi...,sports,turning 30 and unmarried felt like i underachi...,"{'neg': 0.0, 'neu': 0.82, 'pos': 0.18, 'compou...",0.6908,positive,1.0
...,...,...,...,...,...,...,...
8995,This book was a joy to read! It shows how rewa...,book,this book was a joy to read! it shows how rewa...,"{'neg': 0.016, 'neu': 0.728, 'pos': 0.256, 'co...",0.9894,positive,1.0
8996,While I'm sure wrestling fans will be enthrall...,book,while i'm sure wrestling fans will be enthrall...,"{'neg': 0.15, 'neu': 0.674, 'pos': 0.175, 'com...",0.5939,positive,1.0
8997,My book arrived in perfect shape and in a spee...,book,my book arrived in perfect shape and in a spee...,"{'neg': 0.0, 'neu': 0.699, 'pos': 0.301, 'comp...",0.7841,positive,1.0
8998,"Christopher Phillips, in the true tradition es...",book,"christopher phillips, in the true tradition es...","{'neg': 0.0, 'neu': 0.807, 'pos': 0.193, 'comp...",0.7845,positive,1.0


In [157]:
balanced_df = pd.concat(balanced_groups)

In [158]:
balanced_df

Unnamed: 0,text,topic,text_cleaned,score,compound,Sentiment,Sentiment_Index
2280,Sloth was an establish criminal notice how th...,movie,sloth was an establish criminal notice how th...,"{'neg': 0.111, 'neu': 0.812, 'pos': 0.077, 'co...",-0.4946,negative,0.0
51,"Now that the Qatar World Cup has begun, I take...",sports,"now that the qatar world cup has begun, i take...","{'neg': 0.088, 'neu': 0.912, 'pos': 0.0, 'comp...",-0.4404,negative,0.0
230,What the hell!!! #rigged #WorldCup2022,sports,what the hell!!! #rigged #worldcup2022,"{'neg': 0.727, 'neu': 0.273, 'pos': 0.0, 'comp...",-0.8392,negative,0.0
5641,idk Veach pulled salary cap out of his asshole...,sports,idk veach pulled salary cap out of his asshole...,"{'neg': 0.08, 'neu': 0.92, 'pos': 0.0, 'compou...",-0.1027,negative,0.0
8961,Tremendously comprehensive. Having been a vege...,book,tremendously comprehensive. having been a vege...,"{'neg': 0.097, 'neu': 0.817, 'pos': 0.086, 'co...",-0.6324,negative,0.0
...,...,...,...,...,...,...,...
1111,He s gonna lay some cable,movie,he s gonna lay some cable,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
5007,He just needs to hold onto the ball.,sports,he just needs to hold onto the ball.,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
2922,RETAAIINER Retainer,movie,retaaiiner retainer,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
4041,Was it though?🤔👇🏽 #backsportspage #brooklynnet...,sports,was it though?🤔👇🏽 #backsportspage #brooklynnet...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0


In [159]:
df = balanced_df.sample(frac=1).reset_index(drop=True)

In [156]:
df

Unnamed: 0,text,topic,text_cleaned,score,compound,Sentiment,Sentiment_Index
0,"As a Pastor,this book is so good I give a copy...",book,"as a pastor,this book is so good i give a copy...","{'neg': 0.0, 'neu': 0.87, 'pos': 0.13, 'compou...",0.6826,positive,1.0
1,Mmm that is one tasty burger,movie,mmm that is one tasty burger,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
2,Official Poster for Ruby Gillman Teenage Kra...,movie,official poster for ruby gillman teenage kra...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
3,Wtf are you talking about,movie,wtf are you talking about,"{'neg': 0.487, 'neu': 0.513, 'pos': 0.0, 'comp...",-0.5859,negative,0.0
4,He s talking about a retired investor,movie,he s talking about a retired investor,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
...,...,...,...,...,...,...,...
5245,Good book but I don't recommended it for no on...,book,good book but i don't recommended it for no on...,"{'neg': 0.094, 'neu': 0.866, 'pos': 0.039, 'co...",-0.4094,negative,0.0
5246,How was the first match at Qatar #WorldCup2022 ?,sports,how was the first match at qatar #worldcup2022 ?,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,neutral,2.0
5247,"I am a rape survivor and yes, it can lead to T...",book,"i am a rape survivor and yes, it can lead to t...","{'neg': 0.31, 'neu': 0.548, 'pos': 0.142, 'com...",-0.9199,negative,0.0
5248,Spoilers for Peacemaker I always thought it ...,movie,spoilers for peacemaker i always thought it ...,"{'neg': 0.02, 'neu': 0.805, 'pos': 0.175, 'com...",0.9509,positive,1.0


In [160]:


df1 = pd.DataFrame(columns=['data', 'target'])

df1['data'] = df['text_cleaned']
df1['target'] = df['Sentiment_Index']


categories = ['positive', 'neutral', 'negative']
train_set_S = df1.sample(frac=0.8, random_state=1)
test_set_S = df1.drop(train_set_S.index)

train_set_S

train = pd.DataFrame({'text': train_set_S.data, 'target': train_set_S.target})

test = pd.DataFrame({'text': test_set_S.data, 'target': test_set_S.target})


train, dev = train_test_split(train, test_size=0.15, random_state=1, 
                               stratify=train[['target']])

# Model configuration # https://simpletransformers.ai/docs/usage/#configuring-a-simple-transformers-model 
model_args = ClassificationArgs()

model_args.overwrite_output_dir=True # overwrite existing saved models in the same directory
model_args.evaluate_during_training=True # to perform evaluation while training the model
# (eval data should be passed to the training method)

model_args.num_train_epochs=10 # number of epochs
model_args.train_batch_size=32 # batch size
model_args.learning_rate=4e-6 # learning rate
model_args.max_seq_length=256 # maximum sequence length
# Note! Increasing max_seq_len may provide better performance, but training time will increase. 
# For educational purposes, we set max_seq_len to 256.

# Early stopping to combat overfitting: https://simpletransformers.ai/docs/tips-and-tricks/#using-early-stopping
model_args.use_early_stopping=True
model_args.early_stopping_delta=0.01 # "The improvement over best_eval_loss necessary to count as a better checkpoint"
model_args.early_stopping_metric='eval_loss'
model_args.early_stopping_metric_minimize=True
model_args.early_stopping_patience=2
model_args.evaluate_during_training_steps=32 # how often you want to run validation in terms of training steps (or batches)

# Checking steps per epoch
steps_per_epoch = int(np.ceil(len(train) / float(model_args.train_batch_size)))
print('Each epoch will have {:,} steps.'.format(steps_per_epoch)) # 64 steps = validating 2 times per epoch

model = ClassificationModel('bert', 'bert-base-cased', num_labels=3, args=model_args, use_cuda=True) # CUDA is enabled

_, history = model.train_model(train, eval_df=dev) 

Each epoch will have 112 steps.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
8it [00:23,  2.99s/it]                       
Epoch 1 of 10:   0%|          | 0/10 [00:00<?, ?it/s]
Running Epoch 1 of 10:   0%|          | 0/112 [00:00<?, ?it/s][A
Epochs 1/10. Running Loss:    1.2009:   0%|          | 0/112 [00:08<?, ?it/s][A
Epochs 1/10. Running Loss:    1.2009:   1%|          | 1/112 [00:09<17:32,  9.48s/it][A
Epochs 1/10. Running Loss:    1.1969:   1%|          | 1/112 [00:13<17:32,  9.48s/it][A
Epochs 1/10. Running Loss:    1.1969:   2%|▏         | 2/112 [00:16<14:13,  7.76s/it][A
Epochs 1/10. Running Loss:    1.1436:   2%|▏         | 2/112 [00:16<14:13,  7.76s/it][A
Epochs 1/10. Running Loss:    1.1436:   3%|▎         | 3/112 [00:18<09:55,  5.46s/it][A
Epochs 1/10. Runn

In [161]:
for index, row in df_test.iterrows():
    if row['sentiment'] in mapping:
        df_test.at[index, 'sentiment'] = mapping[row['sentiment']]

df_test

predicted, probabilities = model.predict(df_test.text.to_list())
df_test['predicted_sentiment'] = predicted

df_test

1it [00:11, 11.44s/it]
100%|██████████| 1/1 [00:00<00:00,  2.76it/s]


Unnamed: 0,sentence id,text,sentiment,topic,Predicted Topic,Predicted_Topic_LDA,Predicted_Topic_MNB,predicted_sentiment
0,0,I wouldn't be caught dead watching the NFL if ...,0,sports,1,1,movie,0
1,1,Chris O'Donnell stated that while filming for ...,2,movie,1,1,movie,0
2,2,"The whole game was a rollercoaster ride, but L...",1,sports,1,1,sports,0
3,3,"Zendaya slayed in Dune 2, as she does in all h...",1,movie,1,1,movie,0
4,4,While my favorite player was playing this matc...,0,sports,1,1,sports,1
5,5,My uncle's brother's neighbor's cat's veterina...,2,book,1,1,movie,0
6,6,He said that The Great Gatsby is the best nove...,0,book,1,1,movie,1
7,7,I could not look away from this train wrck of ...,0,movie,1,1,movie,0
8,8,The film Everything Everywhere All At Once fol...,2,movie,1,1,movie,0
9,9,I just finished reading pride and prejudice wh...,1,book,1,1,book,0
