# 🎯 **Goal: Sentiment Analysis on Movie Tags**
We’ll analyze the `tag_text` column of the `tags_fact` table to understand:

- Are users saying positive or negative things?

- What are the most common themes?

In [None]:
# Running this code will query a table in BigQuery and download the result
# into a DataFrame named `tags_df` (the name given after `%%bigquery` below).
# NOTE(review): stock IPython expects a `%%` cell magic on the first line of
# the cell; confirm this leading comment is accepted in your environment.

%%bigquery tags_df --project movielens-32m
SELECT * FROM `movielens-32m.movielens_clean.tags_fact` # this table name was set based on the table chosen to query

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
# Preview the first five downloaded tag rows.
tags_df.head(n=5)

Unnamed: 0,userId,movieId,tag_text,tag_timestamp
0,58,136020,james bond,2023-01-01 05:31:22+00:00
1,58,63113,007,2023-01-01 05:33:34+00:00
2,58,96079,nostalgic,2023-01-01 05:34:45+00:00
3,58,96079,espionage,2023-01-01 05:34:46+00:00
4,58,96079,ben wishaw,2023-01-01 05:34:56+00:00


**Using NLP to Analyze Sentiment**

We are using ✅ TextBlob (simple & fast). It gives each tag a polarity score between -1 (most negative) and +1 (most positive).



In [None]:
from textblob import TextBlob

# Apply sentiment analysis.
# TextBlob only accepts strings, so missing tags are replaced with '' and any
# other non-string value is converted to text before scoring.
tag_text_clean = tags_df['tag_text'].fillna('').astype(str)

# Score each distinct tag once and map the result back onto every row, so a
# tag that appears on many rows is not re-analysed each time.
polarity_by_tag = {
    tag: TextBlob(tag).sentiment.polarity
    for tag in tag_text_clean.unique()
}
tags_df['polarity'] = tag_text_clean.map(polarity_by_tag)

# Classify sentiment
def label_sentiment(score, threshold=0.2):
    """Map a polarity score to a coarse sentiment label.

    Args:
        score: Numeric polarity value to classify.
        threshold: Half-width of the neutral band around zero. Defaults to
            0.2, the cut-off that was previously hard-coded.

    Returns:
        'positive' when ``score > threshold``, 'negative' when
        ``score < -threshold``, and 'neutral' otherwise. A score exactly on
        either boundary is 'neutral', as is NaN, because NaN fails both
        comparisons.
    """
    if score > threshold:
        return 'positive'
    if score < -threshold:
        return 'negative'
    return 'neutral'

# Turn each row's polarity score into its sentiment label, then preview.
tags_df['sentiment'] = tags_df['polarity'].map(label_sentiment)
tags_df.head()


Unnamed: 0,userId,movieId,tag_text,tag_timestamp,polarity,sentiment
0,58,136020,james bond,2023-01-01 05:31:22+00:00,0.0,neutral
1,58,63113,007,2023-01-01 05:33:34+00:00,0.0,neutral
2,58,96079,nostalgic,2023-01-01 05:34:45+00:00,-0.5,negative
3,58,96079,espionage,2023-01-01 05:34:46+00:00,0.0,neutral
4,58,96079,ben wishaw,2023-01-01 05:34:56+00:00,0.0,neutral


**Aggregating Results by Movie**

In [None]:
# Count tags per (movie, sentiment) and pivot the sentiment labels into columns.
# The reindex pins the output to the three label columns in a fixed order, so
# a label that never occurs in the data still yields a zero-filled column and
# the table schema does not depend on what the data happens to contain.
sentiment_summary = (
    tags_df
    .groupby(['movieId', 'sentiment'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=['negative', 'neutral', 'positive'], fill_value=0)
    .reset_index()
)

In [None]:
# Preview the first five per-movie sentiment counts.
sentiment_summary.head(n=5)

sentiment,movieId,negative,neutral,positive
0,1,23,1046,161
1,2,59,505,9
2,3,0,21,2
3,4,0,12,0
4,5,1,58,5


In [None]:
from google.cloud import bigquery

# Initialize BigQuery client
bq_client = bigquery.Client(project="movielens-32m")

# Load jobs append to the destination table by default, so re-running this
# cell would add a second copy of every summary row. The summary is fully
# recomputed on each run, so replace the table contents instead.
load_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

# Upload DataFrame to BigQuery; .result() waits for the load job to finish.
bq_client.load_table_from_dataframe(
    sentiment_summary,
    "movielens-32m.movielens_clean.tags_sentiment_summary",
    job_config=load_config,
).result()

print("✅ Sentiment summary uploaded to BigQuery!")



✅ Sentiment summary uploaded to BigQuery!
