In [1]:
# Import the necessary libraries
import requests  # This library is the best for making HTTP requests to websites and APIs.
import json      # This library helps us work with the JSON data format, which is how the API will send us data.
import time      # This library lets us pause our script, which is important for being polite to the API.
import pandas as pd # This is the most powerful library for data analysis and manipulation in Python. We'll use it to create our table.

In [2]:
# Set up your API key and the base URL for the API.
#
# The key is a secret, so it must not be written into the notebook itself:
# notebooks get shared and committed, and anyone who can read the file could
# reuse the key. It is read from the GUARDIAN_API_KEY environment variable,
# and if that is not set you are prompted for it instead.
# NOTE: any key that was ever pasted directly into this cell should be
# treated as compromised and regenerated.
import os
from getpass import getpass

API_KEY_ENV_VAR = 'GUARDIAN_API_KEY'

MY_API_KEY = os.environ.get(API_KEY_ENV_VAR, '').strip()
if not MY_API_KEY:
    # getpass hides what is typed, so the key does not end up in the cell output.
    MY_API_KEY = getpass(f'Enter your Guardian API key (or set {API_KEY_ENV_VAR}): ').strip()
if not MY_API_KEY:
    # Fail here with a clear message rather than later with an HTTP error.
    raise ValueError('A Guardian API key is required to query the Guardian API.')

BASE_URL = 'https://content.guardianapis.com/search'

# Define the parameters for our search
# We are searching for articles about "Narendra Modi"
search_query = 'Narendra Modi'

In [3]:
# --- Part 1: Set up the session and parameters ---
# A session is a more efficient way to make multiple requests to the same website.
my_session = requests.Session()

# Give up on a request after this many seconds. Without a timeout, a stalled
# connection would leave the cell hanging forever.
REQUEST_TIMEOUT_SECONDS = 30

# These are the specific details we send with our request.
params = {
    'q': search_query,
    'api-key': MY_API_KEY,
    'page-size': 50,  # Ask for 50 articles per page.
    'page': 1,        # Start with page 1.
    'show-fields': 'wordcount', # Ask for the wordcount of each article.
    'show-tags': 'contributor' # Ask for the author (contributor) tags.
}

# --- Part 2: The Loop ---
all_results = [] # Every article from every page is collected here.
page_num = 1
total_pages = 1 # Placeholder; replaced by the real count from the first response.
fetch_complete = False # Becomes True only if every page was fetched.

print("Starting to fetch data...")

while page_num <= total_pages:
    print(f"Fetching page {page_num}...")
    params['page'] = page_num

    try:
        response = my_session.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # Raises for bad responses (like 404 or 500).
        data = response.json()['response']

        # Add the articles from this page to our main list.
        all_results.extend(data['results'])

        # After the first request, find out the total number of pages.
        if page_num == 1:
            total_pages = data['pages']
            print(f"Total pages found: {total_pages}")

    except requests.exceptions.RequestException as e:
        # The error text contains the request URL, and the API key is sent as a
        # query parameter, so mask the key before it lands in the cell output.
        message = str(e)
        if MY_API_KEY:
            message = message.replace(MY_API_KEY, '<redacted>')
        print(f"An error occurred: {message}")
        break
    except KeyError as e:
        # str(e) is the quoted name of whichever key was actually missing.
        print(f"Could not find {e} in the data. Stopping.")
        break

    # Move to the next page for the next loop iteration.
    page_num += 1

    # Be polite to the API - wait 1 second, but only if another request follows.
    if page_num <= total_pages:
        time.sleep(1)
else:
    # A while-loop's else branch runs only when the loop ended without `break`.
    fetch_complete = True

if fetch_complete:
    print("Finished fetching all data!")
else:
    print(
        f"Fetching stopped early: collected {len(all_results)} articles "
        f"from {page_num - 1} of {total_pages} page(s)."
    )

Starting to fetch data...
Fetching page 1...
Total pages found: 83
Fetching page 2...
Fetching page 3...
Fetching page 4...
Fetching page 5...
Fetching page 6...
Fetching page 7...
Fetching page 8...
Fetching page 9...
Fetching page 10...
Fetching page 11...
Fetching page 12...
Fetching page 13...
Fetching page 14...
Fetching page 15...
Fetching page 16...
Fetching page 17...
Fetching page 18...
Fetching page 19...
Fetching page 20...
Fetching page 21...
Fetching page 22...
Fetching page 23...
Fetching page 24...
Fetching page 25...
Fetching page 26...
Fetching page 27...
Fetching page 28...
Fetching page 29...
Fetching page 30...
Fetching page 31...
Fetching page 32...
Fetching page 33...
Fetching page 34...
Fetching page 35...
Fetching page 36...
Fetching page 37...
Fetching page 38...
Fetching page 39...
Fetching page 40...
Fetching page 41...
Fetching page 42...
Fetching page 43...
Fetching page 44...
Fetching page 45...
Fetching page 46...
Fetching page 47...
Fetching page 48...
F

In [4]:
# Build one table from the fetched articles: each result dict becomes a row.
df = pd.DataFrame(data=all_results)

# Preview the top five rows to check what was collected.
df.head(5)

Unnamed: 0,id,type,sectionId,sectionName,webPublicationDate,webTitle,webUrl,apiUrl,fields,tags,isHosted,pillarId,pillarName
0,world/2025/jun/06/canada-prime-minister-g7-inv...,article,world,World news,2025-06-06T20:58:48Z,Canada’s PM faces backlash for inviting India’...,https://www.theguardian.com/world/2025/jun/06/...,https://content.guardianapis.com/world/2025/ju...,{'wordcount': '569'},"[{'id': 'profile/leyland-cecco', 'type': 'cont...",False,pillar/news,News
1,world/2024/nov/18/narendra-modi-to-make-histor...,article,world,World news,2024-11-18T05:00:29Z,Narendra Modi to make ‘historic’ Guyana visit ...,https://www.theguardian.com/world/2024/nov/18/...,https://content.guardianapis.com/world/2024/no...,{'wordcount': '610'},"[{'id': 'profile/natricia-duncan', 'type': 'co...",False,pillar/news,News
2,commentisfree/2025/sep/01/the-guardian-view-on...,article,commentisfree,Opinion,2025-09-01T17:45:33Z,The Guardian view on Donald Trump and India: t...,https://www.theguardian.com/commentisfree/2025...,https://content.guardianapis.com/commentisfree...,{'wordcount': '590'},"[{'id': 'profile/editorial', 'type': 'contribu...",False,pillar/opinion,Opinion
3,world/2025/apr/25/domestic-pressures-shaping-i...,article,world,World news,2025-04-25T17:49:44Z,The domestic pressures shaping India’s respons...,https://www.theguardian.com/world/2025/apr/25/...,https://content.guardianapis.com/world/2025/ap...,{'wordcount': '792'},"[{'id': 'profile/penelope-macrae', 'type': 'co...",False,pillar/news,News
4,world/2024/nov/04/canada-hindu-temple-attack-modi,article,world,World news,2024-11-04T17:21:06Z,Narendra Modi condemns attack on Hindu temple ...,https://www.theguardian.com/world/2024/nov/04/...,https://content.guardianapis.com/world/2024/no...,{'wordcount': '373'},"[{'id': 'profile/leyland-cecco', 'type': 'cont...",False,pillar/news,News


In [5]:
# Run Colab's Google sign-in for this runtime.
# NOTE(review): the BigQuery calls later in the notebook presumably rely on
# the credentials obtained here - confirm.
from google.colab import auth
auth.authenticate_user()
# Only reached if authenticate_user() returned without raising.
print('Authenticated')

Authenticated


In [6]:
# Where the articles will be stored in BigQuery.
# Replace project_id with the ID of your own Google Cloud project.
project_id = 'guardian-news-analysis'
dataset_name = 'guardian_news'
table_name = 'articles'

# Fully qualified table reference in the form project.dataset.table
table_id = '.'.join((project_id, dataset_name, table_name))

In [7]:
from google.cloud import bigquery
from google.cloud.exceptions import NotFound

# Construct a BigQuery client object.
client = bigquery.Client(project=project_id)

# Describe the dataset we want; nothing is sent to BigQuery yet.
dataset_id = f"{project_id}.{dataset_name}"
dataset = bigquery.Dataset(dataset_id)
dataset.location = "US" # You can choose other locations, like "EU" or "asia-south1"

# Create the dataset only if it is really missing. Catching NotFound alone
# (instead of every Exception) lets authentication, permission and network
# failures surface as themselves rather than triggering a creation attempt.
try:
    client.get_dataset(dataset_id) # Make an API request.
    print(f"Dataset {dataset_name} already exists.")
except NotFound:
    dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
    print(f"Created dataset {project_id}.{dataset.dataset_id}")

Created dataset guardian-news-analysis.guardian_news


In [8]:
# Each 'tags' value is a list of dictionaries, which BigQuery can't handle directly.
# Build a separate upload frame in which every such list is serialised to a
# JSON string; the original DataFrame is left untouched.
df_for_upload = df.assign(tags=df['tags'].apply(json.dumps))

# Send the prepared frame to BigQuery, overwriting the table if it already exists.
print(f"Uploading data to table: {table_id}")
df_for_upload.to_gbq(
    table_id,
    project_id=project_id,
    if_exists='replace',
)

print("Upload complete!")

Uploading data to table: guardian-news-analysis.guardian_news.articles


  df_for_upload.to_gbq(
100%|██████████| 1/1 [00:00<00:00, 2255.00it/s]

Upload complete!





In [2]:
# Step 1: Import necessary libraries and authenticate
from google.colab import auth
from google.cloud import bigquery
import pandas as pd
from textblob import TextBlob

# Sign in first; Colab shows a link to click to authorize your account.
auth.authenticate_user()
print('Authentication successful.')

# Step 2: Name the project, the dataset, and the two tables involved.
# IMPORTANT: replace project_id with your own Google Cloud project ID.
project_id = 'guardian-news-analysis'
dataset_name = 'guardian_news'
source_table_name = 'articles_cleaned'
destination_table_name = 'articles_with_sentiment'

# Fully qualified ids in the form project.dataset.table
source_table_id = '.'.join((project_id, dataset_name, source_table_name))
destination_table_id = '.'.join((project_id, dataset_name, destination_table_name))

# One BigQuery client, bound to the project above, is used for the query below.
client = bigquery.Client(project=project_id)
print(f"BigQuery client initialized for project: {project_id}")

# Step 3: Pull every row of the source table into a DataFrame.
print(f"Fetching data from {source_table_id}...")
sql = f"SELECT * FROM `{source_table_id}`"
df = client.query(sql).to_dataframe()
print(f"Successfully fetched {len(df)} rows.")

# Step 4: Score the sentiment of each headline.
print("Analyzing sentiment of article headlines...")

def get_polarity(text):
    """Return TextBlob's sentiment polarity for ``text``.

    Anything that is not a ``str`` (for example a missing headline) is
    scored as 0.0 instead of being passed to TextBlob.
    """
    if not isinstance(text, str):
        return 0.0
    return TextBlob(text).sentiment.polarity

def get_subjectivity(text):
    """Return TextBlob's sentiment subjectivity for ``text``.

    Anything that is not a ``str`` (for example a missing headline) is
    scored as 0.0 instead of being passed to TextBlob.
    """
    if not isinstance(text, str):
        return 0.0
    return TextBlob(text).sentiment.subjectivity

# Score each headline with both helpers, storing the results as two new columns.
df['sentiment_polarity'] = df['webTitle'].map(get_polarity)
df['sentiment_subjectivity'] = df['webTitle'].map(get_subjectivity)

print("Sentiment analysis complete.")
print("Here's a sample of the data with the new sentiment scores:")
# Show the first five headlines next to their scores.
print(
    df[['webTitle', 'sentiment_polarity', 'sentiment_subjectivity']].head(5)
)

# Step 5: Write the enriched frame (original columns plus the two sentiment
# columns) to its own BigQuery table, replacing that table if it already exists.
print(f"Uploading results to new table: {destination_table_id}...")

df.to_gbq(
    destination_table_id,
    project_id=project_id,
    if_exists='replace',
)

print("Upload complete! Your new table 'articles_with_sentiment' is ready in BigQuery.")

Authentication successful.
BigQuery client initialized for project: guardian-news-analysis
Fetching data from guardian-news-analysis.guardian_news.articles_cleaned...
Successfully fetched 8766 rows.
Analyzing sentiment of article headlines...
Sentiment analysis complete.
Here's a sample of the data with the new sentiment scores:
                                            webTitle  sentiment_polarity  \
0  Artist of the week 113: Toby Ziegler | Skye Sh...                 0.0   
1  Sydney Biennale 2016:  Belgiorno-Nettis family...                 0.0   
2                  Five of the best… art exhibitions                 0.0   
3  Share your photos of air quality around the world                 0.0   
4  Modigliani review – you think you know him? Tu...                 0.0   

   sentiment_subjectivity  
0                     0.0  
1                     0.0  
2                     0.0  
3                     0.0  
4                     0.0  
Uploading results to new table: guardian-new

  df.to_gbq(
100%|██████████| 1/1 [00:00<00:00, 8456.26it/s]

Upload complete! Your new table 'articles_with_sentiment' is ready in BigQuery.



