In [1]:
import csv
import html
import json
import os
import re
import time
from datetime import datetime
from urllib.parse import parse_qs

import nltk
import numpy as np
import pandas as pd
import requests
from flickrapi import FlickrAPI
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from requests_oauthlib import OAuth1

In [2]:
# Flickr credentials are taken from the environment (FLICKR_API_KEY /
# FLICKR_API_SECRET) when set, so real values need not live in the notebook.
# SECURITY: the literal fallbacks below have been stored in plain text and
# should be treated as compromised -- rotate them and then delete the
# fallbacks. They are kept only so existing runs keep working unchanged.
API_KEY = os.environ.get('FLICKR_API_KEY', '6121804e178a34ebe49444e858987ee5')
API_SECRET = os.environ.get('FLICKR_API_SECRET', '0995d081c0eccf00')

# REST endpoint used for the raw `requests` calls made later in the notebook.
API_END = 'https://api.flickr.com/services/rest/'

# Shared flickrapi client; responses come back as parsed JSON (dicts/lists).
flickr = FlickrAPI(API_KEY, API_SECRET, format='parsed-json')

# VADER needs its lexicon available locally before the analyzer is built.
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/joeyared/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
def search_for_photos(keywords, start_date, end_date, num_images=400):
    """Return the ids of photos tagged with ``keywords`` uploaded in a date window.

    Args:
        keywords: A tag string (optionally already comma-delimited) or an
            iterable of tag strings. Iterables are joined with commas because
            the search is sent with ``tag_mode='all'`` and a single ``tags``
            value.
        start_date: Inclusive lower bound on upload date, ``'YYYY-MM-DD'``.
        end_date: Upper bound on upload date, ``'YYYY-MM-DD'``.
        num_images: Value sent as ``per_page``, i.e. the most ids one call can
            return. NOTE(review): Flickr documents a per-page cap; confirm
            before raising this above 500.

    Returns:
        list: The ``id`` field of every photo in the response, newest first
        (``sort='date-posted-desc'``).

    Raises:
        ValueError: If either date does not match ``'%Y-%m-%d'``.
    """
    # Accept a list/tuple of tags as well as a ready-made string.
    if not isinstance(keywords, str):
        keywords = ','.join(keywords)

    # strptime yields naive datetimes, so .timestamp() interprets both dates
    # as midnight in the machine's local timezone.
    min_upload = int(datetime.strptime(start_date, '%Y-%m-%d').timestamp())
    max_upload = int(datetime.strptime(end_date, '%Y-%m-%d').timestamp())

    response = flickr.photos.search(tags=keywords,
                                    tag_mode='all',
                                    min_upload_date=min_upload,
                                    max_upload_date=max_upload,
                                    per_page=num_images,
                                    sort='date-posted-desc',
                                    extras='date_upload')
    return [photo['id'] for photo in response['photos']['photo']]

In [4]:
def analyze_sentiment(text):
    """Label ``text`` as Positive, Negative or Neutral using the shared ``sid`` analyzer.

    The label is decided by the ``'compound'`` entry of
    ``sid.polarity_scores(text)``: ``>= 0.05`` is Positive, ``<= -0.05`` is
    Negative, anything in between is Neutral.

    Returns:
        tuple: ``(label, scores)`` where ``scores`` is the unmodified result of
        ``sid.polarity_scores(text)``.
    """
    polarity = sid.polarity_scores(text)
    compound = polarity['compound']

    if compound >= 0.05:
        return 'Positive', polarity
    if compound <= -0.05:
        return 'Negative', polarity
    return 'Neutral', polarity

In [5]:
# Patterns deleted, in this exact order, from each comment after HTML-unescaping.
# Order matters: e.g. tags are stripped before any leftover '<' / '>' characters.
_COMMENT_NOISE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'https?://\S+',                    # http(s) URLs
    r'www\.\S+',                        # www URLs
    r'[\w\-]+(\.[\w\-]+)+\.\w+\S*',     # other URL formats
    r'\[.*?\]$',                        # [...] at end
    r'\(.*?\)$',                        # (...) at end
    r'<[^>]+>',                         # HTML tags
    r'</?\w+[^>]*>',                    # remaining tags
    r'&[a-z]+;',                        # HTML entities
    r'[<>]',                            # stray brackets
    r'alt=[\'"]\S+[\'"]',               # alt attributes
    r'/a>',                             # closing a tags
    r'[^\x00-\x7F]+',                   # non-ASCII
))

# Column order of the DataFrame returned by fetch_and_display_comments.
_COMMENT_COLUMNS = ['photo_id', 'author', 'date', 'comment_text',
                    'sentiment', 'sentiment_score']


def _clean_comment_text(raw_text):
    """Unescape HTML entities, delete every noise pattern, collapse whitespace."""
    text = html.unescape(raw_text)
    for pattern in _COMMENT_NOISE_PATTERNS:
        text = pattern.sub('', text)
    # split()/join also trims leading and trailing whitespace.
    return ' '.join(text.split())


def fetch_and_display_comments(image_ids, timeout=10):
    """Download, clean and sentiment-score the comments of each photo.

    For every id in ``image_ids`` the ``flickr.photos.comments.getList`` method
    is called on ``API_END``. Each comment body is cleaned (URLs, markup and
    non-ASCII characters removed) and kept only if the result starts with a
    letter and is longer than three characters. Kept comments are scored with
    ``analyze_sentiment``.

    Failures for a single photo (network error, HTTP error status, non-JSON
    body, or a Flickr ``stat: fail`` payload) are printed and that photo is
    skipped; they never abort the whole run.

    Args:
        image_ids: Iterable of Flickr photo ids.
        timeout: Seconds to wait on each HTTP request before giving up, so one
            stalled connection cannot hang the loop.

    Returns:
        pandas.DataFrame: Columns ``photo_id``, ``author``, ``date``
        (``'YYYY-MM-DD'`` text, missing when the timestamp is unparseable),
        ``comment_text``, ``sentiment`` and ``sentiment_score`` (the compound
        score). Empty, with the same columns, when nothing was collected.
    """
    comment_data = []
    for photo_id in image_ids:
        params = {
            'method': 'flickr.photos.comments.getList',
            'photo_id': photo_id,
            'api_key': API_KEY,
            'format': 'json',
            'nojsoncallback': 1
        }
        try:
            response = requests.get(API_END, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON, which older
            # versions of requests do not wrap in a RequestException.
            print(f"Error fetching comments for photo ID {photo_id}: {e}")
            continue

        # An HTTP 200 can still carry an API-level failure; surface it
        # instead of treating the photo as having no comments.
        if data.get('stat') == 'fail':
            print(f"Flickr API error for photo ID {photo_id}: "
                  f"{data.get('message', 'unknown error')}")
            continue

        for comment in data.get('comments', {}).get('comment', []):
            comment_text = _clean_comment_text(comment.get('_content') or '')

            # Keep only comments that begin with a letter and are longer than
            # three characters; the length test runs first so an empty string
            # is never indexed.
            if len(comment_text) <= 3 or not comment_text[0].isalpha():
                continue

            sentiment, score = analyze_sentiment(comment_text)
            comment_data.append({
                'photo_id': photo_id,
                'author': comment.get('authorname', ''),
                'date': comment.get('datecreate', ''),
                'comment_text': comment_text,
                'sentiment': sentiment,
                'sentiment_score': score['compound']
            })

    if not comment_data:
        print("No comments found for any of the photos")

    comment_df = pd.DataFrame(comment_data, columns=_COMMENT_COLUMNS)
    # The timestamp may arrive as a numeric string; convert it to a number
    # explicitly, because parsing strings together with `unit=` is deprecated
    # in recent pandas releases.
    epoch_seconds = pd.to_numeric(comment_df['date'], errors='coerce')
    comment_df['date'] = (
        pd.to_datetime(epoch_seconds, unit='s', errors='coerce')
        .dt.strftime('%Y-%m-%d')
    )
    return comment_df

In [6]:
def main(search_word='Gaza', start_date='2023-01-01', end_date='2024-11-01',
         output_path='flickr_comments.csv'):
    """Search Flickr, score the comments on the matching photos, and save them.

    Args:
        search_word: Tag(s) passed to ``search_for_photos``.
        start_date: Start of the upload window, ``'YYYY-MM-DD'``.
        end_date: End of the upload window, ``'YYYY-MM-DD'``.
        output_path: Where the resulting table is written as CSV (no index
            column).

    With no arguments this reproduces the original run: tag ``'Gaza'``,
    2023-01-01 to 2024-11-01, written to ``flickr_comments.csv``.
    """
    image_ids = search_for_photos(search_word, start_date, end_date)

    # Nothing matched: report it and skip the comment download entirely.
    if not image_ids:
        print("no images found for this search term")
        return

    comments_df = fetch_and_display_comments(image_ids)
    display(comments_df)
    comments_df.to_csv(output_path, index=False)

In [7]:
# Run the pipeline when this cell executes (or the file is run as a script);
# the guard keeps main() from firing if the code is imported as a module.
if __name__ == "__main__":
    main()

  comment_text = re.sub(r'[[\]()]+$', '', comment_text)  # Remove stray brackets at end


Unnamed: 0,photo_id,author,date,comment_text,sentiment,sentiment_score
0,54106893477,GerGray,2024-10-31,Superb stuff.,Positive,0.6249
1,54106893477,bernd.kugow,2024-11-01,I like it.,Positive,0.3612
2,54106893477,Flickr,2024-11-02,Congrats on Explore!,Positive,0.5707
3,54106893477,pelsedyr,2024-11-02,Congrats on Explore,Positive,0.5267
4,54106893477,s0340248,2024-11-02,Glckwunsch zu Explore !,Neutral,0.0
5,54106893477,Sigurd Krieger,2024-11-02,Congrats on Xplore!!,Positive,0.6103
6,54106893477,V A N D E E,2024-11-02,"Congrats on Explore, love this capture!",Positive,0.8356
7,54106893477,Francesco Dini,2024-11-02,Beautiful photo and congrats on Explore,Positive,0.8074
8,54106893477,Marut Rata,2024-11-02,Congrats on E X P L O R E !,Positive,0.5707
9,54106893477,xprocessed,2024-11-02,Congratulations [ for being showcased on Explore!,Positive,0.636
