In [1]:
import csv
import html
import json
import os
import re
import time
from datetime import datetime
from urllib.parse import parse_qs

import nltk
import numpy as np
import pandas as pd
import requests
from flickrapi import FlickrAPI
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from requests_oauthlib import OAuth1

In [2]:
# Flickr credentials: prefer the FLICKR_API_KEY / FLICKR_API_SECRET environment
# variables so secrets do not have to live in the notebook. The literal values
# are kept only as a fallback so existing runs keep working unchanged.
# NOTE(review): these literals are stored in plain text in this file; they
# should be rotated and the fallbacks removed once the environment is set up.
API_KEY = os.environ.get('FLICKR_API_KEY', '6121804e178a34ebe49444e858987ee5')
API_SECRET = os.environ.get('FLICKR_API_SECRET', '0995d081c0eccf00')

# REST endpoint used for the raw `requests` calls in fetch_and_display_comments.
API_END = 'https://api.flickr.com/services/rest/'

# flickrapi client configured to return responses as parsed JSON (dicts).
flickr = FlickrAPI(API_KEY, API_SECRET, format='parsed-json')

# The VADER lexicon must be present before the analyzer can be constructed.
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/joeyared/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
def search_for_photos(keywords, start_date, end_date, num_images=200):
    """Return ids of Flickr photos tagged with `keywords` uploaded in a date range.

    Parameters
    ----------
    keywords : str
        Passed as the `tags` argument of `flickr.photos.search`; with
        `tag_mode='all'` a photo must carry every tag given.
    start_date, end_date : str
        Bounds of the upload window in 'YYYY-MM-DD' format. They are parsed as
        naive datetimes, so `.timestamp()` interprets them in the local
        timezone of the machine running the notebook.
    num_images : int, default 200
        Maximum number of photo ids to return. Flickr caps `per_page` at 500,
        so larger values are satisfied by requesting successive pages.
        Values <= 0 return an empty list without calling the API.

    Returns
    -------
    list
        Photo ids, newest upload first (`sort='date-posted-desc'`), at most
        `num_images` long.

    Raises
    ------
    ValueError
        If either date string does not match '%Y-%m-%d'.
    """
    # Documented upper bound for `per_page` on flickr.photos.search.
    max_page_size = 500

    start = int(datetime.strptime(start_date, '%Y-%m-%d').timestamp())
    end = int(datetime.strptime(end_date, '%Y-%m-%d').timestamp())

    # Keep the page size constant across requests so page boundaries line up.
    page_size = min(num_images, max_page_size)

    photo_ids = []
    page = 1
    while len(photo_ids) < num_images:
        result = flickr.photos.search(tags=keywords,
                                      tag_mode='all',
                                      min_upload_date=start,
                                      max_upload_date=end,
                                      per_page=page_size,
                                      page=page,
                                      sort='date-posted-desc',
                                      extras='date_upload')
        listing = result['photos']
        batch = listing['photo']
        photo_ids.extend(photo['id'] for photo in batch)

        # Stop on an empty page or once the last available page was consumed.
        if not batch or page >= int(listing.get('pages', page)):
            break
        page += 1

    # The final page may overshoot the requested count.
    return photo_ids[:num_images]

In [4]:
def analyze_sentiment(text):
    """Classify `text` with the module-level VADER analyzer `sid`.

    The label is derived from the 'compound' score:
    >= 0.05 is 'Positive', <= -0.05 is 'Negative', anything in between is
    'Neutral'.

    Returns
    -------
    tuple
        `(label, scores)` where `scores` is the dict returned by
        `sid.polarity_scores(text)`.
    """
    polarity = sid.polarity_scores(text)
    compound = polarity['compound']

    if compound >= 0.05:
        return 'Positive', polarity
    if compound <= -0.05:
        return 'Negative', polarity
    return 'Neutral', polarity

In [5]:
def fetch_and_display_comments(image_ids, timeout=10):
    """Fetch the comments of each photo and score their sentiment.

    For every id in `image_ids` this calls `flickr.photos.comments.getList`
    on the REST endpoint, cleans each comment (HTML entities unescaped,
    non-ASCII characters removed) and runs `analyze_sentiment` on the result.
    Photos whose request fails are reported with `print` and skipped.

    Parameters
    ----------
    image_ids : iterable
        Flickr photo ids to look up.
    timeout : float, default 10
        Seconds to wait for each HTTP request before giving up, so a stalled
        connection cannot hang the whole run.

    Returns
    -------
    pandas.DataFrame
        One row per comment with columns photo_id, author, date
        ('YYYY-MM-DD' string, NaN when the timestamp is missing or invalid),
        comment_text, sentiment and sentiment_score. Empty (but with these
        columns) when nothing was found.
    """
    columns = ['photo_id', 'author', 'date', 'comment_text', 'sentiment', 'sentiment_score']
    comment_data = []

    for photo_id in image_ids:
        params = {
            'method': 'flickr.photos.comments.getList',
            'photo_id': photo_id,
            'api_key': API_KEY,
            'format': 'json',
            'nojsoncallback': 1
        }
        try:
            response = requests.get(API_END, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            print(f"Error fetching comments for photo ID {photo_id}: {e}")
            continue

        # Flickr signals API-level failures in the payload (HTTP status is
        # still 200), so they must be checked explicitly instead of being
        # mistaken for "no comments".
        if data.get('stat') == 'fail':
            message = data.get('message', 'unknown Flickr API error')
            print(f"Error fetching comments for photo ID {photo_id}: {message}")
            continue

        for comment in data.get('comments', {}).get('comment', []):
            comment_text = html.unescape(comment.get('_content', ''))
            # Drop every run of non-ASCII characters before scoring.
            comment_text = re.sub(r'[^\x00-\x7F]+', '', comment_text)
            sentiment, score = analyze_sentiment(comment_text)
            comment_data.append({
                'photo_id': photo_id,
                'author': comment.get('authorname', ''),
                'date': comment.get('datecreate', ''),
                'comment_text': comment_text,
                'sentiment': sentiment,
                'sentiment_score': score
            })

    if not comment_data:
        print("No comments found for any of the photos")

    comment_df = pd.DataFrame(comment_data, columns=columns)

    # `datecreate` arrives as a string of epoch seconds. Cast to a number
    # first: relying on to_datetime(unit='s') to parse strings is deprecated
    # in recent pandas releases.
    epoch_seconds = pd.to_numeric(comment_df['date'], errors='coerce')
    comment_df['date'] = (
        pd.to_datetime(epoch_seconds, unit='s', errors='coerce')
        .dt.strftime('%Y-%m-%d')
    )
    return comment_df

In [6]:
def main():
    """Search Flickr for 'Gaza' photos, collect their comments and save them.

    Looks up photos uploaded between 2023-01-01 and 2024-11-01, builds the
    comment/sentiment table, displays it and writes it to
    "flickr_gaza_comments2.csv". Prints a message instead when the search
    returns no photos.
    """
    photo_ids = search_for_photos('Gaza', '2023-01-01', '2024-11-01')

    # Nothing to fetch: report and stop early.
    if not photo_ids:
        print("no images found for this search term")
        return

    comments = fetch_and_display_comments(photo_ids)
    display(comments)
    comments.to_csv("flickr_gaza_comments2.csv", index=False)

In [7]:
# Run the full search -> comments -> CSV pipeline only when this code is
# executed as the top-level program (not when it is imported).
if __name__ == "__main__":
    main()

Unnamed: 0,photo_id,author,date,comment_text,sentiment,sentiment_score
0,54106893477,GerGray,2024-10-31,Superb stuff.,Positive,"{'neg': 0.0, 'neu': 0.196, 'pos': 0.804, 'comp..."
1,54106893477,bernd.kugow,2024-11-01,I like it.,Positive,"{'neg': 0.0, 'neu': 0.286, 'pos': 0.714, 'comp..."
2,54106893477,Flickr,2024-11-02,"Congrats on Explore! <a href=""https://www.fli...",Positive,"{'neg': 0.0, 'neu': 0.619, 'pos': 0.381, 'comp..."
3,54106893477,pelsedyr,2024-11-02,Congrats on Explore,Positive,"{'neg': 0.0, 'neu': 0.37, 'pos': 0.63, 'compou..."
4,54106893477,s0340248,2024-11-02,Glckwunsch zu Explore !,Neutral,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
...,...,...,...,...,...,...
68,54092594267,"""Adventure before dementia""",2024-10-30,Noam Chomsky speaks the truth.,Positive,"{'neg': 0.0, 'neu': 0.635, 'pos': 0.365, 'comp..."
69,54092594267,amalia_mar,2024-10-30,So true!!!!,Neutral,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
70,54092594267,Khaled M. K. HEGAZY,2024-10-30,[https://www.flickr.com/photos/imagik1] \n [ht...,Positive,"{'neg': 0.0, 'neu': 0.263, 'pos': 0.737, 'comp..."
71,54092594267,I~am~CosMos,2024-11-08,,Neutral,"{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound..."


In [8]:
# gaza_df = pd.read_csv('flickr_gaza_comments.csv')
# gaza_df