In [2]:
import pandas as pd
import requests
import concurrent.futures
from textblob import TextBlob

# Guardian Open Platform credentials and the paginated search endpoint.
# NOTE(review): the API key is committed in source; consider loading it from
# the environment instead.
API_KEY = "abd46575-1136-4a4b-8868-d5d223a9f1e1"

# Everything up to and including "page=" -- callers append the page number.
BASE_URL = (
    "https://content.guardianapis.com/technology/apple"
    "?from-date=2023-07-01"
    f"&api-key={API_KEY}"
    "&type=article&page="
)

def get_urls(base_url, num_pages):
    """Return one URL per page, numbered 1 through *num_pages* inclusive.

    Each URL is *base_url* with the decimal page number appended, so
    *base_url* is expected to end with the page query parameter.
    """
    page_suffixes = map(str, range(1, num_pages + 1))
    return [base_url + suffix for suffix in page_suffixes]

def fetch_data(url, timeout=10):
    """Fetch *url* and return its decoded JSON body, or ``None`` on failure.

    Args:
        url: Fully-formed URL to GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever, which would stall a
            worker thread in the calling thread pool.

    Returns:
        The parsed JSON payload, or ``None`` if the request failed, returned
        an HTTP error status, or the body was not valid JSON. Failures are
        reported on stdout rather than raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        return None
    except ValueError as e:
        # Older requests releases raise a plain ValueError (not a
        # RequestException) when the body is not valid JSON.
        print(f"Error decoding JSON from {url}: {e}")
        return None

def fetch_article_content(api_url, timeout=10):
    """Fetch the body and headline of a single article.

    Args:
        api_url: The article's API URL (without a query string); the API key
            and ``show-fields`` selector are appended here.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang a worker thread indefinitely.

    Returns:
        A ``(content, headline)`` tuple. Fields missing from the response
        are replaced by placeholder strings. ``(None, None)`` is returned
        when the request fails or the payload cannot be parsed; the failure
        is reported on stdout rather than raised.
    """
    try:
        response = requests.get(
            f"{api_url}?api-key={API_KEY}&show-fields=body,headline",
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
        fields = data['response']['content']['fields']
        content = fields.get('body', 'No content available')
        headline = fields.get('headline', 'No headline available')
        return content, headline
    except requests.exceptions.RequestException as e:
        print(f"Error fetching article content from {api_url}: {e}")
        return None, None
    except (KeyError, TypeError, ValueError) as e:
        # KeyError: expected key absent; TypeError: payload is not the
        # nested mapping we expect; ValueError: body was not valid JSON
        # (older requests releases raise it outside RequestException).
        print(f"Error parsing article content: {e}")
        return None, None

def extract_info(json_data):
    """Pull the per-article metadata out of one search-results payload.

    Returns a list of dicts with the keys ``title``, ``sectionname``,
    ``publisheddate`` and ``api_url``. A falsy payload yields an empty list.
    If any expected key is missing anywhere in the payload, the error is
    printed and an empty list is returned for the whole payload.
    """
    if not json_data:
        return []

    records = []
    try:
        for entry in json_data['response']['results']:
            records.append({
                'title': entry['webTitle'],
                'sectionname': entry['sectionName'],
                'publisheddate': entry['webPublicationDate'],
                'api_url': entry['apiUrl'],
            })
    except KeyError as e:
        print(f"Error parsing data: {e}")
        return []
    return records

def analyze_sentiment(text):
    """Return the ``(polarity, subjectivity)`` pair TextBlob reports for *text*."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity, sentiment.subjectivity

def main(num_pages=13):
    """Download articles, score their sentiment, and print the result.

    Steps:
      1. Fetch *num_pages* pages of search results concurrently.
      2. Extract per-article metadata from every page that parsed.
      3. Fetch each article's body and headline concurrently.
      4. Score title, headline and content with ``analyze_sentiment``.
      5. Print a DataFrame of publication dates and sentiment scores.

    Args:
        num_pages: Number of result pages to request (defaults to 13, the
            value previously hard-coded here).

    Articles whose body/headline could not be fetched are kept in the
    output; their headline and content scores are NaN instead of aborting
    the whole run.
    """
    urls = get_urls(BASE_URL, num_pages)

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        json_data_list = list(executor.map(fetch_data, urls))

    info = []
    for json_data in json_data_list:
        info.extend(extract_info(json_data))

    if not info:
        print("No data found")
        return

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        content_and_headlines = list(
            executor.map(fetch_article_content, [article['api_url'] for article in info])
        )

    # executor.map preserves input order, so results line up with `info`.
    for article, (content, headline) in zip(info, content_and_headlines):
        fetched = bool(content and headline)
        # Always set both keys so the DataFrame columns exist even when
        # every fetch failed; None marks "not available".
        article['content'] = content if fetched else None
        article['headline'] = headline if fetched else None
        article.pop('api_url')  # Remove api_url from the final data

    df = pd.DataFrame(info)

    def score(text):
        # TextBlob rejects non-string input, so missing values (None/NaN)
        # are scored as NaN rather than passed through.
        if isinstance(text, str):
            return analyze_sentiment(text)
        return float('nan'), float('nan')

    # Perform sentiment analysis on title, headline, and content
    for field in ('title', 'headline', 'content'):
        scores = [score(text) for text in df[field]]
        df[f'{field}_polarity'] = [polarity for polarity, _ in scores]
        df[f'{field}_subjectivity'] = [subjectivity for _, subjectivity in scores]

    # Select relevant columns for displaying
    sentiment_df = df[['publisheddate', 'title_polarity', 'title_subjectivity',
                       'headline_polarity', 'headline_subjectivity',
                       'content_polarity', 'content_subjectivity']]

    # Display the sentiment analysis DataFrame with dates
    print(sentiment_df)

# Run the pipeline only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


            publisheddate  title_polarity  title_subjectivity  \
0    2024-07-03T14:26:50Z        0.000000            0.000000   
1    2024-06-25T10:45:40Z        0.700000            0.600000   
2    2024-06-24T10:07:41Z        0.000000            0.000000   
3    2024-06-21T19:01:40Z        0.000000            0.000000   
4    2024-06-21T03:32:37Z       -0.155556            0.288889   
..                    ...             ...                 ...   
118  2023-07-17T17:11:52Z        0.000000            0.000000   
119  2023-07-15T14:00:32Z        0.136364            0.454545   
120  2023-07-12T11:48:59Z        0.000000            0.000000   
121  2023-07-05T17:37:27Z       -0.400000            0.400000   
122  2023-07-05T16:00:37Z        0.000000            0.100000   

     headline_polarity  headline_subjectivity  content_polarity  \
0             0.000000               0.000000          0.032146   
1             0.700000               0.600000          0.193186   
2             0.00