In [1]:
from stackapi import StackAPI
import pandas as pd 
import time
from datetime import datetime

In [34]:
def get_data_from_StackAPI(start_date=int(time.mktime(datetime(2023, 1, 1).timetuple())),
                           end_date=int(time.mktime(
                               datetime(2023, 12, 31, 23, 59, 59).timetuple())),
                           score_min=50,
                           filter_code='!.I1Z-vjt(hd9(yOJasMPIz(eE0)Ob'):
    '''
    Fetch Python-tagged Stack Overflow questions and return them as a DataFrame.

    The questions are requested through StackAPI, sorted by votes, restricted
    to the `python` tag and to the [start_date, end_date] creation window.

    Parameters:
    - start_date (int, optional): Unix timestamp for the start of the window
      (default: January 1, 2023, 00:00:00).
    - end_date (int, optional): Unix timestamp for the end of the window
      (default: December 31, 2023, 23:59:59).
    - score_min (int, optional): Value forwarded as the API `min` argument;
      because the request is sorted by votes this is the minimum score of the
      questions to retrieve (default: 50).
    - filter_code (str, optional): Stack Exchange API filter selecting which
      fields are returned (default: '!.I1Z-vjt(hd9(yOJasMPIz(eE0)Ob').
      A filter can be built here: https://api.stackexchange.com/docs/advanced-search#order=desc&sort=activity&filter=!.I1Z-vjt(hd9(yOJasMPIz(eE0)Ob&site=stackoverflow&run=true

    Note: the default timestamps are evaluated once, when the function is
    defined, and `time.mktime` interprets the dates in the machine's local
    time zone.

    Returns:
    - DataFrame: one row per question with the columns Question_ID,
      CreationDate, Score, ViewCount, AnswerCount, CommentCount,
      FavoriteCount, Title, Body, Tags and Post (title and body joined by a
      space). The columns are present even when no question is returned.
    '''
    columns = ['Question_ID', 'CreationDate', 'Score', 'ViewCount',
               'AnswerCount', 'CommentCount', 'FavoriteCount', 'Title',
               'Body', 'Tags', 'Post']

    # Initialise the Stack Overflow API client.
    site = StackAPI('stackoverflow')

    # `min` must follow the caller's `score_min`; it was previously hard-coded
    # to 50, which made the parameter a no-op.
    questions = site.fetch('questions', fromdate=start_date, todate=end_date,
                           min=score_min, sort='votes', tagged='python',
                           filter=filter_code)

    # Collect the main characteristics of each question.
    records = []
    for item in questions['items']:
        title = item['title']
        # Some fields may be missing from the response, hence the fallbacks.
        body = item.get('body', '')
        records.append({
            'Question_ID': item['question_id'],
            'CreationDate': pd.to_datetime(item['creation_date'], unit='s'),
            'Score': item['score'],
            'ViewCount': item.get('view_count', 0),
            'AnswerCount': item.get('answer_count', 0),
            'CommentCount': item.get('comment_count', 0),
            'FavoriteCount': item.get('favorite_count', 0),
            'Title': title,
            'Body': body,
            'Tags': item['tags'],
            'Post': title + " " + body
        })

    # Passing `columns` keeps the schema stable for an empty result set.
    return pd.DataFrame(records, columns=columns)

In [35]:
# Fetch the questions with the function's default arguments and print the
# resulting DataFrame as plain text.
data = get_data_from_StackAPI()
print(data)

    Question_ID        CreationDate  Score  ViewCount  AnswerCount  \
0      75608323 2023-03-01 19:52:19    400     667446           28   
1      75956209 2023-04-07 07:05:59    253     507852            3   
2      77213053 2023-10-02 03:02:00    166     140036            6   
3      75898276 2023-03-31 11:58:04    147     312859            5   
4      76187256 2023-05-06 05:11:42    136     305230           15   
5      75602063 2023-03-01 09:53:49    113     266551           11   
6      77364550 2023-10-26 06:22:24    107     126043            7   
7      74981558 2023-01-02 10:57:36     97      96720            4   
8      76616042 2023-07-04 21:58:01     87     137227            8   
9      75495800 2023-02-18 19:19:23     85     107935            9   
10     75397736 2023-02-09 11:08:22     79      50022            5   
11     75269700 2023-01-28 17:54:09     78      15548            5   
12     75804599 2023-03-21 17:35:10     69      70954            4   
13     76414514 2023