In [2]:
import setup  # notebooks/setup.py
# Per the recorded output below, init() applies the nest_asyncio patch and
# switches the working directory to the project's `src` folder; the `app.*`
# import and the relative `mock_data` path in later cells presumably rely on that.
setup.init(verbose = True)

Applied nest_asyncio patch for Jupyter compatibility
Changed working directory to: /Users/gphome/Desktop/projects/Obrela-assignment/StackExchange-Statistics-Service/src


In [3]:
from datetime import datetime
from app.components.stackexchange import StackExchangeClient
import json
from pathlib import Path
# True: load cached JSON fixtures; False: call the live API through the client.
MOCK = True
# Directory holding the cached API responses (relative to the working directory).
MOCK_DATA = Path('mock_data')

In [4]:
# Date range of interest. Defined outside the MOCK branch so that the fixture
# name below and the save cell further down are always derived from the same
# dates, and so `start_date` / `end_date` exist on a fresh kernel in either mode.
# NOTE: these are naive datetimes, so .timestamp() interprets them in the
# machine's local timezone.
start_date = datetime(2025, 6, 1)
end_date = datetime(2025, 7, 1)

client = StackExchangeClient()
if not MOCK:
    # Live path: the client is called with the range as integer epoch seconds.
    since = int(start_date.timestamp())
    until = int(end_date.timestamp())
    answers = client.get_answers(since, until)
else:
    # Mock path: load the response previously cached for this exact range.
    date_format = "%Y-%m-%d_%H-%M-%S"
    filename = f"answers_{start_date.strftime(date_format)}_{end_date.strftime(date_format)}.json"
    full_path = MOCK_DATA / filename
    answers = json.loads(full_path.read_text(encoding='utf-8'))
print(f"Fetched {len(answers)} number of answers")


Fetched 12499 number of answers


In [5]:
# Peek at the first record to see which fields each answer carries.
answers[:1]

[{'owner': {'account_id': 9938133,
   'reputation': 53794,
   'user_id': 7355741,
   'user_type': 'registered',
   'profile_image': 'https://i.sstatic.net/6CMNt.jpg?s=256',
   'display_name': 'fmw42',
   'link': 'https://stackoverflow.com/users/7355741/fmw42'},
  'is_accepted': True,
  'score': 3,
  'last_activity_date': 1748725286,
  'creation_date': 1748725286,
  'answer_id': 79646989,
  'question_id': 79644827,
  'content_license': 'CC BY-SA 4.0'}]

### Save fetched answers
If `MOCK` is set to `False`, the API response is saved in the `mock_data` directory.

In [9]:

if not MOCK:
    # Cache the live API response so later runs can load it with MOCK = True.
    date_format = "%Y-%m-%d_%H-%M-%S"  # e.g. 2025-06-01_00-00-00 (no spaces or colons)
    start_date_string = start_date.strftime(date_format)
    end_date_string = end_date.strftime(date_format)
    filename = f"answers_{start_date_string}_{end_date_string}.json"
    # write_text() does not create missing parent directories, so make sure
    # the fixture directory exists before writing.
    MOCK_DATA.mkdir(parents=True, exist_ok=True)
    full_path = MOCK_DATA / filename
    full_path.write_text(json.dumps(answers, indent=4), encoding='utf-8')
    print(f"File {filename} saved sucessfully.")

File answers_2025-06-01_00-00-00_2025-07-01_00-00-00.json saved sucessfully.


In [6]:
# Keep only the accepted answers. A plain truthiness check replaces the
# `== True` comparison (PEP 8) and matches the draft statistics code below.
accepted_answers = [answer for answer in answers if answer.get('is_accepted')]

In [7]:
# Preview the first five accepted answers.
accepted_answers[:5]

[{'owner': {'account_id': 9938133,
   'reputation': 53794,
   'user_id': 7355741,
   'user_type': 'registered',
   'profile_image': 'https://i.sstatic.net/6CMNt.jpg?s=256',
   'display_name': 'fmw42',
   'link': 'https://stackoverflow.com/users/7355741/fmw42'},
  'is_accepted': True,
  'score': 3,
  'last_activity_date': 1748725286,
  'creation_date': 1748725286,
  'answer_id': 79646989,
  'question_id': 79644827,
  'content_license': 'CC BY-SA 4.0'},
 {'owner': {'account_id': 23627132,
   'reputation': 351,
   'user_id': 17659480,
   'user_type': 'registered',
   'profile_image': 'https://lh3.googleusercontent.com/a/AATXAJw8tYRnPGJplk7WIWrznLDbVa-Wb90uLGnawndL=k-s256',
   'display_name': 'Navid Abedini',
   'link': 'https://stackoverflow.com/users/17659480/navid-abedini'},
  'is_accepted': True,
  'score': 2,
  'last_activity_date': 1748725425,
  'creation_date': 1748725425,
  'answer_id': 79646993,
  'question_id': 79646705,
  'content_license': 'CC BY-SA 4.0'},
 {'posted_by_collecti

In [8]:
# Number of answers that passed the accepted filter.
print(len(accepted_answers))

2319


In [9]:
# IDs of the first ten answers; slicing first means only ten records are visited.
answer_ids = [entry.get('answer_id') for entry in answers[:10]]

In [10]:
# Confirm how many IDs were selected.
len(answer_ids)

10

In [12]:
# NOTE(review): this override forces the live API path regardless of the MOCK
# value set in the configuration cell, so the else branch below never runs as
# written. Remove the override to load the cached comments fixture instead.
MOCK = False
if not MOCK:
    comments = client.get_comments(answer_ids)
else:
    filename = "comments_2025-06-01-00-00-00_2025-06-05-00-00-00.json"
    full_path = MOCK_DATA / filename
    comments = json.loads(full_path.read_text(encoding='utf-8'))
# Report the count for both paths, mirroring the answers cell (previously the
# print sat inside the else branch and was never reached).
print(f"Fetched {len(comments)} number of comments")

In [13]:
# Display the comments returned for the selected answer IDs.
comments

[{'owner': {'account_id': 30689396,
   'reputation': 61,
   'user_id': 23529203,
   'user_type': 'registered',
   'profile_image': 'https://lh3.googleusercontent.com/a/ACg8ocJAKEkxerStUT39qKUujc5kGiuDkVF1S3thiIT5GMVK=k-s256',
   'display_name': 'Lorna Watawat',
   'link': 'https://stackoverflow.com/users/23529203/lorna-watawat'},
  'edited': False,
  'score': 0,
  'creation_date': 1748876857,
  'post_id': 79646993,
  'comment_id': 140480638,
  'content_license': 'CC BY-SA 4.0'},
 {'owner': {'account_id': 3894877,
   'reputation': 380,
   'user_id': 3224196,
   'user_type': 'registered',
   'accept_rate': 75,
   'profile_image': 'https://www.gravatar.com/avatar/d808a0400eb16de7f8cb237be5e48662?s=256&d=identicon&r=PG&f=y&so-version=2',
   'display_name': 'Martin',
   'link': 'https://stackoverflow.com/users/3224196/martin'},
  'edited': False,
  'score': 0,
  'creation_date': 1748871046,
  'post_id': 79646999,
  'comment_id': 140479431,
  'content_license': 'CC BY-SA 4.0'},
 {'owner': {'

### Some edge cases

#### Empty 

In [12]:
# Edge case: a date range for which no answers are expected, followed by an
# empty list of answer IDs. Both calls should come back empty.
MOCK = False
empty_answers = client.get_answers(
    int(datetime(2000, 1, 1).timestamp()),
    int(datetime(2000, 1, 2).timestamp()),
)
assert len(empty_answers) == 0, "Expected no answers for this date range."

# No IDs in, no comments out.
empty_comments = client.get_comments([])
assert len(empty_comments) == 0, "Expected no comments for empty answer IDs."

In [13]:

# Show the result of the empty-range query.
empty_answers

[]

In [14]:
# Show the result of the empty-ID-list query.
empty_comments

[]

#### Invalid range

In [15]:
# Edge case: reversed date range (start after end) should yield no answers.
invalid_answers = client.get_answers(
    int(datetime(2025, 6, 5).timestamp()),
    int(datetime(2025, 6, 1).timestamp()),
)
assert len(invalid_answers) == 0, "Expected no answers for invalid date range."

# Edge case: answer IDs that cannot exist. In the recorded run the client
# logged a 400 Bad Request for this call and still returned an empty list.
invalid_comments = client.get_comments([-1, 0, 999999999999])
assert len(invalid_comments) == 0, "Expected no comments for invalid answer IDs."

Error fetching data from StackExchange API, url:https://api.stackexchange.com/2.3/answers/-1;0;999999999999/comments, page: 1 : 400 Client Error: Bad Request for url: https://api.stackexchange.com/2.3/answers/-1;0;999999999999/comments?order=asc&sort=creation&site=stackoverflow&page=1


In [16]:
# Show what came back for the reversed date range.
invalid_answers

[]

In [17]:
# Show what came back for the invalid answer IDs.
invalid_comments

[]

### Statistics computation (draft)

```
import time

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()

# 1. Initialize counters and a set for unique question IDs
counter_of_accepted_answers = 0
counter_of_not_accepted_answers = 0
sum_accepted_scores = 0
distinct_question_ids = set()

# 2. Single pass over the answers: collect question IDs and tally
#    accepted / not-accepted answers plus the accepted score total
for answer in answers:
    qid = answer.get('question_id')
    if qid:
        # set.add() already ignores duplicates, so no membership test is needed
        distinct_question_ids.add(qid)

    if answer.get('is_accepted'):
        counter_of_accepted_answers += 1
        sum_accepted_scores += float(answer.get('score', 0))
    else:
        counter_of_not_accepted_answers += 1

# 3. Average score of accepted answers (0 when there are none, which also
#    avoids a division by zero)
if counter_of_accepted_answers > 0:
    avg_score_accepted_answers = sum_accepted_scores / counter_of_accepted_answers
else:
    avg_score_accepted_answers = 0

# 4. Average number of answers per distinct question (0 when no question IDs
#    were collected)
total_answers = counter_of_accepted_answers + counter_of_not_accepted_answers
if len(distinct_question_ids) > 0:
    avg_answer_count_per_question = total_answers / len(distinct_question_ids)
else:
    avg_answer_count_per_question = 0

end = time.perf_counter()
# 5. Print the results
print("Total accepted answers:", counter_of_accepted_answers)
print("Average score of accepted answers:", avg_score_accepted_answers)
print("Average answer count per question:", avg_answer_count_per_question)
print("time elapsed for 5 days range (~2000 records): ", (end - start) * 1000, "ms")
```

```
import heapq
import time

# Time the top-10 selection with the monotonic, high-resolution clock.
start = time.perf_counter()
# heapq.nlargest() is documented as equivalent to
# sorted(iterable, key=key, reverse=True)[:n], but it does not need to sort
# the whole list just to keep ten items.
top_10_answers = heapq.nlargest(10, answers, key=lambda x: x['score'])
end = time.perf_counter()
print("time elapsed for 5 days range (~2000 records): ", (end - start) * 1000, "ms")
```


In [7]:
import requests
# Sample API payload stored as a public gist (pinned to a specific revision).
url = "https://gist.githubusercontent.com/PanagopoulosGeorge/4a5b2c1304971e502d64a5c1b13248bb/raw/6b748538ebeb137597655514a7dd47547d387f35/gistfile1.txt"
# requests applies no timeout by default, so set one to keep the cell from
# hanging; raise_for_status() surfaces HTTP errors here rather than as a
# confusing JSON decoding failure in the next cell.
response = requests.get(url, timeout=10)
response.raise_for_status()

In [10]:
# Display the 'items' list from the decoded JSON payload.
response.json()['items']

[{'owner': {'account_id': 4243,
   'reputation': 1344314,
   'user_id': 6309,
   'user_type': 'registered',
   'accept_rate': 100,
   'profile_image': 'https://i.sstatic.net/I4fiW.jpg?s=256',
   'display_name': 'VonC',
   'link': 'https://stackoverflow.com/users/6309/vonc'},
  'is_accepted': False,
  'score': 0,
  'last_activity_date': 1735692049,
  'last_edit_date': 1735692049,
  'creation_date': 1735689702,
  'answer_id': 79320958,
  'question_id': 78122228,
  'content_license': 'CC BY-SA 4.0'},
 {'owner': {'account_id': 1573946,
   'reputation': 1998,
   'user_id': 1460957,
   'user_type': 'registered',
   'accept_rate': 67,
   'profile_image': 'https://www.gravatar.com/avatar/5b3c77fc2586d7740d8fc15bfe23ca56?s=256&d=identicon&r=PG',
   'display_name': 'ALZ',
   'link': 'https://stackoverflow.com/users/1460957/alz'},
  'is_accepted': False,
  'score': 0,
  'last_activity_date': 1735689796,
  'creation_date': 1735689796,
  'answer_id': 79320960,
  'question_id': 37975366,
  'content_