<a href="https://colab.research.google.com/github/Nemczek/checkio_database/blob/main/pyCheckio_class_database.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project pyCheckio

## Setting up

In [60]:
# importing libraries
import requests
import pandas as pd
from collections import Counter

In [61]:
# Static configuration: CheckiO group API endpoints and the access token
BASE_URL = 'https://py.checkio.org/api/group-details/'
GROUP_PROGRESS_API_BASE = 'https://py.checkio.org/api/group-progress/'
GROUP_ACTIVITY_API_BASE = 'https://py.checkio.org/api/group-activity/'
# Put your personal group token here before running the notebook.
# NOTE(review): while TOKEN is None the concatenations below raise TypeError
# (str + None), so this cell cannot run on a fresh kernel until a token is set.
# NOTE(review): other cells append "&slug=..." to these URLs, so the token
# string presumably has to start the query string itself (e.g. "?token=...")
# -- confirm against the CheckiO API.
# NOTE(review): do not commit a real token; consider loading it from an
# environment variable instead of pasting it here.
TOKEN = None

# Endpoint URLs with the token appended, used by the request cells
URL_WITH_TOKEN = BASE_URL + TOKEN
PROGRESS_API_WITH_TOKEN = GROUP_PROGRESS_API_BASE + TOKEN
ACTIVITY_API_WITH_TOKEN = GROUP_ACTIVITY_API_BASE + TOKEN

In [None]:
# List every class returned by the group-details endpoint
# (get_slug picks one entry of this 'objects' list by its position)
requests.get(URL_WITH_TOKEN).json()['objects']

In [63]:
def get_slug(url, class_index=4):
  """
  Returns the slug parameter of one class from the class details response

  Parameters:
  url (str): URL to our class details
  class_index (int): position of the class inside the 'objects' list of the
    response; defaults to 4, which is the index of our class

  Returns:
  slug (str): the slug parameter

  Raises:
  IndexError: if the 'objects' list has no element at class_index
  KeyError: if the response has no 'objects' key or the class has no 'slug'
  """
  classes = requests.get(url).json()['objects']
  return classes[class_index]['slug']
# Quick check: fetch and print the slug of our class
print(get_slug(URL_WITH_TOKEN))

michal-wojcik-2022-2023


## Activity API

In [64]:
# The activity endpoint gives us access to the latest activity of users
class_slug = get_slug(URL_WITH_TOKEN)
activity_url_with_slug = "{}&slug={}".format(ACTIVITY_API_WITH_TOKEN, class_slug)

activity_response = requests.get(activity_url_with_slug)
resp = activity_response.json()['objects']

# Display one sample record (the third one) rather than the whole list
resp[2]

{'username': '126086',
 'createdAt': '2023-01-09',
 'data': {'task': {'imageUrl': 'https://d17mnqrx9pmt3e.cloudfront.net/media/logos/task/normal/password-enabled.png',
   'shortText': 'Verify password by condition\n',
   'subject': 'Acceptable Password I',
   'type': 'task',
   'url': '/mission/acceptable-password-i/'},
  'type': 'implementation',
  'user': {'avatarUrl': 'https://www.gravatar.com/avatar/c13a3f1c3609338a3cc532e0eacc70eb?s=80',
   'level': 4,
   'username': '126086',
   'group': {'name': 'Michal Wojcik 2022-2023',
    'url': '/class/michal-wojcik-2022-2023/',
    'owner': 'MichalRyszardWojcik',
    'slug': 'michal-wojcik-2022-2023'},
   'type': 'user',
   'url': '/user/126086/'},
  'url': '/class/michal-wojcik-2022-2023/solution-history/3510415/'}}

## Progress API

In [65]:
# Download the progress records of our class from the progress endpoint
progress_url_with_slug = "{}&slug={}".format(PROGRESS_API_WITH_TOKEN, class_slug)
progress_response = requests.get(progress_url_with_slug)
progress_data = progress_response.json()['objects']

In [None]:
# Inspect the first task record to see how the progress data is structured
progress_data[0]

## Quest dataset

In [68]:
# Collect every distinct status value that occurs in the progress data
unique_statuses = {
  member['status']
  for task_record in progress_data
  for member in task_record['data']
}
unique_statuses

{'new', 'opened', 'published', 'tried'}

In [None]:
# Build one row per task:
# [task_name, num_of_votes, num_of_comments, num_opened, num_published, num_tried, num_new]
list_of_tasks = []
for task in progress_data:
  entries = task['data']

  # Votes and comments are summed over every solution of every user of the task
  solutions = [solution for entry in entries for solution in entry['solutions']]
  num_of_votes = sum(solution['votes'] for solution in solutions)
  num_of_comments = sum(solution['comments'] for solution in solutions)

  # Counter returns 0 for a status that never occurs, so no special case is needed
  status_counts = Counter(entry['status'] for entry in entries)

  list_of_tasks.append([task['title'], num_of_votes, num_of_comments,
                        status_counts['opened'], status_counts['published'],
                        status_counts['tried'], status_counts['new']])
list_of_tasks

In [None]:
# Turn the per-task rows into a pandas DataFrame with one named column per field
task_columns = ['Task', 'Votes', 'Comments', 'Opened', 'Published', 'Tried', 'New']
task_data = pd.DataFrame(data=list_of_tasks, columns=task_columns)
task_data

## Every user attempt dataset

In [72]:
# Flatten the progress data into rows: one row per user entry of each task
list_of_entries = []

for task in progress_data:
  task_name = task['title']

  for entry in task['data']:
    username, status = entry['username'], entry['status']
    solutions = entry['solutions']

    if len(solutions) > 0:
      # Only the first solution of the user is recorded
      first_solution = solutions[0]
      url = first_solution['url']
      createdAt = first_solution['createdAt']
      votes = first_solution['votes']
      comments = first_solution['comments']
    else:
      # No solution: every solution field holds the text "None" as a placeholder
      url = createdAt = votes = comments = "None"

    row = [username, status, task_name, createdAt, votes, comments, url]
    list_of_entries.append(row)
print(list_of_entries)


[['karol2202', 'published', 'Multiply (Intro)', 'None', 'None', 'None', 'None'], ['117374', 'published', 'Multiply (Intro)', '2022-11-21 15:31', 0, 0, 'https://py.checkio.org/mission/multiply-intro/publications/117374/python-3/first-attempt/'], ['Antoni_Wojcik', 'published', 'Multiply (Intro)', '2022-11-21 14:02', 9, 0, 'https://py.checkio.org/mission/multiply-intro/publications/Antoni_Wojcik/python-3/first/'], ['Karolina_Zadura', 'published', 'Multiply (Intro)', '2022-11-21 15:39', 0, 0, 'https://py.checkio.org/mission/multiply-intro/publications/Karolina_Zadura/python-3/first/'], ['126083', 'published', 'Multiply (Intro)', '2022-11-28 15:48', 5, 0, 'https://py.checkio.org/mission/multiply-intro/publications/126083/python-3/first/'], ['126212', 'published', 'Multiply (Intro)', 'None', 'None', 'None', 'None'], ['119587', 'published', 'Multiply (Intro)', '2023-01-04 19:46', 0, 0, 'https://py.checkio.org/mission/multiply-intro/publications/119587/python-3/first/'], ['Mariaa_Kuczynskaa', 

In [None]:
# Build the per-entry pandas DataFrame from the collected rows
entry_columns = ['username', 'status', 'task_name', 'createdAt',
                 'votes', 'comments', 'url']
entry_df = pd.DataFrame(data=list_of_entries, columns=entry_columns)
entry_df

## Exporting data to SQL

In [None]:
%%capture
db_name = "checkio_class.db"

%load_ext sql
%sql sqlite:///{db_name}

import sqlalchemy as db
engine = db.create_engine(f'sqlite:///{db_name}')

entry_df.to_sql('entry_df', engine, index=False)
task_data.to_sql('task_data', engine, index=False)
# This code whill throw an error if database alredy exists.

In [None]:
# Sanity check: read the task table back from the database file

import sqlite3 as sq
connection = sq.connect(db_name)
cursor = connection.cursor()  # also used by the other query cells

query = "SELECT * from task_data;"
result = cursor.execute(query)
# The first item of every description entry is the column name
column_names = [column[0] for column in result.description]
rows = result.fetchall()

pd.DataFrame(rows, columns=column_names)

In [None]:
# Read the whole entry table back from the database
query2 = 'SELECT * FROM entry_df'
result2 = cursor.execute(query2)
# The first item of every description entry is the column name
column_names2 = [column[0] for column in result2.description]
rows2 = result2.fetchall()

pd.DataFrame(rows2, columns=column_names2)

In [None]:
# Rows of a single user, skipping rows whose votes column holds the text 'None'
query3 = "SELECT * FROM entry_df WHERE NOT votes = 'None' AND username = 'Antoni_Wojcik'"
result3 = cursor.execute(query3)
# The first item of every description entry is the column name
column_names3 = [column[0] for column in result3.description]
rows3 = result3.fetchall()

pd.DataFrame(rows3, columns=column_names3)