In [1]:
import pandas as pd
import json 
import requests

In [2]:
def access_api(endpoint_url=None, timeout=30):
    """
    Description: fetch a JSON document from an HTTP API and decode it into Python objects.

    INPUT:
        endpoint_url: URL to request. When omitted, the Stack Exchange search
            endpoint for Stack Overflow questions with "perl" in the title is
            used (ordered by activity, descending).
        timeout: seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook.

    OUTPUT: the decoded JSON body of the response.

    RAISES:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
        ValueError: if the response body is not valid JSON.
    """

    # Endpoint
    if endpoint_url is None:
        endpoint_url = 'https://api.stackexchange.com/2.2/search?order=desc&sort=activity&intitle=perl&site=stackoverflow'

    # Request
    res = requests.get(endpoint_url, timeout=timeout)

    # Fail loudly on HTTP errors instead of handing an error payload to the
    # cells that expect real data.
    res.raise_for_status()

    # Data parsing
    results = json.loads(res.content)
    return results

In [3]:
# Call the API once and keep the decoded JSON in a variable for the cells below.
response_json = access_api()

In [4]:
# Split every item of the API response into two flat records that share an
# 'id' (the position of the item in the response):
#   - info_list:  all top-level fields of the item except the nested 'owner'
#   - owner_list: the fields of the nested 'owner' object
# Keeping them apart lets each list be turned into its own DataFrame.

info_list = []

owner_list = []


for id_, doc in enumerate(response_json['items']):
    # Every field except 'owner', kept in the order the API sent them so the
    # resulting column order is the same on every run (a set would shuffle it).
    row_first_list = {key: value for key, value in doc.items() if key != 'owner'}

    # assigning id
    row_first_list['id'] = id_

    # adding the row to the list
    info_list.append(row_first_list)

    # Owner information; an item without an 'owner' entry still gets a row
    # (holding only the id) so both lists stay aligned.
    row_second_list = dict(doc.get('owner') or {})

    # assigning id
    row_second_list['id'] = id_

    # adding the row
    owner_list.append(row_second_list)
    

In [5]:
# Build one DataFrame per record list: question fields and owner fields.
info_df, owner_df = map(pd.DataFrame, (info_list, owner_list))

In [6]:
# Number of questions with no answers at all.
non_contested = len(info_df.loc[info_df['answer_count'] == 0])

# Number of questions with at least one answer.
number_contested = len(info_df.loc[info_df['answer_count'] > 0])

In [7]:
# Report the answered / unanswered totals computed in the previous cell.
answer_report = 'The Number of answered are: {}\nThe number of not answered: {}'
print(answer_report.format(number_contested, non_contested))

The Number of answered are: 24
The number of not answered: 6


In [8]:
# Show which fields are available in info_df before querying it.
info_df.columns

Index(['is_answered', 'score', 'question_id', 'view_count', 'creation_date',
       'tags', 'answer_count', 'last_activity_date', 'link', 'title',
       'content_license', 'id', 'last_edit_date', 'accepted_answer_id',
       'closed_reason', 'closed_date'],
      dtype='object')

In [9]:
# Pick the first row whose view_count equals the minimum and unpack the
# fields needed for the report.
view_count, link, _id = info_df.loc[
    info_df['view_count'] == info_df['view_count'].min(),
    ['view_count', 'link', 'id'],
].values.tolist()[0]

In [10]:
# Report the id, link and view count of the least viewed entry.
least_viewed_report = 'La respuesta con menor número de visitas tiene id: {}\nEs del link: {}\nTiene: {} visitas'
print(least_viewed_report.format(_id, link, view_count))

La respuesta con menor número de visitas tiene id: 15
Es del link: https://stackoverflow.com/questions/73167201/counting-number-of-sub-hierarchies-in-a-text-file-using-perl-scripting
Tiene: 21 visitas


In [11]:
# Convert creation_date to pandas datetimes, reading the stored numbers as
# seconds since the Unix epoch, so the dates are human readable.
info_df['creation_date'] = info_df['creation_date'].pipe(pd.to_datetime, unit='s')

In [12]:
# Fields reported for the oldest and the newest entry.
date_report_columns = ['creation_date', 'link', 'id']

# Compute the series once; it is compared against both its min and its max.
creation_dates = info_df['creation_date']

# First row carrying the earliest creation date.
min_date, link_min, id_min = info_df.loc[creation_dates == creation_dates.min(), date_report_columns].values.tolist()[0]

# First row carrying the latest creation date.
max_date, link_max, id_max = info_df.loc[creation_dates == creation_dates.max(), date_report_columns].values.tolist()[0]

# The date line previously ended with the word "visitas" (visits), copied
# from the view-count report; a date is not a visit count, so it was removed.
print('La respuesta más vieja tiene id: {}\nEs del link: {}\nEs de la fecha: {}'.format(id_min, link_min, min_date))

print('La respuesta más actual tiene id: {}\nEs del link: {}\nEs de la fecha: {}'.format(id_max, link_max, max_date))

La respuesta más vieja tiene id: 16
Es del link: https://stackoverflow.com/questions/2487829/whats-the-right-way-to-kill-child-processes-in-perl-before-exiting
Es de la fecha: 2010-03-21 16:43:23 visitas
La respuesta más actual tiene id: 11
Es del link: https://stackoverflow.com/questions/73168851/can-connect-to-mysql-database-through-cli-but-not-perl-dbi
Es de la fecha: 2022-07-29 17:11:28 visitas


In [13]:
# Row of the owner with the highest reputation (first one on ties; missing
# reputations are ignored by idxmax).
top_owner = owner_df.loc[owner_df['reputation'].idxmax()]

# Numeric owner fields come back as floats when the column holds missing
# values, which printed ids like "342740.0"; cast them back to int for display.
reputation = int(top_owner['reputation'])
user_id = top_owner['user_id']
if pd.notna(user_id):
    user_id = int(user_id)
link_ow = top_owner['link']
id_ow = int(top_owner['id'])

# 'id' is the key shared by owner_df and info_df, so it locates the entry
# that this owner posted.
link_answered = info_df.loc[info_df['id'] == id_ow, 'link'].iloc[0]

print('El usuario con mayor reputación tiene el id: {}\nSu user id es: {}\nSu reputación es: {}\nSu link de usuario es:{}\nEl link de la respuesta es:{}'.format(id_ow, user_id, reputation, link_ow, link_answered))

El usuario con mayor reputación tiene el id: 19
Su user id es: 342740.0
Su reputación es: 19233.0
Su link de usuario es:https://stackoverflow.com/users/342740/prix
El link de la respuesta es:https://stackoverflow.com/questions/3107540/c-public-key-verify-perl-private-key-and-use-as-aes-key-possible-and-or-viabl
