### Extracting Data From API

In [1]:
# We use some libraries to extract data from the web api provided
import requests
import json
import pandas as pd

In [2]:
# we define the url to extract the data with Get method
url = "https://api.stackexchange.com/2.2/search?order=desc&sort=activity&intitle=perl&site=stackoverflow"

try:
    response = requests.get(url)
except requests.exceptions.RequestException as e:
    print(f"An error occurred while making the request: {e}")
except requests.exceptions.HTTPError as e:
    print(f"HTTP error: {e.response.status_code} - {e.response.text}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

json_data = json.loads(response.text)

In [3]:
df = pd.DataFrame(json_data["items"])
df.head(5)

# i observed that the column "owner" has a little more information that would be needed to complement all the data

Unnamed: 0,tags,owner,is_answered,view_count,accepted_answer_id,answer_count,score,last_activity_date,creation_date,last_edit_date,question_id,content_license,link,title,closed_date,closed_reason,protected_date,community_owned_date
0,[perl],"{'reputation': 3305, 'user_id': 251947, 'user_...",True,16319,33442443.0,14,14,1693754305,1446228117,1446229000.0,33442240,CC BY-SA 3.0,https://stackoverflow.com/questions/33442240/p...,Perl printf to use commas as thousands-separator,,,,
1,"[regex, file, perl, parsing]","{'reputation': 9, 'user_id': 22026250, 'user_t...",False,39,,0,-2,1693689518,1693689518,,77030089,,https://stackoverflow.com/questions/77030089/s...,Struggling to read file and use RegEx in perl,1693712000.0,Not suitable for this site,,
2,"[regex, perl, parsing]","{'reputation': 9, 'user_id': 22026250, 'user_t...",True,70,77029610.0,1,0,1693685333,1693678041,1693681000.0,77029473,CC BY-SA 4.0,https://stackoverflow.com/questions/77029473/s...,Struggling with Perl regex: Extracting scenari...,,,,
3,"[regex, perl]","{'reputation': 47, 'user_id': 12111603, 'user_...",True,42,77027643.0,1,1,1693645752,1693639247,,77027340,CC BY-SA 4.0,https://stackoverflow.com/questions/77027340/i...,"In perl, how can I use regex on a hash contain...",,,,
4,[perl],"{'reputation': 329, 'user_id': 17843701, 'user...",True,75,71510162.0,1,1,1693563247,1647505118,1693563000.0,71509034,CC BY-SA 4.0,https://stackoverflow.com/questions/71509034/p...,Perl: Subroutine uneven/unusual behaviour,,,,


In [4]:
# we gonna change the json data to a pandas dataframe
df = pd.DataFrame(json_data["items"]) # this part of the data is in the items key

owner_info = df["owner"] # this part of the data is in the owner key into the items key ... items->owner

# extracting the owner info from the dataframe
df_owner = pd.DataFrame([x for x in owner_info])

#once we have the owner info we can merge it with the rest of the data
df = pd.concat([df_owner, df], axis=1)

# we dont need the owner column anymore
df.drop(columns=["owner"], inplace=True)

df.head()


Unnamed: 0,reputation,user_id,user_type,accept_rate,profile_image,display_name,link,tags,is_answered,view_count,...,creation_date,last_edit_date,question_id,content_license,link.1,title,closed_date,closed_reason,protected_date,community_owned_date
0,3305,251947,registered,67.0,https://www.gravatar.com/avatar/80f18fe71d6fc0...,Chris Koknat,https://stackoverflow.com/users/251947/chris-k...,[perl],True,16319,...,1446228117,1446229000.0,33442240,CC BY-SA 3.0,https://stackoverflow.com/questions/33442240/p...,Perl printf to use commas as thousands-separator,,,,
1,9,22026250,registered,,https://www.gravatar.com/avatar/037b6d260a8fdd...,PopSmoke,https://stackoverflow.com/users/22026250/popsmoke,"[regex, file, perl, parsing]",False,39,...,1693689518,,77030089,,https://stackoverflow.com/questions/77030089/s...,Struggling to read file and use RegEx in perl,1693712000.0,Not suitable for this site,,
2,9,22026250,registered,,https://www.gravatar.com/avatar/037b6d260a8fdd...,PopSmoke,https://stackoverflow.com/users/22026250/popsmoke,"[regex, perl, parsing]",True,70,...,1693678041,1693681000.0,77029473,CC BY-SA 4.0,https://stackoverflow.com/questions/77029473/s...,Struggling with Perl regex: Extracting scenari...,,,,
3,47,12111603,registered,,https://i.stack.imgur.com/qbNhc.jpg?s=256&g=1,Henri_S,https://stackoverflow.com/users/12111603/henri-s,"[regex, perl]",True,42,...,1693639247,,77027340,CC BY-SA 4.0,https://stackoverflow.com/questions/77027340/i...,"In perl, how can I use regex on a hash contain...",,,,
4,329,17843701,registered,,https://www.gravatar.com/avatar/36c114a41c3dd6...,PPP,https://stackoverflow.com/users/17843701/ppp,[perl],True,75,...,1647505118,1693563000.0,71509034,CC BY-SA 4.0,https://stackoverflow.com/questions/71509034/p...,Perl: Subroutine uneven/unusual behaviour,,,,


In [5]:
# save the data in a csv file
df.to_csv("StackOverflow.csv", index=False)

### Obtener el número de respuestas contestadas y no contestadas

In [6]:
print("Total of questions answered and unanswered")
df.value_counts("is_answered")

Total of questions answered and unanswered


is_answered
True     24
False     6
Name: count, dtype: int64

### Obtener la respuesta con menor número de vistas

In [7]:
print("The question with the most views is:")
df[df["view_count"] == df["view_count"].min()][["title","view_count"]]

The question with the most views is:


Unnamed: 0,title,view_count
11,why does spack external find fail to find perl?,12


In [8]:
# One lines solution
df["title"][df["view_count"] == min(df["view_count"])]

11    why does spack external find fail to find perl?
Name: title, dtype: object

### Obtener la respuesta más vieja y más actual

In [9]:
df["created_date"] = pd.to_datetime(df["creation_date"], unit="s")
result = df["title"][(df["created_date"] == max(df["created_date"])) | (df["created_date"] == min(df["created_date"]))]

print("The question with the most recent and oldest is:")

print("newest: ", result.values[0])
print("oldest: ", result.values[1])

The question with the most recent and oldest is:
newest:  Struggling to read file and use RegEx in perl
oldest:  What is the best way to slurp a file into a string in Perl?


In [11]:
df["created_date"] = pd.to_datetime(df["creation_date"], unit="s")
result = df[["title","created_date"]][(df["created_date"] == max(df["created_date"])) | (df["created_date"] == min(df["created_date"]))]
result

Unnamed: 0,title,created_date
1,Struggling to read file and use RegEx in perl,2023-09-02 21:18:38
28,What is the best way to slurp a file into a st...,2008-10-15 21:55:47


In [10]:
print("The oldest answer is: ")
df["title"][df["created_date"] == df["created_date"].min()].to_list()[0]

The oldest answer is: 


'What is the best way to slurp a file into a string in Perl?'

In [11]:
print("The oldest answer is: ")
df["title"][df["created_date"] == df["created_date"].max()].to_list()[0]

The oldest answer is: 


'Struggling to read file and use RegEx in perl'

### Obtener la respuesta del owner que tenga una mayor reputación

In [19]:
result = df[["title","reputation"]]
result.sort_values(by="reputation", ascending=False).head(5)

Unnamed: 0,title,reputation
6,How can I use an array as the arguments to a b...,57719
26,How can I repeat a string N times in Perl?,50101
19,taint-mode perl: preserve suid when running ex...,37539
15,How do you create unicode file names in Window...,35625
28,What is the best way to slurp a file into a st...,26430


In [21]:
print("The title of the question with owner having the highest reputation is: ")
df[["title","reputation"]][df["reputation"] == max(df["reputation"])]

The title of the question with owner having the highest reputation is: 


Unnamed: 0,title,reputation
6,How can I use an array as the arguments to a b...,57719


In [37]:
print(" hello world")

 hello world
