In [1]:
import time
import warnings
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import sympy as smp
from IPython.display import display, Math, Markdown, HTML
from scipy import optimize, stats
pd.set_option('display.max_colwidth', None)

## Import Data on the Users Whose Questions Cleo Answered, Using the Stack Exchange API

In [2]:
# Request the answers posted by user 97378 (Cleo) from the Stack Exchange API.
cleo_json = requests.get("https://api.stackexchange.com/2.2/users/97378/answers?pagesize=100&site=mathematics")
# Stop here on an HTTP error status instead of failing later with an
# opaque error when the "items" column is missing from the parsed response.
cleo_json.raise_for_status()

# Load the JSON response wrapper into a DataFrame; the answers sit in its "items" column.
cleo_answers = pd.read_json(StringIO(cleo_json.text))

# Flatten the nested answer dicts into regular columns (e.g. "owner.user_id").
cleo_answers = pd.json_normalize(cleo_answers.loc[:, "items"])

# Info about the questions.
# The ids go into the URL path separated by ';'. Duplicates are removed first so
# that several answers to the same question do not repeat its id in the request.
q_ids = ";".join(str(i) for i in cleo_answers["question_id"].drop_duplicates())
questions = requests.get(f"https://api.stackexchange.com/2.2/questions/{q_ids}?pagesize=100&site=mathematics")
questions.raise_for_status()
questions_t = pd.read_json(StringIO(questions.text))

# One row per question, with nested fields such as the owner flattened into columns.
df_questions = pd.json_normalize(questions_t.loc[:, "items"])

## Clean Question OP Data

In [3]:
# Build the table of distinct question owners (OPs): keep the owner columns,
# drop repeated rows, order by reputation (highest first), give the columns
# readable names and renumber the rows from 0.
df_OPall = (
    df_questions
    .loc[:, ['owner.display_name', 'owner.reputation', 'owner.user_id', 'owner.link']]
    .drop_duplicates()
    .sort_values('owner.reputation', ascending=False)
    .rename(columns={
        'owner.display_name': 'Username',
        'owner.reputation': 'Reputation',
        'owner.user_id': 'ID',
        'owner.link': 'Profile Link',
    })
    .reset_index(drop=True)
)

In [4]:
# Collect Cleo's own user record from the owner fields of the first row of her
# answers, in the same column order as df_OPall (Username, Reputation, ID, Profile Link).
cleo_list = [
    cleo_answers[column][0]
    for column in ('owner.display_name', 'owner.reputation', 'owner.user_id', 'owner.link')
]

# Append her as a new last row, but only if her ID is not in the table yet:
# without this guard, re-running the cell adds a duplicate row each time, and
# every duplicate would later be fetched again from the API.
if not (df_OPall['ID'] == cleo_list[2]).any():
    df_OPall.loc[len(df_OPall)] = cleo_list

## Import OP Questions

In [5]:
def get_questions_by_user_id(user_id):
    """Fetch all questions of one user from the Stack Exchange API (site ``math``).

    Requests ``/2.3/users/{user_id}/questions`` page by page (100 items per
    page, most recently active first) until the response no longer reports
    ``has_more``.

    Parameters
    ----------
    user_id
        User id interpolated into the request URL.

    Returns
    -------
    list of dict
        The raw question objects from the ``items`` arrays of all pages, in
        the order received. If a response carries no ``items`` (e.g. an error
        payload), a ``RuntimeWarning`` is emitted and the items collected so
        far are returned, so callers can tell the result may be incomplete.
    """
    # The URL does not change between pages; only the "page" parameter does.
    url = f"https://api.stackexchange.com/2.3/users/{user_id}/questions"
    questions = []
    page = 1

    while True:
        params = {
            "order": "desc",
            "sort": "activity",
            "site": "math",
            "page": page,
            "pagesize": 100  # Adjust the page size if needed
        }
        response = requests.get(url, params=params)
        data = response.json()

        if "items" not in data:
            # Keep the best-effort behaviour (return what we have), but do not
            # hide the fact that the listing stopped early.
            warnings.warn(
                f"Stack Exchange API returned no items for user {user_id} "
                f"(page {page}): {data.get('error_message', data)}",
                RuntimeWarning,
                stacklevel=2,
            )
            break

        questions.extend(data["items"])

        # When the API sends a "backoff" value, wait that many seconds before
        # calling the same method again (next page or next user).
        backoff = data.get("backoff")
        if backoff:
            time.sleep(backoff)

        # A missing "has_more" is treated as "no further pages".
        if not data.get("has_more", False):
            break
        page += 1

    return questions

def get_questions_by_ids(user_ids):
    """Return the questions of several users as one flat list.

    Calls ``get_questions_by_user_id`` once per entry of ``user_ids``, in the
    given order, and concatenates the results.
    """
    return [
        question
        for uid in user_ids
        for question in get_questions_by_user_id(uid)
    ]

# Fetch the questions of every user in df_OPall (user ids are taken from its 'ID' column)
OPquestions = get_questions_by_ids(df_OPall.loc[:, 'ID'].to_list())

## Cleaning OP Question Data

In [6]:
# Turn the raw question dicts into a tidy table in one pipeline:
#   1. pull display_name / user_id out of the nested 'owner' dict
#      (None when 'owner' is not a dict),
#   2. keep only the columns of interest,
#   3. sort by user name, newest post first (on the raw Unix timestamps),
#   4. give the columns readable names,
#   5. convert the Unix timestamps to plain dates.
df_OPquestions = (
    pd.DataFrame(OPquestions)
    .assign(
        owner_display_name=lambda frame: frame['owner'].apply(
            lambda owner: owner.get('display_name') if isinstance(owner, dict) else None
        ),
        owner_user_id=lambda frame: frame['owner'].apply(
            lambda owner: owner.get('user_id') if isinstance(owner, dict) else None
        ),
    )
    .loc[:, ['owner_display_name', 'owner_user_id', 'title', 'creation_date', 'link']]
    .sort_values(['owner_display_name', 'creation_date'], ascending=[True, False])
    .rename(columns={
        'owner_display_name': 'Username',
        'creation_date': 'Post Date',
        'owner_user_id': 'ID',
        'title': 'Post Title',
        'link': 'Post Link',
    })
    .assign(**{
        'Post Date': lambda frame: pd.to_datetime(frame['Post Date'], unit='s').dt.date
    })
)

In [7]:
# Display the cleaned question table as this cell's output.
df_OPquestions

Unnamed: 0,Username,ID,Post Title,Post Date,Post Link
266,Anastasiya-Romanova 秀,133248,How to explain this geometry problem to an 8th grader?,2017-01-26,https://math.stackexchange.com/questions/2114690/how-to-explain-this-geometry-problem-to-an-8th-grader
246,Anastasiya-Romanova 秀,133248,"Closed form of $\mathscr{R}=\int_0^{\pi/2}\sin^2x\,\ln\big(\sin^2(\tan x)\big)\,\,dx$",2014-11-04,https://math.stackexchange.com/questions/1006127/closed-form-of-mathscrr-int-0-pi-2-sin2x-ln-big-sin2-tan-x-big
271,Anastasiya-Romanova 秀,133248,The Integral of Multiple Tangent Functions,2014-11-04,https://math.stackexchange.com/questions/1005740/the-integral-of-multiple-tangent-functions
270,Anastasiya-Romanova 秀,133248,A combination integral and series resulting the inverse tangent integral,2014-11-02,https://math.stackexchange.com/questions/1003213/a-combination-integral-and-series-resulting-the-inverse-tangent-integral
272,Anastasiya-Romanova 秀,133248,"Closed form of $\displaystyle\int_{0}^{\pi/4}\int_{\pi/2}^{\pi}\frac{(\cos x-\sin x)^{y-2}}{(\cos x+\sin x)^{y+2}}\, dy\, dx$",2014-11-01,https://math.stackexchange.com/questions/1001785/closed-form-of-displaystyle-int-0-pi-4-int-pi-2-pi-frac-cos-x-si
...,...,...,...,...,...
572,xuce1234,182482,"Evaluate $\int_0^1\frac{\ln(1-x)}{x}\text{Li}_3\left(\frac{1+x}{2}\right)dx$ , $\int_0^1\frac{\ln^2(1-x)}{x}\text{Li}_2\left(\frac{1+x}{2} \right)dx$",2014-10-12,https://math.stackexchange.com/questions/970125/evaluate-int-01-frac-ln1-xx-textli-3-left-frac1x2-rightdx
597,xuce1234,182482,Euler sums question,2014-10-12,https://math.stackexchange.com/questions/969710/euler-sums-question
598,xuce1234,182482,"Evaluate $\int_{0}^{1} \frac{\left[\rm{Li}_2\left(\frac{1}{2} \right)-\rm{Li}_2\left(\frac{1 + x}{2}\right)\right]\ln( 1 - x)}{1 + x}\,dx$",2014-10-12,https://math.stackexchange.com/questions/969684/evaluate-int-01-frac-left-rmli-2-left-frac12-right-rmli-2
586,xuce1234,182482,How to calculate the value of the series limits,2014-10-11,https://math.stackexchange.com/questions/967509/how-to-calculate-the-value-of-the-series-limits


## Getting Answer Data

In [8]:
def get_answers_by_user_id(user_id):
    """Fetch all answers of one user from the Stack Exchange API (site ``math``).

    Requests ``/2.3/users/{user_id}/answers`` page by page (100 items per
    page, most recently active first) until the response no longer reports
    ``has_more``.

    Parameters
    ----------
    user_id
        User id interpolated into the request URL.

    Returns
    -------
    list of dict
        The raw answer objects from the ``items`` arrays of all pages, in the
        order received. If a response carries no ``items`` (e.g. an error
        payload), a ``RuntimeWarning`` is emitted and the items collected so
        far are returned, so callers can tell the result may be incomplete.
    """
    # The URL does not change between pages; only the "page" parameter does.
    url = f"https://api.stackexchange.com/2.3/users/{user_id}/answers"
    answers = []
    page = 1

    while True:
        params = {
            "order": "desc",
            "sort": "activity",
            "site": "math",
            "page": page,
            "pagesize": 100  # You can adjust the page size if needed
        }
        response = requests.get(url, params=params)
        data = response.json()

        if "items" not in data:
            # Keep the best-effort behaviour (return what we have), but do not
            # hide the fact that the listing stopped early.
            warnings.warn(
                f"Stack Exchange API returned no items for user {user_id} "
                f"(page {page}): {data.get('error_message', data)}",
                RuntimeWarning,
                stacklevel=2,
            )
            break

        answers.extend(data["items"])

        # When the API sends a "backoff" value, wait that many seconds before
        # calling the same method again (next page or next user).
        backoff = data.get("backoff")
        if backoff:
            time.sleep(backoff)

        # A missing "has_more" is treated as "no further pages".
        if not data.get("has_more", False):
            break
        page += 1

    return answers

def get_answers_by_ids(user_ids):
    """Return the answers of several users as one flat list.

    Calls ``get_answers_by_user_id`` once per entry of ``user_ids``, in the
    given order, and concatenates the results.
    """
    return [
        answer
        for uid in user_ids
        for answer in get_answers_by_user_id(uid)
    ]

# Fetch the answers of every user in df_OPall (user ids are taken from its 'ID' column)
OPanswers = get_answers_by_ids(df_OPall.loc[:, 'ID'].to_list())

In [9]:
# Turn the raw answer dicts into a tidy table in one pipeline:
#   1. pull display_name / user_id out of the nested 'owner' dict
#      (None when 'owner' is not a dict),
#   2. keep only the columns of interest (this also leaves 'owner' behind),
#   3. sort by user name, newest answer first (on the raw Unix timestamps),
#   4. give the columns readable names,
#   5. convert the Unix timestamps to plain dates.
df_OPanswers = (
    pd.DataFrame(OPanswers)
    .assign(
        owner_display_name=lambda frame: frame['owner'].apply(
            lambda owner: owner.get('display_name') if isinstance(owner, dict) else None
        ),
        owner_user_id=lambda frame: frame['owner'].apply(
            lambda owner: owner.get('user_id') if isinstance(owner, dict) else None
        ),
    )
    .loc[:, ['owner_display_name', 'owner_user_id', 'answer_id', 'creation_date']]
    .sort_values(['owner_display_name', 'creation_date'], ascending=[True, False])
    .rename(columns={
        'owner_display_name': 'Username',
        'creation_date': 'Post Date',
        'owner_user_id': 'ID',
    })
    .assign(**{
        'Post Date': lambda frame: pd.to_datetime(frame['Post Date'], unit='s').dt.date
    })
)

In [13]:
# Persist the three tables as CSV text in the working directory, without the
# row index. Note that the target file names carry no ".csv" extension.
df_OPanswers.to_csv(path_or_buf='df_OPanswers', index=False)
df_OPquestions.to_csv(path_or_buf='df_OPquestions', index=False)
df_OPall.to_csv(path_or_buf='df_OPall', index=False)