## This notebook maps each user's numeric Discourse action types to readable descriptions and counts them per user, so that the activity data is easier to read.

In [9]:
import os, json, requests, time
from pprint import pprint

import yaml
# Read the API credentials from a YAML file that lives one directory above the notebook.
with open("../key.yaml") as key_file:
    api_keys = yaml.safe_load(key_file)

api_key = api_keys["API_KEY"]
api_username = "ShubhamG"
defaultHost = "discourse.onlinedegree.iitm.ac.in"

# Authentication headers attached to every request sent to the forum
headers = {"Api-Key": api_key, "Api-Username": api_username}

# (numeric action type, readable description) pairs, in ascending id order.
# Ids 8, 10 and 14 have no entry.
_ACTION_LABELS = (
    (1, "likes_given"),
    (2, "likes_recieved"),
    (3, "user_bookmarked_a_post"),
    (4, "created_a_new_topic"),
    (5, "replied"),
    (6, "recieved_a_response"),
    (7, "user_was_mentioned"),
    (9, "user's_post_was_quoted"),
    (11, "user_edited_the_post"),
    (12, "user_sent_a_new_private_message"),
    (13, "recieved_a_private_message"),
    (15, "user_marked_a_post_as_solution"),
    (16, "user_was_assigned_a_topic_or_post"),
    (17, "linked"),
)

# Lookup table keyed by the action type rendered as a string, e.g. "1" -> "likes_given"
action_to_description = {str(action_id): label for action_id, label in _ACTION_LABELS}

In [1]:
def map_user_actions(data_list):
    """Count a user's actions per readable description and tally accepted solutions.

    Args:
        data_list: Iterable of user-action dicts. Every element must have an
            "action_type" key (an int or its string form); elements whose
            action type is 5 (a reply) must also have a "post_id" key.

    Returns:
        A dict that maps every description in ``action_to_description`` to the
        number of matching elements (0 when none), plus "Solutions_posted":
        the number of replies whose post JSON has ``accepted_answer`` set to
        true. An action type that is missing from ``action_to_description`` is
        counted under "unknown_action_type_<id>" instead of raising KeyError.

    Side effects:
        For each reply, sleeps 0.3 seconds and then sends one GET request to
        ``https://{defaultHost}/posts/{post_id}.json``. A failed request or a
        non-200 status is printed and the reply is simply not counted as a
        solution.
    """
    number_of_actions = {v: 0 for v in action_to_description.values()}
    number_of_actions["Solutions_posted"] = 0

    for element in data_list:
        # Normalise to a string so that 5 and "5" are treated the same everywhere below.
        action_key = str(element["action_type"])  # e.g. "1"
        action_type = action_to_description.get(
            action_key, f"unknown_action_type_{action_key}"
        )  # e.g. "likes_given"
        number_of_actions[action_type] = number_of_actions.get(action_type, 0) + 1

        if action_key != "5":
            continue

        # The action is a reply: look the post up to see whether it was accepted as a solution.
        time.sleep(0.3)  # pause between consecutive requests to the forum
        post_id = element["post_id"]
        endpoint = f"https://{defaultHost}/posts/{post_id}.json"

        try:
            # The timeout stops one stalled connection from hanging the whole run.
            response = requests.get(endpoint, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"Error: request for post {post_id} failed: {exc}")
            continue

        if response.status_code == 200:
            data = response.json()
            # .get(): a post without the field is treated as "not a solution".
            if data.get("accepted_answer") is True:
                number_of_actions["Solutions_posted"] += 1
        else:
            print(f"Error: {response.status_code}")
            print(response.text)
    return number_of_actions

In [53]:
"""
demo structure of all_users_data = {
subject_1: {
    user_1 : {data_list},
    user_2 : {data_list},
    ...
    },
subject_2: {
    user_3 : {data_list},
    user_4 : {data_list},
    ...
    },
    ...
}
"""

parent = "user_action_JSONs"  # Folder holding one user-action JSON file per TA/instructor and subject
all_users_data = {}  # Collects the mapped action counts of every user, grouped by subject

# File names follow userAction_<Subject>_<Term>_<Year>_<username>.json,
# e.g. userAction_DeepLearning_T3_2024_pkrishna.json.
# Sorting case-insensitively makes the processing order (and therefore the value left in
# mapped_user_actions after the loop) independent of the order os.listdir happens to return.
for file in sorted(os.listdir(parent), key=str.lower):
    stem, extension = os.path.splitext(file)
    # maxsplit=4 keeps underscores inside the username intact (e.g. Sannidhi_Alape)
    # and does not depend on a particular year appearing in the name.
    name_parts = stem.split("_", 4)
    if extension != ".json" or len(name_parts) < 5 or name_parts[0] != "userAction":
        # Anything else in the folder (checkpoint directories, stray files) is skipped instead of crashing the run.
        print(f"Skipping unrecognised entry: {file}")
        continue

    subject = name_parts[1]   # e.g. DeepLearning
    username = name_parts[4]  # e.g. pkrishna

    print(f"\n\nFILE: {file}\n")
    # Read the file and close it right away; the mapping below makes slow network calls.
    with open(os.path.join(parent, file), 'r', encoding="utf-8") as f:
        data_dict = json.load(f)

    mapped_user_actions = map_user_actions(data_dict["user_actions"])  # data_dict["user_actions"] is a list of dictionaries
    pprint(mapped_user_actions)

    # Subject is at level 1, user is at level 2 of all_users_data
    all_users_data.setdefault(subject, {})[username] = mapped_user_actions



FILE: userAction_DeepLearning_T3_2024_man.json

{'Solutions_posted': 58,
 'created_a_new_topic': 16,
 'likes_given': 8,
 'likes_recieved': 335,
 'linked': 2,
 'recieved_a_private_message': 0,
 'recieved_a_response': 151,
 'replied': 259,
 "user's_post_was_quoted": 1,
 'user_bookmarked_a_post': 0,
 'user_edited_the_post': 8,
 'user_marked_a_post_as_solution': 54,
 'user_sent_a_new_private_message': 0,
 'user_was_assigned_a_topic_or_post': 0,
 'user_was_mentioned': 899}


FILE: userAction_DeepLearning_T3_2024_pkrishna.json

{'Solutions_posted': 0,
 'created_a_new_topic': 0,
 'likes_given': 0,
 'likes_recieved': 0,
 'linked': 0,
 'recieved_a_private_message': 0,
 'recieved_a_response': 0,
 'replied': 1,
 "user's_post_was_quoted": 0,
 'user_bookmarked_a_post': 0,
 'user_edited_the_post': 12,
 'user_marked_a_post_as_solution': 0,
 'user_sent_a_new_private_message': 0,
 'user_was_assigned_a_topic_or_post': 0,
 'user_was_mentioned': 32}


FILE: userAction_Maths2_T3_2024_21f3002073.json

{'S

In [54]:
# Show the counts left over from the loop above: once the loop has finished,
# mapped_user_actions still refers to the result for the last file processed.
pprint(mapped_user_actions)

{'Solutions_posted': 84,
 'created_a_new_topic': 21,
 'likes_given': 335,
 'likes_recieved': 520,
 'linked': 5,
 'recieved_a_private_message': 0,
 'recieved_a_response': 315,
 'replied': 567,
 "user's_post_was_quoted": 0,
 'user_bookmarked_a_post': 0,
 'user_edited_the_post': 1,
 'user_marked_a_post_as_solution': 84,
 'user_sent_a_new_private_message': 0,
 'user_was_assigned_a_topic_or_post': 0,
 'user_was_mentioned': 662}


In [55]:
# Persist all_users_data (subject -> user -> action counts) as JSON indented by 4 spaces
import json
with open('all_users_data.json', 'w') as out_file:
    out_file.write(json.dumps(all_users_data, indent=4))

In [1]:
import pandas as pd
import json

# Load the nested {subject: {user: metrics}} mapping back from disk
with open('all_users_data.json', 'r') as f:
    data = json.load(f)

# Flatten the two nesting levels into one record per (subject, user);
# **metrics spreads that user's counts into the record as individual columns.
rows = [
    {'subject': subject, 'user': user, **metrics}
    for subject, users in data.items()
    for user, metrics in users.items()
]

# One row per user, one column per metric
df = pd.DataFrame(rows)
df

Unnamed: 0,subject,user,likes_given,likes_recieved,user_bookmarked_a_post,created_a_new_topic,replied,recieved_a_response,user_was_mentioned,user's_post_was_quoted,user_edited_the_post,user_sent_a_new_private_message,recieved_a_private_message,user_marked_a_post_as_solution,user_was_assigned_a_topic_or_post,linked,Solutions_posted
0,DeepLearning,man,8,335,0,16,259,151,899,1,8,0,0,54,0,2,58
1,DeepLearning,pkrishna,0,0,0,0,1,0,32,0,12,0,0,0,0,0,0
2,Maths2,21f3002073,33,213,0,16,179,94,20,2,0,0,0,23,0,0,23
3,Maths2,lavanya,0,14,0,2,1,0,46,0,2,0,0,1,0,0,1
4,Maths2,Sannidhi_Alape,0,0,0,0,1,0,11,0,0,0,0,1,0,0,1
5,MLT,23f2000573,137,371,0,40,599,399,79,1,5,0,0,71,0,6,80
6,MLT,Nikita,439,1108,0,26,1053,498,883,8,7,0,0,188,0,0,189
7,MLT,PulkitMangal,86,578,0,75,938,478,291,0,7,0,0,174,0,11,187
8,MLT,Vishal,335,520,0,21,567,315,662,0,1,0,0,84,0,5,84


In [2]:
# Drop the metric columns that are not needed in the final table.
# Reassigning df (instead of inplace=True) together with errors="ignore" keeps this cell
# safe to re-run: columns that were already removed are skipped rather than raising KeyError.
columns_to_drop = [
    "user_bookmarked_a_post",
    "recieved_a_response",
    "user_was_mentioned",
    "user_edited_the_post",
    "user_sent_a_new_private_message",
    "recieved_a_private_message",
    "user_was_assigned_a_topic_or_post",
    "linked",
]
df = df.drop(columns=columns_to_drop, errors="ignore")
df

Unnamed: 0,subject,user,likes_given,likes_recieved,created_a_new_topic,replied,user's_post_was_quoted,user_marked_a_post_as_solution,Solutions_posted
0,DeepLearning,man,8,335,16,259,1,54,58
1,DeepLearning,pkrishna,0,0,0,1,0,0,0
2,Maths2,21f3002073,33,213,16,179,2,23,23
3,Maths2,lavanya,0,14,2,1,0,1,1
4,Maths2,Sannidhi_Alape,0,0,0,1,0,1,1
5,MLT,23f2000573,137,371,40,599,1,71,80
6,MLT,Nikita,439,1108,26,1053,8,188,189
7,MLT,PulkitMangal,86,578,75,938,0,174,187
8,MLT,Vishal,335,520,21,567,0,84,84


In [4]:
# Export the trimmed per-user summary table to an Excel workbook.
df.to_excel('user_actions.xlsx')