In [12]:
import requests
import logging
from pprint import pprint

# Configure the root logger so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger; leetcode() uses it to report failed HTTP requests.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# GraphQL query document posted by leetcode(). It takes one required variable,
# $username, and requests in a single round trip:
#   - allQuestionsCount: question totals per difficulty
#   - matchedUser: contribution points, profile (reputation, ranking),
#     submission calendar and accepted/total submission stats
#   - recentSubmissionList: the user's recent submissions
#   - matchedUserStats: alias of matchedUser exposing submitStatsGlobal
#   - userContestRankingStats: alias of userContestRanking
# format_data() reads the result by these exact field/alias names.
query = """
query getUserProfile($username: String!) {
    allQuestionsCount {
        difficulty
        count
    }
    matchedUser(username: $username) {
        contributions {
            points
        }
        profile {
            reputation
            ranking
        }
        submissionCalendar
        submitStats {
            acSubmissionNum {
                difficulty
                count
                submissions
            }
            totalSubmissionNum {
                difficulty
                count
                submissions
            }
        }
    }
    recentSubmissionList(username: $username) {
        title
        titleSlug
        timestamp
        statusDisplay
        lang
        __typename
    }
    matchedUserStats: matchedUser(username: $username) {
        submitStats: submitStatsGlobal {
            acSubmissionNum {
                difficulty
                count
                submissions
                __typename
            }
            totalSubmissionNum {
                difficulty
                count
                submissions
                __typename
            }
            __typename
        }
    }
    userContestRankingStats: userContestRanking(username: $username) {
            attendedContestsCount
            rating
            globalRanking
            totalParticipants
            topPercentage
        }
}
"""

# Function to format the data
def format_data(data):
    """Flatten the ``data`` object of the profile GraphQL response into one dict.

    Args:
        data: The ``"data"`` member of the GraphQL response. It must contain
            ``allQuestionsCount``, ``matchedUser`` (with ``contributions``,
            ``profile`` and ``submitStats``), ``recentSubmissionList``,
            ``matchedUserStats`` and ``userContestRankingStats``.

    Returns:
        A dict with solved/total question counts per difficulty, ranking,
        contribution points, reputation, the recent submissions (and how many
        there are), plus the ``matchedUserStats`` submit stats and the contest
        ranking stats passed through unchanged. ``totalSubmissions`` is the
        whole per-difficulty ``totalSubmissionNum`` list, not a single number.

    Raises:
        KeyError, IndexError, TypeError: If an expected field is missing, a
            difficulty list is too short, or a nested object (for example
            ``matchedUser``) is ``None``.
    """
    def _entry(entries, label, position):
        # Select by the explicit "difficulty" label so a reordered response
        # cannot mislabel counts; fall back to the historical fixed position
        # when no entry carries the label.
        for entry in entries:
            if entry.get("difficulty") == label:
                return entry
        return entries[position]

    user = data["matchedUser"]
    submit_stats = user["submitStats"]
    solved = submit_stats["acSubmissionNum"]
    questions = data["allQuestionsCount"]
    recent = data["recentSubmissionList"]

    return {
        "totalSolved": _entry(solved, "All", 0)["count"],
        "totalSubmissions": submit_stats["totalSubmissionNum"],
        "totalQuestions": _entry(questions, "All", 0)["count"],
        "easySolved": _entry(solved, "Easy", 1)["count"],
        "totalEasy": _entry(questions, "Easy", 1)["count"],
        "mediumSolved": _entry(solved, "Medium", 2)["count"],
        "totalMedium": _entry(questions, "Medium", 2)["count"],
        "hardSolved": _entry(solved, "Hard", 3)["count"],
        "totalHard": _entry(questions, "Hard", 3)["count"],
        "ranking": user["profile"]["ranking"],
        "contributionPoint": user["contributions"]["points"],
        "reputation": user["profile"]["reputation"],
        "recentSubmissionsCount": len(recent),
        "recentSubmissions": recent,
        "matchedUserStats": data["matchedUserStats"]["submitStats"],
        "userContestRankingStats": data["userContestRankingStats"],
    }

def leetcode(user, timeout=10):
    """Fetch profile statistics for ``user`` from LeetCode's GraphQL endpoint.

    Args:
        user: LeetCode username, sent as the ``username`` query variable.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.post``.

    Returns:
        One of three things, matching the original contract:
        - the dict built by ``format_data`` on success,
        - the ``errors`` list from the response when the API reports errors,
        - the caught ``requests.exceptions.RequestException`` instance when the
          HTTP request fails (it is also logged at ERROR level).
    """
    url = 'https://leetcode.com/graphql'
    headers = {
        'Content-Type': 'application/json',
        'Referer': 'https://leetcode.com'
    }
    payload = {'query': query, 'variables': {'username': user}}

    try:
        # requests has no default timeout: without one a stalled connection
        # would block the notebook cell indefinitely.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        body = response.json()
    except requests.exceptions.RequestException as e:
        logger.error(e)
        return e

    if 'errors' in body:
        return body['errors']
    return format_data(body['data'])


def main(username='aviranjan444'):
    """Pretty-print the result of ``leetcode()`` for ``username``.

    Args:
        username: LeetCode username to look up. Defaults to the profile this
            notebook was originally run against, so ``main()`` behaves as before.
    """
    pprint(leetcode(username))

if __name__ == '__main__':
    main()


{'contributionPoint': 2415,
 'easySolved': 360,
 'hardSolved': 24,
 'matchedUserStats': {'__typename': 'UserSubmitStatsNode',
                      'acSubmissionNum': [{'__typename': 'SubmissionCountNode',
                                           'count': 766,
                                           'difficulty': 'All',
                                           'submissions': 1012},
                                          {'__typename': 'SubmissionCountNode',
                                           'count': 360,
                                           'difficulty': 'Easy',
                                           'submissions': 473},
                                          {'__typename': 'SubmissionCountNode',
                                           'count': 382,
                                           'difficulty': 'Medium',
                                           'submissions': 510},
                                          {'__typename': 'SubmissionCountN

Let's look at the available datasets and do some exploratory analysis.

In [18]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the first LeetCode problems dataset and report its columns and shape.
# (A bare `df1.head()` in the middle of the cell displayed nothing because only
# the last expression of a cell is rendered, so it has been dropped.)
df1 = pd.read_csv("datasets/leetcode_dataset - lc.csv")
print(f"Number of columns : {len(df1.columns)} \n", df1.columns)
print("Shape of the dataset is: ", df1.shape)

Number of columns : 19 
 Index(['id', 'title', 'description', 'is_premium', 'difficulty',
       'solution_link', 'acceptance_rate', 'frequency', 'url', 'discuss_count',
       'accepted', 'submissions', 'companies', 'related_topics', 'likes',
       'dislikes', 'rating', 'asked_by_faang', 'similar_questions'],
      dtype='object')
Shape of the dataset is:  (1825, 19)


In [25]:
# Missing-value count per column of df1, rendered as a one-column table.
df1.isna().sum().to_frame()

Unnamed: 0,0
id,0
title,0
description,0
is_premium,0
difficulty,0
solution_link,838
acceptance_rate,0
frequency,0
url,0
discuss_count,0


In [20]:
# Load the second LeetCode questions dataset and report its columns and shape.
# (A bare `df2.head()` in the middle of the cell displayed nothing because only
# the last expression of a cell is rendered, so it has been dropped.)
df2 = pd.read_csv("datasets/leetcode_questions.csv")
print(f"Number of columns : {len(df2.columns)} \n", df2.columns)
print("Shape of the dataset is: ",df2.shape)

Number of columns : 14 
 Index(['Question ID', 'Question Title', 'Question Slug', 'Question Text',
       'Topic Tagged text', 'Difficulty Level', 'Success Rate',
       'total submission', 'total accepted', 'Likes', 'Dislikes', 'Hints',
       'Similar Questions ID', 'Similar Questions Text'],
      dtype='object')
Shape of the dataset is:  (2238, 14)


In [26]:
# Missing-value count per column of df2, rendered as a one-column table.
df2.isna().sum().to_frame()

Unnamed: 0,0
Question ID,0
Question Title,0
Question Slug,0
Question Text,480
Topic Tagged text,2
Difficulty Level,0
Success Rate,0
total submission,0
total accepted,0
Likes,0


- Both datasets contain missing values. Do I need to impute them first? Decide which columns require imputation. The imputation process will depend on a utility script that leverages LeetCode's GraphQL API.
- Submission data fetched from the API includes the following columns: question title
- Must-have columns: question title, …