In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the preprocessed problem table once. `dff` stays as the full, untouched
# frame for later inspection; `df` is the working copy that gets transformed.
dff = pd.read_csv('preprocessed_data.csv')
df = dff.copy()

In [3]:
# Keep only the rows whose `is_premium` flag is not True, then renumber from 0.
df = df.loc[df['is_premium'].ne(True)].reset_index(drop=True)

In [4]:
# Keep only the columns the recommender uses. The explicit .copy() makes this an
# independent frame, so the column assignments in the following cells do not
# operate on a slice of the previous frame (avoids SettingWithCopyWarning).
df = df[['id','title','problem_description','topic_tags','difficulty']].copy()

In [5]:
# Break each description into a list of tokens, splitting on single spaces.
df.loc[:, 'problem_description'] = df.loc[:, 'problem_description'].str.split(pat=' ')

In [6]:
# Turn the quoted, comma-separated tag string into a list of tags. Quotes and
# all spaces are removed first, so a multi-word tag such as 'Hash Table'
# collapses into the single token HashTable.
df.loc[:, 'topic_tags'] = (
    df['topic_tags']
    .str.replace("'", "")
    .str.replace(" ","")
    .str.strip()
    .str.split(',')
)

In [7]:
# Whitespace-split the difficulty label into a list (e.g. [Medium]) so it can be
# concatenated with the other token lists.
df.loc[:, 'difficulty'] = df.loc[:, 'difficulty'].str.split()

In [8]:
# Snapshot of the frame while the text columns are still separate token lists;
# recommend() prints rows from this copy by position.
temp = df.copy()

In [9]:
# Row-wise list concatenation: description tokens, then topic tags, then the
# difficulty label, giving one combined token list per problem.
df['tags'] = (
    df['problem_description']
    + df['topic_tags']
    + df['difficulty']
)

In [10]:
# The three source columns are now folded into `tags`; drop them. Rebinding the
# name instead of using inplace=True leaves the previous frame object untouched
# and keeps the step chainable.
df = df.drop(columns=['problem_description','topic_tags','difficulty'])

In [11]:
# Collapse each token list into one space-separated string for the vectoriser.
# Values that are already strings are passed through unchanged: joining a string
# would insert a space between every character, so without this guard a second
# run of the cell would silently corrupt the text.
df['tags'] = df['tags'].apply(
    lambda tokens: tokens if isinstance(tokens, str) else " ".join(tokens)
)

In [12]:
# Inspect the result: id, title and the combined `tags` text for each problem.
df

Unnamed: 0,id,title,tags
0,1,1. Two Sum,Given an array of integers nums and an integer...
1,2,2. Add Two Numbers,You are given two non-empty linked lists repre...
2,3,3. Longest Substring Without Repeating Characters,"Given a string s, find the length of the longe..."
3,4,4. Median of Two Sorted Arrays,Given two sorted arrays nums1 and nums2 of siz...
4,5,5. Longest Palindromic Substring,"Given a string s, return the longest palindrom..."
...,...,...,...
2155,2996,2996. Smallest Missing Integer Greater Than Se...,You are given a 0-indexed array of integers nu...
2156,2997,2997. Minimum Number of Operations to Make Arr...,You are given a 0-indexed integer array nums a...
2157,2998,2998. Minimum Number of Operations to Make X a...,You are given two positive integers x and y.\n...
2158,2999,2999. Count the Number of Powerful Integers,"You are given three integers start, finish, an..."


In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF vectoriser with English stop words removed and at most 5000 features.
cv = TfidfVectorizer(
    stop_words='english',
    max_features=5000,
)

In [14]:
# Fit the vectoriser on the combined text and convert the result to a dense
# array with one row per problem.
# NOTE(review): cosine_similarity also accepts sparse input, so the .toarray()
# may be unnecessary — confirm nothing else needs a dense `vector`.
vector = cv.fit_transform(df['tags']).toarray()

In [15]:
# Sanity check: (number of problems, number of TF-IDF features).
vector.shape

(2160, 5000)

In [16]:
from sklearn.metrics.pairwise import cosine_similarity

In [17]:
# Pairwise cosine similarity between all problem vectors; row i holds the
# similarity of problem i to every problem (recommend() reads one row of this).
similarity = cosine_similarity(vector)

In [18]:
def recommend(movie, top_n=5):
    """Print the problems most similar to the one whose title is ``movie``.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``df['title']`` (e.g. ``'192. Word Frequency'``).
        The parameter name is kept as-is for backward compatibility.
    top_n : int, default 5
        How many neighbours to print.

    Returns
    -------
    None
        Each neighbour's row from ``temp`` is printed.

    Raises
    ------
    IndexError
        If no row of ``df`` has the requested title.
    """
    # Work with the row *position*: `similarity` and `temp.iloc` are positional,
    # so this stays correct even if df's index labels are not 0..n-1.
    matches = (df['title'] == movie).to_numpy().nonzero()[0]
    if matches.size == 0:
        raise IndexError(f"No problem titled {movie!r} found in df['title']")
    index = int(matches[0])

    # Rank every row by similarity to the query row, most similar first.
    ranked = sorted(enumerate(similarity[index]), key=lambda pair: pair[1], reverse=True)

    # Exclude the query row explicitly instead of assuming it sorts first: with
    # ties (e.g. an identical description or an all-zero vector) another row can
    # take the first slot and the query itself would otherwise be recommended.
    neighbours = [position for position, _ in ranked if position != index][:top_n]
    for position in neighbours:
        print(temp.iloc[position])

In [19]:
# Example query: print the problems most similar to "192. Word Frequency".
recommend('192. Word Frequency')

id                                                                   692
title                                          692. Top K Frequent Words
problem_description    [Given, an, array, of, strings, words, and, an...
topic_tags             [HashTable, String, Trie, Sorting, Heap(Priori...
difficulty                                                      [Medium]
Name: 556, dtype: object
id                                                                   194
title                                                194. Transpose File
problem_description    [Given, a, text, file, file.txt,, transpose, i...
topic_tags                                                       [Shell]
difficulty                                                      [Medium]
Name: 185, dtype: object
id                                                                   609
title                                 609. Find Duplicate File in System
problem_description    [Given, a, list, paths, of, directory, info,, ...
t

In [20]:
# Row index of the untouched copy, taken before premium rows were dropped from df.
dff['topic_tags'].index

RangeIndex(start=0, stop=3000, step=1)

In [21]:
# Peek at three random rows, transposed so every column is visible. A fixed
# random_state keeps the displayed rows the same on every Restart & Run All.
dff.sample(3, random_state=0).T

Unnamed: 0,1126,2211,932
id,1127,2212,933
page_number,23,45,19
is_premium,True,True,False
title,1127. User Purchase Platform,2212. Maximum Points in an Archery Competition,933. Number of Recent Calls
problem_description,,,You have a RecentCounter class which counts th...
topic_tags,,,"'Design', 'Queue', 'Data Stream'"
difficulty,Hard,Medium,Easy
similar_questions,,,['']
no_similar_questions,,,0.0
acceptance,48.1,49.6,73.8


In [22]:
# Display the full original table (all columns, premium rows included).
dff

Unnamed: 0,id,page_number,is_premium,title,problem_description,topic_tags,difficulty,similar_questions,no_similar_questions,acceptance,accepted,submission,solution,discussion_count,likes,dislikes,problem_URL,solution_URL
0,1,1,False,1. Two Sum,Given an array of integers nums and an integer...,"'Array', 'Hash Table'",Easy,"[""'3Sum'"", ""'4Sum'"", ""'Two Sum II - Input Arra...",21.0,51.0,11300000.0,22100000.0,26800.0,638.0,52700.0,1700.0,https://leetcode.com/problems/two-sum,https://leetcode.com/problems/two-sum/solution
1,2,1,False,2. Add Two Numbers,You are given two non-empty linked lists repre...,"'Linked List', 'Math', 'Recursion'",Medium,"[""'Multiply Strings'"", ""'Add Binary'"", ""'Sum o...",8.0,41.5,4000000.0,9700000.0,15700.0,428.0,28900.0,5600.0,https://leetcode.com/problems/add-two-numbers,https://leetcode.com/problems/add-two-numbers/...
2,3,1,False,3. Longest Substring Without Repeating Characters,"Given a string s, find the length of the longe...","'Hash Table', 'String', 'Sliding Window'",Medium,"[""'Longest Substring with At Most Two Distinct...",9.0,34.1,5100000.0,14900000.0,18100.0,237.0,37700.0,1700.0,https://leetcode.com/problems/longest-substrin...,https://leetcode.com/problems/longest-substrin...
3,4,1,False,4. Median of Two Sorted Arrays,Given two sorted arrays nums1 and nums2 of siz...,"'Array', 'Binary Search', 'Divide and Conquer'",Hard,"[""'Median of a Row Wise Sorted Matrix'""]",1.0,38.3,2200000.0,5800000.0,14100.0,304.0,26600.0,2900.0,https://leetcode.com/problems/median-of-two-so...,https://leetcode.com/problems/median-of-two-so...
4,5,1,False,5. Longest Palindromic Substring,"Given a string s, return the longest palindrom...","'String', 'Dynamic Programming'",Medium,"[""'Shortest Palindrome'"", ""'Palindrome Permuta...",6.0,33.2,2700000.0,8200000.0,9600.0,225.0,27900.0,1600.0,https://leetcode.com/problems/longest-palindro...,https://leetcode.com/problems/longest-palindro...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,2996,60,False,2996. Smallest Missing Integer Greater Than Se...,You are given a 0-indexed array of integers nu...,"'Array', 'Hash Table', 'Sorting'",Easy,"[""'Longest Common Prefix'"", ""'First Missing Po...",3.0,29.4,17900.0,60900.0,186.0,29.0,49.0,136.0,https://leetcode.com/problems/smallest-missing...,https://leetcode.com/problems/smallest-missing...
2996,2997,60,False,2997. Minimum Number of Operations to Make Arr...,You are given a 0-indexed integer array nums a...,"'Array', 'Bit Manipulation'",Medium,"[""'Minimum Bit Flips to Convert Number'""]",1.0,76.1,13500.0,17700.0,156.0,4.0,62.0,4.0,https://leetcode.com/problems/minimum-number-o...,https://leetcode.com/problems/minimum-number-o...
2997,2998,60,False,2998. Minimum Number of Operations to Make X a...,You are given two positive integers x and y.\n...,"'Dynamic Programming', 'Breadth-First Search',...",Medium,"[""'Shortest Bridge'"", ""'Minimum Moves to Sprea...",2.0,40.5,9700.0,23900.0,167.0,16.0,129.0,19.0,https://leetcode.com/problems/minimum-number-o...,https://leetcode.com/problems/minimum-number-o...
2998,2999,60,False,2999. Count the Number of Powerful Integers,"You are given three integers start, finish, an...","'Math', 'String', 'Dynamic Programming'",Hard,"[""'Powerful Integers'"", ""'Numbers With Repeate...",2.0,22.0,2400.0,10900.0,57.0,5.0,62.0,3.0,https://leetcode.com/problems/count-the-number...,https://leetcode.com/problems/count-the-number...


In [24]:
import requests

# LeetCode GraphQL endpoint; the request body is sent as JSON.
url = "https://leetcode.com/graphql/"
headers = {
    "Content-Type": "application/json",
}

# GraphQL query and variables: first page of past contests. Only pageNo is
# supplied; $numPerPage is declared by the query but left unset here.
data = {
    "query": """
        query pastContests($pageNo: Int, $numPerPage: Int) {
          pastContests(pageNo: $pageNo, numPerPage: $numPerPage) {
            pageNum
            currentPage
            totalNum
            numPerPage
            data {
              title
              titleSlug
              startTime
              originStartTime
              cardImg
              sponsors {
                name
                lightLogo
                darkLogo
              }
            }
          }
        }
    """,
    "variables": {"pageNo": 1},
    "operationName": "pastContests",
}

# Make the GraphQL request. A timeout is set because requests waits
# indefinitely by default, which would hang the notebook on a stalled connection.
response = requests.post(url, json=data, headers=headers, timeout=30)

# Fail loudly on an HTTP error status instead of trying to parse an error page.
response.raise_for_status()

# Print the response
print(response.json())

{'data': {'pastContests': {'pageNum': 52, 'currentPage': 1, 'totalNum': 512, 'numPerPage': 10, 'data': [{'title': 'Weekly Contest 383', 'titleSlug': 'weekly-contest-383', 'startTime': 1707013800, 'originStartTime': 1707013800, 'cardImg': None, 'sponsors': []}, {'title': 'Biweekly Contest 123', 'titleSlug': 'biweekly-contest-123', 'startTime': 1706970600, 'originStartTime': 1706970600, 'cardImg': None, 'sponsors': []}, {'title': 'Weekly Contest 382', 'titleSlug': 'weekly-contest-382', 'startTime': 1706409000, 'originStartTime': 1706409000, 'cardImg': None, 'sponsors': []}, {'title': 'Weekly Contest 381', 'titleSlug': 'weekly-contest-381', 'startTime': 1705804200, 'originStartTime': 1705804200, 'cardImg': None, 'sponsors': []}, {'title': 'Biweekly Contest 122', 'titleSlug': 'biweekly-contest-122', 'startTime': 1705761000, 'originStartTime': 1705761000, 'cardImg': None, 'sponsors': []}, {'title': 'Weekly Contest 380', 'titleSlug': 'weekly-contest-380', 'startTime': 1705199400, 'originStar