<a href="https://colab.research.google.com/github/Sagargupta16/LeetCode_Rating_Predictor/blob/main/LC_Contest_Rating_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Machine Learning Model Preparation and Execution Libraries
# This script includes imports for handling data, building and training machine learning models,
# performing numerical computations, and parallel processing.

import json  # For parsing and handling JSON data
import numpy as np  # For numerical operations, mainly with arrays
import pandas as pd  # For data manipulation and analysis, especially with tabular data
import requests  # For making HTTP requests to web servers
import tensorflow as tf  # For building and training machine learning models
import time  # For working with time-related functions
from sklearn.model_selection import train_test_split  # For splitting datasets into training and testing sets
from sklearn.preprocessing import MinMaxScaler  # For feature scaling, specifically min-max normalization
from tensorflow.keras.models import Sequential  # For creating a linear stack of neural network layers
from tensorflow.keras.layers import Dense, LSTM  # Dense for fully connected layers, LSTM for Long Short-Term Memory layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping  # Callbacks for saving models and early stopping
from concurrent.futures import ThreadPoolExecutor  # For executing calls asynchronously
import joblib  # For saving and loading Python objects that make use of NumPy data structures
from IPython.display import Javascript

# Set the data rate limit to a higher value (e.g., 10 MB/s)
# NOTE(review): the expression below only builds a Javascript display object that asks
# the classic-notebook front end to run `NotebookApp.iopub_data_rate_limit = ...` as
# kernel code. `iopub_data_rate_limit` looks like a Jupyter *server* setting, so this
# presumably does not raise the limit (and `IPython.notebook` may not exist in
# Colab/JupyterLab) — TODO confirm; if so, set it when launching the server instead,
# e.g. `jupyter notebook --NotebookApp.iopub_data_rate_limit=10485760`.
data_rate_limit = 10 * 1024 * 1024  # 10 MB/s
Javascript(f"IPython.notebook.kernel.execute('NotebookApp.iopub_data_rate_limit = {data_rate_limit}')")


<IPython.core.display.Javascript object>

In [3]:
# Define GraphQL Query and Headers

# GraphQL query for fetching user contest data.
# The query requests the contest ranking history of a user; for every history entry it
# selects the attendance flag, the rating, the ranking, and the contest title.
# It takes a username as a required string variable ($username).
# NOTE: the prediction cell near the end of this notebook rebinds the names `query`,
# `headers` and `fetch_data`. After running that cell, re-run this cell and the
# data-fetching-functions cell before re-running the data collection.
query = """
query userContestRankingInfo($username: String!) {
    userContestRankingHistory(username: $username) {
        attended
        rating
        ranking
        contest {
            title
        }
    }
}
"""

# Headers for the GraphQL request.
# "Content-Type: application/json" indicates that the request body format is JSON,
# which is a common requirement for GraphQL APIs.
headers = {"Content-Type": "application/json"}

In [4]:
# LeetCode Contest Data Fetching Functions
# Session for making HTTP requests (reuses connections across contest lookups)
session = requests.Session()

# Fetch the ranking payload (including the participant count) of a given contest
def fetch_contest_users_count(contest_title, timeout=30):
    """Fetch the ranking summary of a contest from the LeetCode contest API.

    Args:
        contest_title: Contest slug inserted into the ranking URL,
            e.g. ``"weekly-contest-100"``.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the whole scrape.

    Returns:
        The decoded JSON body on HTTP 200 (callers read its ``"user_num"`` entry),
        otherwise ``{"error": "Failed to fetch data"}``. Network failures and
        undecodable bodies are reported and mapped to the same error dict
        instead of raising.
    """
    url = f"https://leetcode.com/contest/api/ranking/{contest_title}/"
    try:
        response = session.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not valid JSON
        print(f"Request error for contest {contest_title}: {e}")
    return {"error": "Failed to fetch data"}

# Read a specified number of usernames from a JSON file
def read_usernames_from_json(file_path, number_of_usernames=10000):
    """Load usernames from a JSON file and return the first ``number_of_usernames``.

    Args:
        file_path: Path of the JSON file. Its top-level value is expected to be
            an array, because the loaded value is sliced.
        number_of_usernames: Maximum number of entries to return, counted from
            the start of the array.

    Returns:
        A list with at most ``number_of_usernames`` entries.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding
    with open(file_path, 'r', encoding='utf-8') as file:
        all_usernames = json.load(file)
    return all_usernames[:number_of_usernames]

# Fetch contest ranking history data for a given username
def fetch_data(username, timeout=30):
    """Fetch a user's contest ranking history from the LeetCode GraphQL endpoint.

    Sends the module-level ``query`` with ``username`` as its variable.

    Args:
        username: LeetCode username to look up.
        timeout: Seconds to wait for the server; without it a stalled connection
            would block a worker thread indefinitely.

    Returns:
        The ``userContestRankingHistory`` list from the response, or ``[]`` when
        the request fails, the body is not JSON, or the response carries no
        history (including explicit ``null`` values).
    """
    try:
        response = requests.post(
            "https://leetcode.com/graphql",
            headers=headers,
            json={"query": query, "variables": {"username": username}},
            timeout=timeout,
        )
        response.raise_for_status()  # Raise HTTPError for bad responses
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a successful response whose body is not valid JSON
        print(f"Request error for username {username}: {e}")
        return []

    if not isinstance(payload, dict):
        return []
    # "data" and the history itself may be present but null; `or` maps both to empty
    data = payload.get("data") or {}
    return data.get("userContestRankingHistory") or []

In [5]:
# Process user contest data to generate a structured dataset

# Cache shared by every process_data call: contest slug -> participant count
constest_participants = {}


def _normalize_contest_title(title):
    """Convert a contest title into the slug handed to fetch_contest_users_count."""
    slug = title.lower().replace(" ", "-")
    pieces = slug.split("-")
    is_early_weekly = (
        pieces[0] == "weekly" and pieces[-1].isdigit() and int(pieces[-1]) < 58
    )
    if is_early_weekly:
        # Weekly contests numbered below 58 are looked up with a "leetcode-" prefix
        slug = "leetcode-" + slug
    # Weekly contest 62 is looked up under a differently named slug
    return "weekly-contest-by-app-academy" if slug == "weekly-contest-62" else slug


def process_data(contests):
    """Turn one user's contest history into dataset rows.

    Only entries whose ``"attended"`` flag is truthy produce rows. Each row is
    ``[slug, rating_before, rating_after, ranking, participants, attended_index]``
    where ``rating_before`` starts at 1500 and ``attended_index`` counts the rows
    already emitted for this user.

    Participant counts come from ``constest_participants``; unknown slugs are
    fetched with ``fetch_contest_users_count`` and cached. If a fetched payload
    has no ``"user_num"`` entry, processing of this user's history stops there
    and the rows collected so far are returned.
    """
    rows = []
    latest_rating = 1500
    attended_so_far = 0
    for entry in contests:
        if not entry["attended"]:
            continue
        rating_before = latest_rating
        latest_rating = entry["rating"]
        rank = entry["ranking"]
        slug = _normalize_contest_title(entry["contest"]["title"])
        if slug not in constest_participants:
            lookup = fetch_contest_users_count(slug)
            if "user_num" not in lookup:
                # Lookup failed: give up on the remainder of this history
                break
            constest_participants[slug] = lookup["user_num"]
        rows.append([
            slug,
            rating_before,
            latest_rating,
            rank,
            constest_participants[slug],
            attended_so_far,
        ])
        attended_so_far += 1
    return rows

def process_batch_parallel(usernames_batch, max_workers=100):
    """Fetch the contest histories of a batch of users concurrently and flatten them.

    ``fetch_data`` runs for every username on a thread pool of ``max_workers``
    threads. Once the pool has finished, each non-empty history is passed through
    ``process_data`` (in the order of ``usernames_batch``) and the resulting rows
    are concatenated into one list.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        histories = pool.map(fetch_data, usernames_batch)

    # Empty histories (failed or contest-less users) contribute no rows
    return [
        row
        for history in histories
        if history
        for row in process_data(history)
    ]


# Collect contest rows for every username, one batch of users at a time
usernames = read_usernames_from_json('usernames.json')
batch_size = 100
all_data = []

for batch_start in range(0, len(usernames), batch_size):
    batch = usernames[batch_start:batch_start + batch_size]
    all_data.extend(process_batch_parallel(batch))
    time.sleep(1)  # pause between batches (presumably to go easy on the API)

# One row per attended contest; column order matches the rows built by process_data
column_names = ['Title','Prev_Rating','Rating','Rank','Participants','Attended']
dfa = pd.DataFrame(all_data, columns=column_names)
print(dfa)
# Persist the collected rows as JSON Lines (one record per line)
dfa.to_json('data.json', orient='records', lines=True)

Request error for username Gurmeet_Singh: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))Request error for username illusion7: HTTPSConnectionPool(host='leetcode.com', port=443): Max retries exceeded with url: /graphql (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)')))
Request error for username ecstatic-teslagyi: HTTPSConnectionPool(host='leetcode.com', port=443): Max retries exceeded with url: /graphql (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))Request error for username MaoZZ_17: HTTPSConnectionPool(host='leetcode.com', port=443): Max retries exceeded with url: /graphql (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)')))

Request error for username atulgarg: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without res

In [28]:
dfa = pd.read_json('data.json', orient='records', lines=True)

# Build the model table from the raw contest rows:
#   input1 - rating before the contest
#   input2 - rank divided by the number of participants
#   input3 - value of the 'Attended' column (presumably the number of contests the
#            user had attended before this one — confirm against how data.json was built)
#   output - rating change produced by the contest (the regression target)
df = pd.DataFrame()
df['input1'] = dfa['Prev_Rating']
df['input2'] = dfa['Rank']/dfa['Participants']
df['input3'] = dfa['Attended']
df['output'] = dfa['Rating']-dfa['Prev_Rating']
print(df)

# Select the features by name so a reordered/extended frame cannot silently change X
FEATURE_COLUMNS = ['input1', 'input2', 'input3']
# Fixed seed so the train/test split is the same on every run
RANDOM_STATE = 42

# Separate the features (X) and target variable (y)
X = df[FEATURE_COLUMNS].values
y = df['output'].values

# Split BEFORE scaling so the scaler never sees the test rows (avoids data leakage)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=RANDOM_STATE
)

# Fit Min-Max scaling on the training rows only
scaler = MinMaxScaler()
scaler.fit(X_train_raw)

# Save the scaler for future use
joblib.dump(scaler, 'scaler.save')

# Scale, then reshape to (samples, 1 timestep, features) as expected by the LSTM input
n_features = len(FEATURE_COLUMNS)
X_train = scaler.transform(X_train_raw).reshape((-1, 1, n_features))
X_test = scaler.transform(X_test_raw).reshape((-1, 1, n_features))

# Full scaled array, kept under its original name for any cell that still uses it
X_scaled = scaler.transform(X).reshape((-1, 1, n_features))

          input1    input2  input3   output
0       1500.000  0.008437       0  320.626
1       1820.626  0.003789       1  199.425
2       2020.051  0.008706       2  102.787
3       2122.838  0.008959       3   73.217
4       2196.055  0.027816       4   19.398
...          ...       ...     ...      ...
208396  2079.243  0.048267      26    9.020
208397  2088.263  0.083241      27    1.331
208398  2089.594  0.220729      28  -34.172
208399  2055.422  0.068928      29   11.030
208400  2066.452  0.110744      30  -12.117

[208401 rows x 4 columns]


In [29]:
# Use the first GPU when TensorFlow can see one; otherwise fall back to the CPU
# instead of pinning the ops to a device that may not exist.
device_name = '/device:GPU:0' if tf.config.list_physical_devices('GPU') else '/device:CPU:0'
with tf.device(device_name):
    # Each sample is a length-1 sequence of 3 features, regressed to a single value
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(100, activation='tanh', recurrent_activation='sigmoid', input_shape=(1, 3)),
        tf.keras.layers.Dense(1)
    ])

    # Define the learning rate for the optimizer
    learning_rate = 0.01
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # Compile the model with Adam optimizer and mean squared error loss function
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Train the model using the training dataset
    model.fit(X_train, y_train, epochs=50, batch_size=32)

    # Evaluate the model's performance on the test dataset
    loss = model.evaluate(X_test, y_test)
    print("Test Loss:", loss)

    # Save the trained model to a file for later use
    model.save('model.keras')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 65.79061889648438


In [30]:
import numpy as np
import tensorflow as tf
import joblib
import requests

# Load the trained model and the scaler.
# Use the same relative paths the training cells saved to ('model.keras' and
# 'scaler.save') so this cell also works outside an environment whose working
# directory is /content.
# NOTE(security): joblib.load unpickles the file and can execute arbitrary code;
# only load scaler files you produced yourself.
model = tf.keras.models.load_model('model.keras')
scaler = joblib.load('scaler.save')

# GraphQL query for fetching a user's contest summary (attended count and rating).
# NOTE: this rebinds `query`, which the data-collection cells above also read.
query = """
query userContestRankingInfo($username: String!) {
        userContestRanking(username: $username) {
            attendedContestsCount
            rating
        }
    }
"""

# Headers for the GraphQL request
headers = {"Content-Type": "application/json"}

# Fetch data for a given username using GraphQL
def fetch_data(username, timeout=30):
    """Fetch a user's contest summary from the LeetCode GraphQL endpoint.

    Sends the module-level ``query`` with ``username`` as its variable.

    Args:
        username: LeetCode username to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``userContestRanking`` object of the response (a dict holding the
        fields requested by ``query``), or ``[]`` when the request fails, the
        status is not 200, the body is not JSON, or no ranking is present.
    """
    try:
        response = requests.post(
            "https://leetcode.com/graphql",
            headers=headers,
            json={"query": query, "variables": {"username": username}},
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        print(f"Request error for username {username}: {e}")
        return []  # Return an empty list in case of error

    if response.status_code != 200:
        print(f"Error fetching data for username {username}: {response.status_code}")
        return []  # Return an empty list in case of error

    try:
        payload = response.json()
    except ValueError as e:
        print(f"Request error for username {username}: {e}")
        return []

    if not isinstance(payload, dict):
        return []
    # "data" and the ranking itself may be present but null; `or` maps both to empty
    data = payload.get("data") or {}
    return data.get("userContestRanking") or []

# Build the model input row for one user (no interactive prompt is involved)
def get_user_input(username="sagargupta1610", current_rating=1957, rank=1869, participants=21165):
    """Assemble the feature row ``[[rating, rank ratio, contests before]]`` for a user.

    The attended-contest count is fetched with ``fetch_data``; the remaining
    values are supplied by the caller and default to the example that was
    previously hard-coded in this function.

    Args:
        username: LeetCode username whose attended-contest count is fetched.
        current_rating: The user's rating before the contest.
        rank: The user's rank in the contest.
        participants: Number of participants in the contest; when it is 0 the
            rank ratio is set to 0 instead of dividing.

    Returns:
        A ``(1, 3)`` NumPy array: current rating, ``rank / participants``, and
        ``attendedContestsCount - 1``.

    Raises:
        ValueError: If no contest ranking data could be fetched for ``username``.
    """
    print("Enter the input values:")
    data = fetch_data(username)
    # fetch_data yields an empty/None result on failure; fail with a clear message
    if not data or "attendedContestsCount" not in data:
        raise ValueError(f"No contest ranking data available for username {username!r}")
    rank_ratio = rank / participants if participants != 0 else 0
    contests_before = data["attendedContestsCount"] - 1
    return np.array([[current_rating, rank_ratio, contests_before]])

# Scale a feature row and run it through the trained model
def make_prediction(input_data):
    """Return the model's prediction for the first row of ``input_data``.

    ``input_data`` is transformed with the module-level ``scaler``, reshaped to
    ``(rows, 1, columns)`` and passed to the module-level ``model``; the first
    value of the first prediction is returned.
    """
    scaled = scaler.transform(input_data)
    n_rows, n_columns = scaled.shape[:2]
    # Insert a length-1 middle axis before handing the batch to the model
    as_sequences = scaled.reshape((n_rows, 1, n_columns))
    return model.predict(as_sequences)[0][0]

# Entry point: build the input row, predict, and report the result
def main():
    """Print the current rating, contest count, predicted change and projected rating."""
    features = get_user_input()
    delta = make_prediction(features)
    row = features[0]
    current_rating = row[0]
    contest_count = row[2]
    projected = current_rating + delta
    print(f"Your Current Rating is: {current_rating}")
    print(f"You have Participated in {contest_count} contests")
    print(f"Predicted change in rating: {delta}")
    print(f"Your future rating will be: {projected}")

# Run the prediction only when this code is executed as the main module
if __name__ == "__main__":
    main()


Enter the input values:
Your Current Rating is: 1957.0
You have Participated in 87.0 contests
Predicted change in rating: 12.75726318359375
Your future rating will be: 1969.7572631835938
