<a href="https://colab.research.google.com/github/Sagargupta16/LeetCode_Rating_Predictor/blob/main/LC_Contest_Rating_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [65]:
# Machine Learning Model Preparation and Execution Libraries
# This script includes imports for handling data, building and training machine learning models,
# performing numerical computations, and parallel processing.

import json  # For parsing and handling JSON data
import numpy as np  # For numerical operations, mainly with arrays
import pandas as pd  # For data manipulation and analysis, especially with tabular data
import requests  # For making HTTP requests to web servers
import tensorflow as tf  # For building and training machine learning models
import time  # For working with time-related functions
from sklearn.model_selection import train_test_split  # For splitting datasets into training and testing sets
from sklearn.preprocessing import MinMaxScaler  # For feature scaling, specifically min-max normalization
from tensorflow.keras.models import Sequential  # For creating a linear stack of neural network layers
from tensorflow.keras.layers import Dense, LSTM  # Dense for fully connected layers, LSTM for Long Short-Term Memory layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping  # Callbacks for saving models and early stopping
from concurrent.futures import ThreadPoolExecutor  # For executing calls asynchronously
import joblib  # For saving and loading Python objects that make use of NumPy data structures

In [66]:
# Define GraphQL Query and Headers

# GraphQL query used to fetch one user's contest history.
# It takes the username as the required variable $username and, for each
# entry of userContestRankingHistory, requests the attended flag, the rating,
# the ranking, and the contest title.
query = """
query userContestRankingInfo($username: String!) {
    userContestRankingHistory(username: $username) {
        attended
        rating
        ranking
        contest {
            title
        }
    }
}
"""

# Headers for the GraphQL request.
# "Content-Type: application/json" declares that the request body is JSON.
headers = {"Content-Type": "application/json"}

In [67]:
# LeetCode Contest Data Fetching Functions

# Session object for making HTTP requests.
# NOTE(review): the fetch functions visible in this file call requests.get /
# requests.post directly, so this session appears to be unused here —
# confirm before removing it or switching the functions over to it.
session = requests.Session()

# Fetch the ranking payload (which carries the participant count) for a given contest
def fetch_contest_users_count(contest_title):
    """Fetch the contest ranking API response for the slug ``contest_title``.

    Returns the decoded JSON body on HTTP 200. On any failure — a non-200
    status, a network error or timeout, or a body that is not valid JSON —
    returns ``{"error": "Failed to fetch data"}``, so callers that read the
    result with ``.get(...)`` keep working instead of crashing.
    """
    url = f"https://leetcode.com/contest/api/ranking/{contest_title}/"
    failure = {"error": "Failed to fetch data"}
    try:
        # The timeout keeps one stalled request from hanging the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return failure
    if response.status_code != 200:
        return failure
    try:
        return response.json()
    except ValueError:
        # The body was not JSON (for example an HTML error page).
        return failure

# Read a specified number of usernames from a JSON file
def read_usernames_from_json(file_path, number_of_usernames=500):
    """Return the first ``number_of_usernames`` items of the JSON array in ``file_path``.

    The file is expected to contain a JSON list (the result is sliced).
    If it holds fewer items than requested, all of them are returned.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        all_usernames = json.load(file)
    return all_usernames[:number_of_usernames]

# Fetch contest ranking history data for a given username
def fetch_data(username):
    """Return the ``userContestRankingHistory`` list for ``username``.

    Posts the module-level GraphQL ``query`` with the module-level
    ``headers``. Always returns a list: the history entries on success, or an
    empty list when the request fails, the status is not 200, the body is not
    JSON, or the response carries ``null`` for ``data`` or for the history
    itself. Failures are reported with ``print`` rather than raised, so one
    bad username does not abort a whole batch.
    """
    try:
        response = requests.post(
            "https://leetcode.com/graphql",
            headers=headers,
            json={"query": query, "variables": {"username": username}},
            timeout=30,  # do not let a stalled connection block a worker forever
        )
    except requests.RequestException as exc:
        print(f"Error fetching data for username {username}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching data for username {username}: {response.status_code}")
        return []  # Return an empty list in case of error

    try:
        payload = response.json()
    except ValueError as exc:
        print(f"Error fetching data for username {username}: {exc}")
        return []

    # dict.get's default only applies to a missing key; an explicit JSON null
    # comes back as None, so fall back with `or` at both levels.
    data = payload.get("data") or {}
    return data.get("userContestRankingHistory") or []

In [70]:
# Process user contest data to generate a structured dataset
def process_data(contests):
    """Turn one user's contest history into a list of dataset rows.

    ``contests`` is an iterable of history entries with the keys
    ``"attended"``, ``"rating"``, ``"ranking"`` and ``"contest"`` (holding
    ``"title"``). Entries that were not attended are skipped. Each attended
    contest yields one row::

        [rating_before, ranking, total_participants,
         ranking_as_percent_of_participants, attended_contests_before,
         rating_change]

    ``rating_before`` starts at 1500 and afterwards is the rating reached in
    the previous attended contest; ``rating_change`` is the contest's rating
    minus ``rating_before``.

    Participant totals are looked up in the module-level dict
    ``contest_participants`` (defined outside this function) and fetched with
    ``fetch_contest_users_count`` on a cache miss.
    """
    data = []
    previous_rating = 1500  # rating assumed before the first attended contest
    attended_before = 0
    for contest in contests:
        if not contest["attended"]:
            continue

        new_rating, ranking = contest["rating"], contest["ranking"]

        # Derive the contest slug used by the ranking API from the title.
        # NOTE(review): "weekly" is also a substring of "biweekly", so numbered
        # biweekly titles receive the "leetcode-" prefix as well — confirm
        # that this is the intended slug.
        contest_title = contest["contest"]["title"].lower().replace(" ", "-")
        if "weekly" in contest_title and contest_title.split("-")[-1].isdigit():
            contest_title = "leetcode-" + contest_title
        if contest_title == "weekly-contest-62":
            contest_title = "weekly-contest-by-app-academy"

        if contest_title not in contest_participants:
            ranking_info = fetch_contest_users_count(contest_title)
            contest_participants[contest_title] = ranking_info.get("user_num", 0)

        total_participants = contest_participants[contest_title]
        rank_percent = (ranking / total_participants) * 100 if total_participants else 0

        # Use the rating held BEFORE this contest as the feature and the
        # change produced by this contest as the target. Overwriting the
        # running rating first would make the target `rating - rating == 0`.
        data.append([
            previous_rating,
            ranking,
            total_participants,
            rank_percent,
            attended_before,
            new_rating - previous_rating,
        ])
        previous_rating = new_rating
        attended_before += 1
    return data

# Fetch a batch of usernames concurrently, then flatten their processed rows
def process_batch_parallel(usernames, max_workers=100):
    """Fetch every username's history in a thread pool and return all rows.

    ``fetch_data`` runs on up to ``max_workers`` threads; the pool is shut
    down (waiting for every fetch) before ``process_data`` is applied to each
    history in input order. Users that produce no rows contribute nothing;
    the rest are concatenated into one flat list.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        histories = pool.map(fetch_data, usernames)

    rows = []
    for history in histories:
        user_rows = process_data(history)
        if user_rows:
            rows.extend(user_rows)
    return rows

# Main script: read usernames (read_usernames_from_json returns at most its
# default of 500 here) and process their data in batches of 250.
usernames = read_usernames_from_json('usernames.json')
all_data = []

# Each batch is fetched in parallel and its rows appended to all_data; the
# one-second sleep separates consecutive batches (presumably to ease the
# request rate — confirm).
for i in range(0, len(usernames), 250):
    all_data.extend(process_batch_parallel(usernames[i:i + 250]))
    time.sleep(1)

In [71]:
# Convert the aggregated data into a DataFrame and output it
df = pd.DataFrame(all_data, columns=['input1', 'input2', 'input3', 'input4', 'input5', 'output'])
print(df)

# Save the DataFrame to a JSON file (one record per line)
df.to_json('data.json', orient='records', lines=True)

# Separate the features (X: every column but the last) from the target (y: 'output')
X = df.iloc[:, :-1].values
y = df['output'].values

# Split BEFORE scaling so the scaler's min/max are learned from the training
# rows only; fitting it on the full dataset would leak test-set statistics
# into training and make the test loss optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Fit Min-Max scaling on the training features
scaler = MinMaxScaler()
scaler.fit(X_train)

# Save the scaler for future use
joblib.dump(scaler, 'scaler.save')

# Scale each split with the train-fitted scaler and reshape it to
# (samples, 1, features), the 3-D layout the LSTM input expects
X_train = scaler.transform(X_train)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = scaler.transform(X_test)
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Keep the scaled, reshaped full feature matrix available under its original name
X_scaled = scaler.transform(X)
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

         input1  input2  input3     input4  input5   output
0      1500.000     110   13038   0.843688       0  320.626
1      1820.626      45   11877   0.378884       1  199.425
2      2020.051      69    7926   0.870553       2  102.787
3      2122.838     119   13283   0.895882       3   73.217
4      2196.055     384   13805   2.781601       4   19.398
...         ...     ...     ...        ...     ...      ...
10773  1454.323    5098   23404  21.782601       3   46.008
10774  1500.000     328    6631   4.946464       0  215.392
10775  1715.392    1027    7873  13.044583       1   44.149
10776  1759.541    1376   17435   7.892171       2   41.787
10777  1801.328    2413   26069   9.256205       3   28.621

[10778 rows x 6 columns]


In [72]:
# Request placement of model construction, training and evaluation on the first GPU device
with tf.device('/device:GPU:0'):

    # Assemble the network layer by layer: a 100-unit LSTM ('leaky_relu'
    # activation, 'sigmoid' recurrent activation) that reads inputs of shape
    # (1, 5), followed by a single-unit Dense layer with the default (linear) activation.
    model = tf.keras.Sequential()
    model.add(
        tf.keras.layers.LSTM(
            100,
            activation='leaky_relu',
            recurrent_activation='sigmoid',
            input_shape=(1, 5),
        )
    )
    model.add(tf.keras.layers.Dense(1))

    # Adam optimizer with an explicit learning rate
    learning_rate = 0.01
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # Mean squared error is the training objective
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    # Fit on the training split: 100 epochs, mini-batches of 32
    model.fit(x=X_train, y=y_train, batch_size=32, epochs=100)

    # Report the loss on the held-out test split
    loss = model.evaluate(X_test, y_test)
    print("Test Loss:", loss)

    # Persist the trained model for later use
    model.save('model.keras')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [86]:
import numpy as np
import tensorflow as tf
import joblib
import requests

# Load the trained model and the fitted scaler from disk.
# NOTE(review): both paths are absolute under /content, while the training
# cells save 'model.keras' and 'scaler.save' with relative paths — presumably
# this runs with /content as the working directory (e.g. Colab); confirm.
model = tf.keras.models.load_model('/content/model.keras')
scaler = joblib.load('/content/scaler.save')

# GraphQL query for a user's contest summary: takes the required variable
# $username and requests attendedContestsCount and rating from userContestRanking.
query = """
query userContestRankingInfo($username: String!) {
        userContestRanking(username: $username) {
            attendedContestsCount
            rating
        }
    }
"""

# Headers for the GraphQL request: the body is sent as JSON
headers = {"Content-Type": "application/json"}

# Fetch data for a given username using GraphQL
def fetch_data(username):
    """Return the ``userContestRanking`` object for ``username``.

    Posts the module-level GraphQL ``query`` with the module-level
    ``headers``. On success the decoded ``userContestRanking`` value is
    returned (callers read its ``"rating"`` and ``"attendedContestsCount"``
    keys). An empty list is returned when the request fails, the status is
    not 200, the body is not JSON, or the response carries ``null`` for
    ``data`` or for ``userContestRanking``. Failures are reported with
    ``print`` rather than raised.
    """
    try:
        response = requests.post(
            "https://leetcode.com/graphql",
            headers=headers,
            json={"query": query, "variables": {"username": username}},
            timeout=30,  # do not wait forever on a stalled connection
        )
    except requests.RequestException as exc:
        print(f"Error fetching data for username {username}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching data for username {username}: {response.status_code}")
        return []  # Return an empty list in case of error

    try:
        payload = response.json()
    except ValueError as exc:
        print(f"Error fetching data for username {username}: {exc}")
        return []

    # dict.get's default only covers a missing key; an explicit JSON null
    # comes back as None, so fall back with `or` at both levels.
    data = payload.get("data") or {}
    return data.get("userContestRanking") or []

# Prompt the user for input and fetch necessary data
def get_user_input():
    """Collect the five model features interactively and return them as a (1, 5) array.

    The username is read first and used to fetch the current rating and the
    attended-contest count via ``fetch_data``; the ranking and the total
    number of participants are then typed in by the user. The returned row is
    ``[rating, ranking, total_participants, ranking_percent, attended_count]``,
    where ``ranking_percent`` is ``ranking * 100 / total_participants`` (0 if
    the total is 0).

    Raises:
        ValueError: if no contest ranking data could be fetched for the
            username, or if the ranking / participant count typed in is not
            an integer.
    """
    print("Enter the input values:")
    username = input("Enter your username: ")
    data = fetch_data(username)
    if not data:
        # fetch_data yields an empty value on request errors and for users
        # without contest ranking data; indexing it by key would otherwise
        # fail with an unhelpful TypeError.
        raise ValueError(f"No contest ranking data available for username {username!r}")
    input1 = data["rating"]
    input2 = int(input("Enter your ranking: "))
    input3 = int(input("Enter Total Participants: "))
    input4 = (input2 * 100) / input3 if input3 != 0 else 0
    input5 = data["attendedContestsCount"]
    return np.array([[input1, input2, input3, input4, input5]])

# Scale the raw features, add the middle axis, and run the model
def make_prediction(input_data):
    """Return the model's prediction for the first row of ``input_data``.

    ``input_data`` is transformed with the module-level ``scaler``, reshaped
    from (rows, features) to (rows, 1, features), and passed to the
    module-level ``model``; the first value of the first prediction is returned.
    """
    scaled = scaler.transform(input_data)
    row_count, feature_count = scaled.shape[0], scaled.shape[1]
    model_input = scaled.reshape((row_count, 1, feature_count))
    predictions = model.predict(model_input)
    first_prediction = predictions[0]
    return first_prediction[0]

# Entry point: gather the inputs, predict, and print a short report
def main():
    """Run one interactive prediction and print the results.

    Prints the current rating (feature 0), the number of contests attended
    (feature 4), the predicted rating change, and the sum of the current
    rating and the predicted change.
    """
    features = get_user_input()
    predicted_change = make_prediction(features)

    current_rating = features[0][0]
    contests_attended = features[0][4]

    print(f"Your Current Rating is: {current_rating}")
    print(f"You have Participated in {contests_attended} contests")
    print(f"Predicted change in rating: {predicted_change}")
    print(f"Your future rating will be: {current_rating + predicted_change}")

# Run the interactive prediction only when this code is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()


Enter the input values:
Enter your username: sagargupta1610
Enter your ranking: 8000
Enter Total Participants: 20000
Your Current Rating is : 1964.5613925584812
You have Participated in 85.0 contests
Predicted change in rating: -38.431880950927734
your Rating will get: 1926.1295116075535
