In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf
from pyspark.sql.types import ArrayType, FloatType, StructType, StructField, StringType
from scipy.spatial.distance import cosine
import numpy as np
import os
from consts import QUESTIONS_PATH, JOBS_PATH, open_csv_file

# Create (or reuse) the Spark session that the rest of the notebook shares.
spark = (
    SparkSession.builder
    .appName("InterviewQuestionSelector")
    .getOrCreate()
)
# NOTE(review): presumably silences the HuggingFace tokenizers fork/parallelism
# warning once sentence-transformers is used later on — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [0]:
# Read the three raw CSV inputs into Spark DataFrames (same order as listed).
job_postings, code_questions, open_questions = (
    open_csv_file(spark, base_path, file_name)
    for base_path, file_name in (
        (JOBS_PATH, 'all_jobpostings.csv'),
        (QUESTIONS_PATH, 'all_code_problems_with_solutions.csv'),
        (QUESTIONS_PATH, 'all_open_questions.csv'),
    )
)

# Normalise one text column: force string type and replace nulls with "".
def preprocess_column_spark(df, column):
    """Return `df` with `column` cast to string and its nulls filled with "".

    Args:
        df: Spark DataFrame containing `column`.
        column: Name of the column to normalise.

    Returns:
        A new DataFrame; all other columns are left untouched.
    """
    as_text = col(column).cast("string")
    return df.withColumn(column, as_text).fillna({column: ""})

# Normalise the free-text column that each dataset is processed on later.
job_postings = preprocess_column_spark(df=job_postings, column='job_summary')
code_questions = preprocess_column_spark(df=code_questions, column='topics')
open_questions = preprocess_column_spark(df=open_questions, column='question')

In [0]:
pip install -q -U google-generativeai

## Job postings: filling in missing skills

In [0]:
from pyspark.sql.functions import udf, concat_ws
from pyspark.sql.types import ArrayType, StringType
import ast

# Define a UDF to safely parse the string to a list
def parse_skills(skills_str):
    """Parse a stringified Python list of skills into a list of strings.

    Args:
        skills_str: Text such as "['Python', 'SQL']". May be None or empty.

    Returns:
        A list of strings. Returns [] when the input cannot be parsed as a
        Python literal or when the literal is not a list/tuple, so the result
        always fits an ArrayType(StringType()) column.
    """
    try:
        parsed = ast.literal_eval(skills_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval documents all of these for malformed input.
        return []

    # A valid literal that is not a sequence (e.g. "42") is not a skills list.
    if not isinstance(parsed, (list, tuple)):
        return []

    # Coerce elements to str so mixed-type lists still match the declared schema.
    return [str(skill) for skill in parsed if skill is not None]

# Wrap the parser as a Spark UDF that returns array<string>.
parse_skills_udf = udf(parse_skills, ArrayType(StringType()))

# Parse the raw "skills" text into an array, join it back into a single
# ", "-separated string, and store that string under the name "skills".
job_postings = (
    job_postings
    .withColumn("skills_list", parse_skills_udf("skills"))
    .withColumn("skills_string", concat_ws(", ", "skills_list"))
    .drop("skills", "skills_list")
    .withColumnRenamed("skills_string", "skills")
)

In [0]:
import pandas as pd

# Collect the postings into pandas so individual rows can be updated.
job_postings_pandas = job_postings.toPandas()
# Count the postings whose "skills" value is the empty string.
empty_skills_count = len(job_postings_pandas.loc[job_postings_pandas['skills'].eq('')])
print(empty_skills_count)

9551


In [0]:
import google.generativeai as genai
import os
import time
from api_keys import API_KEYS

def infer_skills(job_summary, model=None):
    """
    Extracts skills from a job description using the Gemini model.

    Args:
        job_summary: The job description text. Missing (None/NaN) or blank
            values short-circuit to '' without calling the model.
        model: Optional object exposing ``generate_content(prompt)`` whose
            result has a ``.text`` attribute. When omitted, the notebook-level
            global ``model`` is used, so ``Series.apply(infer_skills)`` keeps
            working unchanged.

    Returns:
        A comma-separated string of skills extracted from the job description,
        or '' when there is nothing to analyse or the request failed.
    """
    import warnings

    if pd.isna(job_summary) or str(job_summary).strip() == '':
        return ''

    # Resolve the client explicitly instead of relying on a bare global lookup.
    client = model if model is not None else globals().get('model')
    if client is None:
        warnings.warn("infer_skills: no Gemini model configured", RuntimeWarning)
        return ''

    prompt = f"Infer a comma-separated list of skills required for the following job description:\n{job_summary}"

    try:
        response = client.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        # Best effort: return '' so the caller can retry the row later, but
        # surface the failure instead of hiding it completely.
        warnings.warn(f"infer_skills: Gemini request failed: {e!r}", RuntimeWarning)
        return ''
    
# Fill the missing "skills" values in batches of 15 rows, cycling through the
# configured API keys (presumably to spread requests over per-key quotas —
# confirm), until nothing is left to fill or the one-hour budget is used up.
start_time = time.time()
running_time = 0
empty_skills_rows = job_postings_pandas[(job_postings_pandas['skills'] == '') & (job_postings_pandas['job_summary'].str.strip() != '')]

# `API_KEYS and ...`: with no keys the inner loop would never run, and the outer
# loop would spin without doing any work until the time limit expired.
while API_KEYS and (running_time < 3600) and (empty_skills_rows.shape[0] > 0):
    for api_key in API_KEYS.values():
        # Configure Gemini API
        os.environ['GOOGLE_API_KEY'] = api_key
        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Rows whose "skills" is still empty but that have a summary to infer from
        empty_skills_rows = job_postings_pandas[(job_postings_pandas['skills'] == '') & (job_postings_pandas['job_summary'].str.strip() != '')]
        if empty_skills_rows.shape[0] == 0:
            break

        # Get the indices of the first 15 rows with empty "skills"
        indices_to_update = empty_skills_rows.index[:15]

        # Rows for which infer_skills returns '' stay empty and are picked up
        # again by a later batch.
        job_postings_pandas.loc[indices_to_update, 'skills'] = (
            job_postings_pandas.loc[indices_to_update, 'job_summary']
                .apply(infer_skills)
        )

        # Enforce the time budget after every batch instead of only after a
        # full pass over all keys.
        running_time = time.time() - start_time
        if running_time >= 3600:
            break

In [0]:
# Replace remaining nulls with '' in both text columns, then report how many
# rows still have no skills and how many have no (non-blank) job summary.
job_postings_pandas['skills'] = job_postings_pandas['skills'].fillna('')
job_postings_pandas['job_summary'] = job_postings_pandas['job_summary'].fillna('')
empty_skills_count = int(job_postings_pandas['skills'].eq('').sum())
empty_job_summaries = int(job_postings_pandas['job_summary'].str.strip().eq('').sum())
print(f"empty strings: {empty_skills_count}")
print(f"empty job summaries: {empty_job_summaries}")

empty strings: 631
empty job summaries: 631


In [0]:
from consts import JOBS_PATH
import os

# Expose the enriched postings as a Spark DataFrame and persist them as CSV.
job_postings_with_skills = spark.createDataFrame(job_postings_pandas)
job_postings_pandas.to_csv(
    os.path.join(JOBS_PATH, 'all_jobpostings_with_skills.csv'),
    index=False,
)

## Code questions: filling in missing topics

In [0]:
from pyspark.sql.functions import regexp_replace, concat_ws, split, col, expr

# Turn "topics" into a plain ", "-separated string: split on ", ", drop the
# single quotes from every item, and join the items back together.
code_questions = (
    code_questions
    .withColumn("topics_array", split(col("topics"), ", "))
    .withColumn("topics_array_cleaned", expr("transform(topics_array, x -> regexp_replace(x, \"'\", \"\"))"))
    .withColumn("topics_formatted", concat_ws(", ", col("topics_array_cleaned")))
    .drop("topics_array", "topics_array_cleaned", "topics")
    .withColumnRenamed("topics_formatted", "topics")
)

# Collect into pandas so individual rows can be updated.
code_questions_pandas = code_questions.toPandas()

In [0]:
import google.generativeai as genai
import os
import time
from api_keys import API_KEYS

def extract_topics_from_question(question, model=None):
    """
    Asks the Gemini model which skills an interview question tests.

    Args:
        question: The question text. Missing (None/NaN) or blank values
            short-circuit to '' without calling the model. Non-string values
            are converted with str() before the blank check.
        model: Optional object exposing ``generate_content(prompt)`` whose
            result has a ``.text`` attribute. When omitted, the notebook-level
            global ``model`` is used, so ``Series.apply(...)`` keeps working
            unchanged.

    Returns:
        The model's answer with surrounding whitespace removed (the prompt asks
        for a comma-separated list), or '' when there is nothing to analyse or
        the request failed.
    """
    import warnings

    if pd.isna(question) or str(question).strip() == '':
        return ''

    # Resolve the client explicitly instead of relying on a bare global lookup.
    client = model if model is not None else globals().get('model')
    if client is None:
        warnings.warn("extract_topics_from_question: no Gemini model configured", RuntimeWarning)
        return ''

    prompt = f"Analyze the following question and identify the specific skills being tested or evaluated. Return the skills as a comma-separated list of skills. If the question does not test any skills, return an empty string. Question: {question}"

    try:
        response = client.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        # Best effort: return '' so the caller can retry the row later, but
        # surface the failure instead of hiding it completely.
        warnings.warn(f"extract_topics_from_question: Gemini request failed: {e!r}", RuntimeWarning)
        return ''
   

# Fill the missing "topics" of the coding questions in batches of 15 rows,
# cycling through the configured API keys, until nothing is left to fill or the
# 15-minute budget is used up.
start_time = time.time()
running_time = 0
empty_topics_rows = code_questions_pandas[(code_questions_pandas['topics'] == '') & (code_questions_pandas['question'].str.strip() != '')]

# `API_KEYS and ...`: with no keys the inner loop would never run, and the outer
# loop would spin without doing any work until the time limit expired.
while API_KEYS and (running_time < 900) and (empty_topics_rows.shape[0] > 0):
    for api_key in API_KEYS.values():
        # Configure Gemini API
        os.environ['GOOGLE_API_KEY'] = api_key
        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Rows whose "topics" is still empty but that have a question to analyse
        empty_topics_rows = code_questions_pandas[(code_questions_pandas['topics'] == '') & (code_questions_pandas['question'].str.strip() != '')]
        if empty_topics_rows.shape[0] == 0:
            break

        # Get the indices of the first 15 rows with empty "topics"
        indices_to_update = empty_topics_rows.index[:15]

        # Rows for which the extractor returns '' stay empty and are picked up
        # again by a later batch.
        code_questions_pandas.loc[indices_to_update, 'topics'] = (
            code_questions_pandas.loc[indices_to_update, 'question']
                .apply(extract_topics_from_question)
        )

        # Enforce the time budget after every batch instead of only after a
        # full pass over all keys.
        running_time = time.time() - start_time
        if running_time >= 900:
            break

In [0]:
# Report nulls first (before they are replaced), then fill nulls with '' and
# count the rows that still have no topics / no (non-blank) question text.
empty_topics_count = int(code_questions_pandas['topics'].isna().sum())
print(f"nulls: {empty_topics_count}")
code_questions_pandas['topics'] = code_questions_pandas['topics'].fillna('')
code_questions_pandas['question'] = code_questions_pandas['question'].fillna('')
empty_topics_count = int(code_questions_pandas['topics'].eq('').sum())
empty_questions = int(code_questions_pandas['question'].str.strip().eq('').sum())
print(f"empty strings: {empty_topics_count}")
print(f"empty questions: {empty_questions}")

nulls: 0
empty strings: 0
empty questions: 0


In [0]:
from consts import QUESTIONS_PATH
import os

# Expose the enriched coding questions as a Spark DataFrame and persist them as CSV.
code_questions_with_topics = spark.createDataFrame(code_questions_pandas)
code_questions_pandas.to_csv(
    os.path.join(QUESTIONS_PATH, 'all_code_questions_with_topics.csv'),
    index=False,
)

In [0]:
# Render the enriched coding questions as a table.
# NOTE(review): `.display()` looks like the Databricks DataFrame helper — confirm
# the runtime provides it before running this notebook elsewhere.
code_questions_with_topics.display()

formatted_title,difficulty,question_id,question,similar_questions,no_similar_questions,acceptance,solution_URL,solution,topics
longest-substring-without-repeating-characters,Medium,3,"Given a string s, find the length of the longest substring without repeating characters.  Example 1: Input: s = ""abcabcbb"" Output: 3 Explanation: The answer is ""abc"", with the length of 3. Example 2: Input: s = ""bbbbb"" Output: 1 Explanation: The answer is ""b"", with the length of 1. Example 3: Input: s = ""pwwkew"" Output: 3 Explanation: The answer is ""wke"", with the length of 3. Notice that the answer must be a substring, ""pwke"" is a subsequence and not a substring.  Constraints: 0 <= s.length <= 5 * 104 s consists of English letters, digits, symbols and spaces.","[""'Longest Substring with At Most Two Distinct Characters'"", ""'Longest Substring with At Most K Distinct Characters'"", ""'Subarrays with K Different Integers'"", ""'Maximum Erasure Value'"", ""'Number of Equal Count Substrings'"", ""'Minimum Consecutive Cards to Pick Up'"", ""'Longest Nice Subarray'"", ""'Optimal Partition of String'"", ""'Count Complete Subarrays in an Array'""]",9.0,34.1,https://leetcode.com/problems/longest-substring-without-repeating-characters/solution,,"Hash Table, String, Sliding Window"
median-of-two-sorted-arrays,Hard,4,"Given two sorted arrays nums1 and nums2 of size m and n respectively, return the median of the two sorted arrays. The overall run time complexity should be O(log (m+n)) . Example 1: Input: nums1 = [1,3], nums2 = [2] Output: 2.00000 Explanation: merged array = [1,2,3] and median is 2. Example 2: Input: nums1 = [1,2], nums2 = [3,4] Output: 2.50000 Explanation: merged array = [1,2,3,4] and median is (2 + 3) / 2 = 2.5. Constraints: nums1.length == m nums2.length == n 0 <= m <= 1000 0 <= n <= 1000 1 <= m + n <= 2000 -10 6 <= nums1[i], nums2[i] <= 10 6",,,,,,"Algorithm design,Time complexity analysis,Binary search"
longest-palindromic-substring,Medium,5,"Given a string s, return the longest palindromic substring in s.  Example 1: Input: s = ""babad"" Output: ""bab"" Explanation: ""aba"" is also a valid answer. Example 2: Input: s = ""cbbd"" Output: ""bb""  Constraints: 1 <= s.length <= 1000 s consist of only digits and English letters.","[""'Shortest Palindrome'"", ""'Palindrome Permutation'"", ""'Palindrome Pairs'"", ""'Longest Palindromic Subsequence'"", ""'Palindromic Substrings'"", ""'Maximum Number of Non-overlapping Palindrome Substrings'""]",6.0,33.2,https://leetcode.com/problems/longest-palindromic-substring/solution,"package com.fishercoder.solutions.firstthousand; import java.util.HashMap; import java.util.Map; public class _1 {  public static class Solution1 {  public int[] twoSum(int[] nums, int target) {  Map map = new HashMap();  for (int i = 0; i < nums.length; i++) {  if (map.containsKey(target - nums[i])) {  return new int[] {map.get(target - nums[i]), i};  } else {  map.put(nums[i], i);  }  }  return new int[] {-1, -1};  }  } }","String, Dynamic Programming"
reverse-integer,Medium,7,"Given a signed 32-bit integer x , return x with its digits reversed . If reversing x causes the value to go outside the signed 32-bit integer range [-2 31 , 2 31 - 1] , then return 0 . Assume the environment does not allow you to store 64-bit integers (signed or unsigned). Example 1: Input: x = 123 Output: 321 Example 2: Input: x = -123 Output: -321 Example 3: Input: x = 120 Output: 21 Constraints: -2 31 <= x <= 2 31 - 1",,,,,,"Mathematical skills, Programming skills, Algorithm design, Integer manipulation, Edge case handling, Error handling"
reverse-integer,Medium,7,"Given a signed 32-bit integer x, return x with its digits reversed. If reversing x causes the value to go outside the signed 32-bit integer range [-231, 231 - 1], then return 0. Assume the environment does not allow you to store 64-bit integers (signed or unsigned).  Example 1: Input: x = 123 Output: 321 Example 2: Input: x = -123 Output: -321 Example 3: Input: x = 120 Output: 21  Constraints: -231 <= x <= 231 - 1","[""'String to Integer (atoi)'"", ""'Reverse Bits'"", ""'A Number After a Double Reversal'"", ""'Count Number of Distinct Integers After Reverse Operations'""]",4.0,27.9,https://leetcode.com/problems/reverse-integer/solution,,Math
string-to-integer-atoi,Medium,8,"Implement the myAtoi(string s) function, which converts a string to a 32-bit signed integer (similar to C/C++'s atoi function). The algorithm for myAtoi(string s) is as follows: Read in and ignore any leading whitespace. Check if the next character (if not already at the end of the string) is '-' or '+'. Read this character in if it is either. This determines if the final result is negative or positive respectively. Assume the result is positive if neither is present. Read in next the characters until the next non-digit character or the end of the input is reached. The rest of the string is ignored. Convert these digits into an integer (i.e. ""123"" -> 123, ""0032"" -> 32). If no digits were read, then the integer is 0. Change the sign as necessary (from step 2). If the integer is out of the 32-bit signed integer range [-231, 231 - 1], then clamp the integer so that it remains in the range. Specifically, integers less than -231 should be clamped to -231, and integers greater than 231 - 1 should be clamped to 231 - 1. Return the integer as the final result. Note: Only the space character ' ' is considered a whitespace character. Do not ignore any characters other than the leading whitespace or the rest of the string after the digits.  Example 1: Input: s = ""42"" Output: 42 Explanation: The underlined characters are what is read in, the caret is the current reader position. Step 1: ""42"" (no characters read because there is no leading whitespace)  ^ Step 2: ""42"" (no characters read because there is neither a '-' nor '+')  ^ Step 3: ""42"" (""42"" is read in)  ^ The parsed integer is 42. Since 42 is in the range [-231, 231 - 1], the final result is 42. Example 2: Input: s = "" -42"" Output: -42 Explanation: Step 1: "" -42"" (leading whitespace is read and ignored)  ^ Step 2: "" -42"" ('-' is read, so the result should be negative)  ^ Step 3: "" -42"" (""42"" is read in)  ^ The parsed integer is -42. 
Since -42 is in the range [-231, 231 - 1], the final result is -42. Example 3: Input: s = ""4193 with words"" Output: 4193 Explanation: Step 1: ""4193 with words"" (no characters read because there is no leading whitespace)  ^ Step 2: ""4193 with words"" (no characters read because there is neither a '-' nor '+')  ^ Step 3: ""4193 with words"" (""4193"" is read in; reading stops because the next character is a non-digit)  ^ The parsed integer is 4193. Since 4193 is in the range [-231, 231 - 1], the final result is 4193.  Constraints: 0 <= s.length <= 200 s consists of English letters (lower-case and upper-case), digits (0-9), ' ', '+', '-', and '.'.","[""'Reverse Integer'"", ""'Valid Number'"", ""'Check if Numbers Are Ascending in a Sentence'""]",3.0,16.8,https://leetcode.com/problems/string-to-integer-atoi/solution,"package com.fishercoder.solutions.firstthousand; import java.util.HashMap; import java.util.Map; public class _1 {  public static class Solution1 {  public int[] twoSum(int[] nums, int target) {  Map map = new HashMap();  for (int i = 0; i < nums.length; i++) {  if (map.containsKey(target - nums[i])) {  return new int[] {map.get(target - nums[i]), i};  } else {  map.put(nums[i], i);  }  }  return new int[] {-1, -1};  }  } }",String
palindrome-number,Easy,9,"Given an integer x , return true if x is a palindrome , and false otherwise . Example 1: Input: x = 121 Output: true Explanation: 121 reads as 121 from left to right and from right to left. Example 2: Input: x = -121 Output: false Explanation: From left to right, it reads -121. From right to left, it becomes 121-. Therefore it is not a palindrome. Example 3: Input: x = 10 Output: false Explanation: Reads 01 from right to left. Therefore it is not a palindrome. Constraints: -2 31 <= x <= 2 31 - 1 Follow up: Could you solve it without converting the integer to a string?",,,,,"package com.fishercoder.solutions.firstthousand; import java.util.HashMap; import java.util.Map; public class _1 {  public static class Solution1 {  public int[] twoSum(int[] nums, int target) {  Map map = new HashMap();  for (int i = 0; i < nums.length; i++) {  if (map.containsKey(target - nums[i])) {  return new int[] {map.get(target - nums[i]), i};  } else {  map.put(nums[i], i);  }  }  return new int[] {-1, -1};  }  } }","Mathematical reasoning,Algorithmic thinking,Coding,Problem-solving"
regular-expression-matching,Hard,10,"Given an input string s and a pattern p , implement regular expression matching with support for '.' and '*' where: '.' Matches any single character.​​​​ '*' Matches zero or more of the preceding element. The matching should cover the entire input string (not partial). Example 1: Input: s = ""aa"", p = ""a"" Output: false Explanation: ""a"" does not match the entire string ""aa"". Example 2: Input: s = ""aa"", p = ""a*"" Output: true Explanation: '*' means zero or more of the preceding element, 'a'. Therefore, by repeating 'a' once, it becomes ""aa"". Example 3: Input: s = ""ab"", p = "".*"" Output: true Explanation: "".*"" means ""zero or more (*) of any character (.)"". Constraints: 1 <= s.length <= 20 1 <= p.length <= 20 s contains only lowercase English letters. p contains only lowercase English letters, '.' , and '*' . It is guaranteed for each appearance of the character '*' , there will be a previous valid character to match.",,,,,,"Dynamic programming,Recursion,String manipulation,Regular expression understanding"
regular-expression-matching,Hard,10,"Given an input string s and a pattern p, implement regular expression matching with support for '.' and '*' where: '.' Matches any single character.​​​​ '*' Matches zero or more of the preceding element. The matching should cover the entire input string (not partial).  Example 1: Input: s = ""aa"", p = ""a"" Output: false Explanation: ""a"" does not match the entire string ""aa"". Example 2: Input: s = ""aa"", p = ""a*"" Output: true Explanation: '*' means zero or more of the preceding element, 'a'. Therefore, by repeating 'a' once, it becomes ""aa"". Example 3: Input: s = ""ab"", p = "".*"" Output: true Explanation: "".*"" means ""zero or more (*) of any character (.)"".  Constraints: 1 <= s.length <= 20 1 <= p.length <= 20 s contains only lowercase English letters. p contains only lowercase English letters, '.', and '*'. It is guaranteed for each appearance of the character '*', there will be a previous valid character to match.","[""'Wildcard Matching'""]",1.0,27.9,https://leetcode.com/problems/regular-expression-matching/solution,,"String, Dynamic Programming, Recursion"
integer-to-roman,Medium,12,"Seven different symbols represent Roman numerals with the following values: Symbol Value I 1 V 5 X 10 L 50 C 100 D 500 M 1000 Roman numerals are formed by appending the conversions of decimal place values from highest to lowest. Converting a decimal place value into a Roman numeral has the following rules: If the value does not start with 4 or 9, select the symbol of the maximal value that can be subtracted from the input, append that symbol to the result, subtract its value, and convert the remainder to a Roman numeral. If the value starts with 4 or 9 use the subtractive form representing one symbol subtracted from the following symbol, for example, 4 is 1 ( I ) less than 5 ( V ): IV and 9 is 1 ( I ) less than 10 ( X ): IX . Only the following subtractive forms are used: 4 ( IV ), 9 ( IX ), 40 ( XL ), 90 ( XC ), 400 ( CD ) and 900 ( CM ). Only powers of 10 ( I , X , C , M ) can be appended consecutively at most 3 times to represent multiples of 10. You cannot append 5 ( V ), 50 ( L ), or 500 ( D ) multiple times. If you need to append a symbol 4 times use the subtractive form . Given an integer, convert it to a Roman numeral. Example 1: Input: num = 3749 Output: ""MMMDCCXLIX"" Explanation: 3000 = MMM as 1000 (M) + 1000 (M) + 1000 (M) 700 = DCC as 500 (D) + 100 (C) + 100 (C) 40 = XL as 10 (X) less of 50 (L) 9 = IX as 1 (I) less of 10 (X) Note: 49 is not 1 (I) less of 50 (L) because the conversion is based on decimal places Example 2: Input: num = 58 Output: ""LVIII"" Explanation: 50 = L 8 = VIII Example 3: Input: num = 1994 Output: ""MCMXCIV"" Explanation: 1000 = M 900 = CM 90 = XC 4 = IV Constraints: 1 <= num <= 3999",,,,,,"Problem-solving,Algorithm design,Coding,Roman numeral conversion,Number conversion"


## Open questions: filling in missing topics

In [0]:
import pandas as pd

# Collect the open questions into pandas and start with an empty "topics"
# column for every row.
open_questions_pandas = open_questions.toPandas().assign(topics='')

In [0]:
import google.generativeai as genai
import os
from api_keys import API_KEYS
   
# Fill the "topics" of the open questions in batches of 15 rows, cycling
# through the configured API keys, until nothing is left to fill or the
# 15-minute budget is used up.
import time  # this cell uses time.time() but did not import it itself

start_time = time.time()
running_time = 0
empty_topics_rows = open_questions_pandas[(open_questions_pandas['topics'] == '') & (open_questions_pandas['question'].str.strip() != '')]

# `API_KEYS and ...`: with no keys the inner loop would never run, and the outer
# loop would spin without doing any work until the time limit expired.
while API_KEYS and (running_time < 900) and (empty_topics_rows.shape[0] > 0):
    for api_key in API_KEYS.values():
        # Configure Gemini API
        os.environ['GOOGLE_API_KEY'] = api_key
        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Rows whose "topics" is still empty but that have a question to analyse
        empty_topics_rows = open_questions_pandas[(open_questions_pandas['topics'] == '') & (open_questions_pandas['question'].str.strip() != '')]
        if empty_topics_rows.shape[0] == 0:
            break

        # Get the indices of the first 15 rows with empty "topics"
        indices_to_update = empty_topics_rows.index[:15]

        # Rows for which the extractor returns '' stay empty and are picked up
        # again by a later batch.
        open_questions_pandas.loc[indices_to_update, 'topics'] = (
            open_questions_pandas.loc[indices_to_update, 'question']
                .apply(extract_topics_from_question)
        )

        # Enforce the time budget after every batch instead of only after a
        # full pass over all keys.
        running_time = time.time() - start_time
        if running_time >= 900:
            break

In [0]:
from consts import QUESTIONS_PATH
import os

# Expose the enriched open questions as a Spark DataFrame and persist them as CSV.
open_questions_with_topics = spark.createDataFrame(open_questions_pandas)
open_questions_pandas.to_csv(
    os.path.join(QUESTIONS_PATH, 'all_open_questions_with_topics.csv'),
    index=False,
)

0


In [0]:
# Render the enriched open questions as a table.
# NOTE(review): `.display()` looks like the Databricks DataFrame helper — confirm
# the runtime provides it before running this notebook elsewhere.
open_questions_with_topics.display()

question_id,question,category,topics
112,What are your strengths?,General,"Self-awareness, communication, self-promotion"
113,What are your weaknesses?,General,"Self-awareness, honesty, self-reflection, communication"
114,Why are you interested in working for [insert company name here]?,General,"Research skills, Communication skills, Persuasion skills, Self-awareness"
115,Where do you see yourself in five years? Ten years?,General,"Career planning, foresight, self-awareness, long-term goals, ambition"
116,Why do you want to leave your current company?,General,"Communication skills, self-awareness, critical thinking, problem-solving"
117,Why was there a gap in your employment between [insert date] and [insert date]?,General,"Communication skills, Honesty, Problem-solving skills, Self-awareness"
118,What can you offer us that someone else can not?,General,"Self-promotion, differentiation, unique selling proposition, persuasive communication, concise communication"
119,What are three things your former manager would like you to improve on?,General,"Self-awareness, self-reflection, honesty, critical thinking"
120,Are you willing to relocate?,General,"Adaptability, Willingness to relocate, Communication skills"
121,Are you willing to travel? *(Post COVID-19),General,"Flexibility, Adaptability, Willingness to relocate"


# Demonstrating the model on 50 random jobs

In [0]:
import pandas as pd
import os
from consts import JOBS_PATH, DATA_PATH

# Load the job postings that were enriched with inferred skills
jobs_data = pd.read_csv(os.path.join(JOBS_PATH, "all_jobpostings_with_skills.csv"))

# Draw a reproducible random sample of (at most) 50 postings for the demo.
# This is a plain sample, not a train/test split. min() keeps the cell working
# if the file holds fewer than 50 rows, where sample(n=50) would raise ValueError.
jobs_sample = jobs_data.sample(n=min(50, len(jobs_data)), random_state=42)

# Save the sample so later cells can reload it through Spark
jobs_sample.to_csv(os.path.join(JOBS_PATH, "jobs_sample.csv"), index=False)

## Topics & skills embeddings

In [0]:
pip install sentence-transformers

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, split, col, lit, udf, array, broadcast
from pyspark.sql.types import ArrayType, FloatType, DoubleType
from sentence_transformers import SentenceTransformer
from calculate_heuristic_score import calculate_score
from consts import DATA_PATH, QUESTIONS_PATH, MID_CALC_PATH, open_csv_file

# Reload the sampled jobs and the topic-enriched question sets through Spark.
jobs_sample = open_csv_file(spark, JOBS_PATH, "jobs_sample.csv")
code_questions = open_csv_file(spark, QUESTIONS_PATH, "all_code_questions_with_topics.csv")
open_questions = open_csv_file(spark, QUESTIONS_PATH, "all_open_questions_with_topics.csv")

# One output row per topic. The split is on a bare ",", so each "topic" value
# keeps whatever whitespace surrounded the commas in "topics".
code_questions_exploded = code_questions.withColumn(
    "topic", explode(split(col("topics"), ","))
)
open_questions_exploded = open_questions.withColumn(
    "topic", explode(split(col("topics"), ","))
)

# Sentence-embedding model used by the UDF below.
# NOTE: this rebinds the notebook-level name `model`, which earlier cells used
# for the Gemini client.
model = SentenceTransformer('all-MiniLM-L6-v2')


def generate_embedding(text):
    """Encode `text` with the notebook-level `model` and return the vector as a list."""
    vector = model.encode(text)
    return vector.tolist()


# Register the function as a Spark UDF returning array<float>
# (same effect as decorating it with @udf(ArrayType(FloatType()))).
generate_embedding = udf(generate_embedding, ArrayType(FloatType()))
# Embed each distinct topic once per question set.
unique_topics_code = (
    code_questions_exploded
    .select("topic")
    .distinct()
    .withColumn("topic_embedding", generate_embedding(col("topic")))
)
unique_topics_open = (
    open_questions_exploded
    .select("topic")
    .distinct()
    .withColumn("topic_embedding", generate_embedding(col("topic")))
)

# Collect everything into pandas ...
code_questions_exploded_df = code_questions_exploded.toPandas()
open_questions_exploded_df = open_questions_exploded.toPandas()
unique_topics_code_df = unique_topics_code.toPandas()
unique_topics_open_df = unique_topics_open.toPandas()

# ... and write the intermediate results as CSV files under MID_CALC_PATH.
os.makedirs(MID_CALC_PATH, exist_ok=True)
code_questions_exploded_df.to_csv(
    os.path.join(MID_CALC_PATH, "code_questions_exploded.csv"), index=False
)
open_questions_exploded_df.to_csv(
    os.path.join(MID_CALC_PATH, "open_questions_exploded.csv"), index=False
)
unique_topics_code_df.to_csv(
    os.path.join(MID_CALC_PATH, "unique_topics_code.csv"), index=False
)
unique_topics_open_df.to_csv(
    os.path.join(MID_CALC_PATH, "unique_topics_open.csv"), index=False
)

In [0]:
from pyspark.sql.functions import rand
from calculate_heuristic_score import calculate_score

# Reload the sampled job postings through Spark.
jobs_sample = open_csv_file(spark, JOBS_PATH, "jobs_sample.csv")
# calculate_score returns two results, bound here as the scores for the coding
# questions and for the open questions.
# NOTE(review): presumably it reads the CSVs written under MID_CALC_PATH — confirm
# in calculate_heuristic_score.
code_scores, open_scores = calculate_score(jobs_sample, spark)

In [0]:
# Show up to 70 (topics, skills, similarity) rows for each question type.
# NOTE(review): `.display()` looks like the Databricks DataFrame helper — confirm
# the runtime provides it before running this notebook elsewhere.
code_scores.select("topics", "skills", "similarity").limit(70).display()
open_scores.select("topics", "skills", "similarity").limit(70).display()

topics,skills,similarity
"Array, Two Pointers, Simulation","Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.1524822603212669
"Array, Matrix, Simulation","Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.1581306618715947
"String, Simulation","Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.1694836907554417
"String manipulation, substring identification, iterative processing, algorithm design","Licensed Occupational Therapist, Physical Therapy Assistant license (preferred), Occupational Therapy Assistant certification (preferred)",0.0391619024643053
"Array, Depth-First Search, Breadth-First Search, Union Find, Matrix","Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.1178168364650299
"Tree, Depth-First Search, Binary Tree","Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.1546903834628161
"Array, Hash Table, Tree, Depth-First Search, Breadth-First Search, Binary Tree","Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.1365442384855561
"Array, Dynamic Programming","Evaluation, Implementation, Execution, On-premise support, Field work",0.2504414746537804
"Array, Math, Greedy, Sorting","Evaluation, Implementation, Execution, On-premise support, Field work",0.2248905116692185
"Hash Table, Binary Search, Design, Sorting, Ordered Set","Statistical methods, Causal inference, Data analysis, Data visualization, Data interpretation, SQL, Python, R, Project management, Data storytelling, Communication, Stakeholder management, Teamwork, Mentorship, Product data analysis, Revenue data analysis",0.1905523349065333


topics,skills,similarity
"Conflict resolution, customer service, communication, empathy, problem-solving","Troubleshooting, Customer Support, Network Engineering, WAN Operations, TCP/IP, OSI Model, Linux/UNIX, VMware, Network Security, Windows OS, Communication (written & verbal - English & Japanese), Data Analysis, Problem-solving, KCS Methodology, Siebel (CRM), Public/Private Cloud Technologies, Scripting (e.g., Python, JavaScript), Containerization (e.g., Docker), Container Orchestration (e.g., Kubernetes), Automation (e.g., Ansible), Technical Documentation, Multitasking, Prioritization, Teamwork, ISO Quality Management Systems.",0.2223421096801758
"Understanding of convolutional neural networks, critical thinking, problem-solving","Statistical analysis, Data modeling, RFM analysis, Predictive modeling, Marketing mix modeling, Attribution modeling, A/B testing, Multivariate testing, Regression analysis, Cluster analysis, CHAID, Factor analysis, Principal component analysis, Time series analysis, Survival analysis, Experimental design, SQL, R, Python, Data visualization, Presentation skills, Communication skills, Project management, Collaboration, Data quality assurance (QA/QC), Database marketing",0.2268657122667019
"Problem-solving, critical thinking, understanding of machine learning concepts","C/C++, Python, Linux-based development, VLSI CAD algorithm development, data structures, algorithms, software engineering principles, strong verbal and written communication skills, strong teamwork skills, problem-solving skills, analysis skills, programming skills, debugging skills, troubleshooting skills, statistical analysis, machine learning, deep learning, routing algorithm knowledge, quality and software processes, Unix/Linux platform experience",0.2834614831954241
"Deep learning,Gradient descent optimization,Debugging,Problem-solving","Linux/UNIX administration, Docker, docker-compose, containerized development, Ansible, Packer, Terraform, OpenShift, Kubernetes, Bash, Python, Node.js, Application clustering, load balancing, VMware vSphere API, Gitlab CI/CD, networking fundamentals, command line tools, HTTP, SSL, LDAP, SQL, HTML, XML, PostgreSQL, Keepalived, CI/CD, Blue/Green Deployments, Consul, Atomic Host, distributed computing, data systems, immutable infrastructure, serverless computing",0.1466910544344607
"Mathematical reasoning,Computational complexity analysis,Algorithm understanding","Software development, Data structures, Algorithms, C, C++, Java, JavaScript, Python, C#, Go, Web application development, Mobile application development, Unix/Linux environments, Distributed systems, Parallel systems, Machine learning, Information retrieval, Natural language processing, Networking, Large software system development, Security software development, UI development, AJAX, Embedded systems, Mobile app development (Android/iOS), Developer tools, Automated test system development, Cloud-based computing, Problem-solving, Leadership, Communication (written and verbal English)",0.2089879236394359
"Understanding of deep learning concepts,Knowledge of gradient descent,Problem-solving","financial analysis, accounting, Microsoft Excel, data analysis, financial reporting, project management, account reconciliation, communication, organization, time management, flexibility, team collaboration, influencing, KPI tracking, cash flow management",0.253946195046107
"Deep learning,Gradient descent optimization,Debugging,Problem-solving","financial analysis, accounting, Microsoft Excel, data analysis, financial reporting, project management, account reconciliation, communication, organization, time management, flexibility, team collaboration, influencing, KPI tracking, cash flow management",0.253946195046107
"Problem-solving, critical thinking, understanding of machine learning concepts","Linux/UNIX administration, Docker, docker-compose, containerized development, Ansible, Packer, Terraform, OpenShift, Kubernetes, Bash, Python, Node.js, Application clustering, load balancing, VMware vSphere API, Gitlab CI/CD, networking fundamentals, command line tools, HTTP, SSL, LDAP, SQL, HTML, XML, PostgreSQL, Keepalived, CI/CD, Blue/Green Deployments, Consul, Atomic Host, distributed computing, data systems, immutable infrastructure, serverless computing",0.1466910544344607
"Understanding of neural networks,Understanding of activation functions,Problem-solving,Critical thinking","Linux/UNIX administration, Docker, docker-compose, containerized development, Ansible, Packer, Terraform, OpenShift, Kubernetes, Bash, Python, Node.js, Application clustering, load balancing, VMware vSphere API, Gitlab CI/CD, networking fundamentals, command line tools, HTTP, SSL, LDAP, SQL, HTML, XML, PostgreSQL, Keepalived, CI/CD, Blue/Green Deployments, Consul, Atomic Host, distributed computing, data systems, immutable infrastructure, serverless computing",0.1466910544344607
"Conflict resolution, customer service, communication, empathy, problem-solving","SQL, RDBMS, Non-RDBMS, MySQL, PostgreSQL, MongoDB, Python, Jupyter Notebook, Inferential Statistics, Probability, ETL, Data Pipeline, Automated Reporting, Data Analysis, Statistical Modeling, Machine Learning, Git, Hive, Spark, Presto, Diagnostic Analytics, Forecasting, Big Data",0.1990645072706367


In [0]:
from consts import MID_CALC_PATH

# Persist only the raw "similarity" column of each score table as a CSV file
# under MID_CALC_PATH. `toPandas()` collects the column to the driver first.
code_similarity_csv = os.path.join(MID_CALC_PATH, "code_questions_similarity.csv")
code_scores_df = code_scores.select("similarity").toPandas()
code_scores_df.to_csv(code_similarity_csv, index=False)

open_similarity_csv = os.path.join(MID_CALC_PATH, "open_questions_similarity.csv")
open_scores_df = open_scores.select("similarity").toPandas()
open_scores_df.to_csv(open_similarity_csv, index=False)

In [0]:
from pyspark.sql.functions import col, when, expr

""" Distributing the scores, so they'd be further apart """

# Signed square-root transform of a single numeric column
def apply_root_transform(df, col_name):
    """Return `df` with `col_name` replaced by its signed square root.

    Values >= 0 become x ** 0.5; every other value becomes -((-x) ** 0.5),
    so the sign of the input is kept.

    Args:
        df: Spark DataFrame containing `col_name`.
        col_name: Name of the column to transform in place.

    Returns:
        A new DataFrame in which `col_name` holds the transformed values.
    """
    value = col(col_name)
    root_of_non_negative = value ** 0.5
    # Explicit parentheses: negate the input, take the root, negate the result.
    root_of_negative = -((-value) ** 0.5)
    signed_root = when(value >= 0, root_of_non_negative).otherwise(root_of_negative)
    return df.withColumn(col_name, signed_root)

# Run the signed-root transform over the "similarity" column of the code-question
# scores and the open-question scores, in that order.
code_scores_after_transformation, open_scores_after_transformation = [
    apply_root_transform(scores, "similarity")
    for scores in (code_scores, open_scores)
]

In [0]:
# Optional preview: up to 70 rows of each transformed table (code questions
# first, then open questions), restricted to the columns that matter here.
preview_columns = ("topics", "skills", "similarity")
for transformed_scores in (code_scores_after_transformation, open_scores_after_transformation):
    transformed_scores.select(*preview_columns).limit(70).display()

topics,skills,similarity
"Array, Two Pointers, Simulation","Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.390489769803598
"Array, Matrix, Simulation","Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.3976564621272923
"String, Simulation","Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.4116839695147745
"String manipulation, substring identification, iterative processing, algorithm design","Licensed Occupational Therapist, Physical Therapy Assistant license (preferred), Occupational Therapy Assistant certification (preferred)",0.1978936645380679
"Array, Depth-First Search, Breadth-First Search, Union Find, Matrix","Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.3432445723751942
"Tree, Depth-First Search, Binary Tree","Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.3933069837452879
"Array, Hash Table, Tree, Depth-First Search, Breadth-First Search, Binary Tree","Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.3695189284536803
"Array, Dynamic Programming","Evaluation, Implementation, Execution, On-premise support, Field work",0.5004412799258074
"Array, Math, Greedy, Sorting","Evaluation, Implementation, Execution, On-premise support, Field work",0.4742262241475249
"Hash Table, Binary Search, Design, Sorting, Ordered Set","Statistical methods, Causal inference, Data analysis, Data visualization, Data interpretation, SQL, Python, R, Project management, Data storytelling, Communication, Stakeholder management, Teamwork, Mentorship, Product data analysis, Revenue data analysis",0.4365230061595074


topics,skills,similarity
"Conflict resolution, customer service, communication, empathy, problem-solving","Troubleshooting, Customer Support, Network Engineering, WAN Operations, TCP/IP, OSI Model, Linux/UNIX, VMware, Network Security, Windows OS, Communication (written & verbal - English & Japanese), Data Analysis, Problem-solving, KCS Methodology, Siebel (CRM), Public/Private Cloud Technologies, Scripting (e.g., Python, JavaScript), Containerization (e.g., Docker), Container Orchestration (e.g., Kubernetes), Automation (e.g., Ansible), Technical Documentation, Multitasking, Prioritization, Teamwork, ISO Quality Management Systems.",0.4715316634969234
"Understanding of convolutional neural networks, critical thinking, problem-solving","Statistical analysis, Data modeling, RFM analysis, Predictive modeling, Marketing mix modeling, Attribution modeling, A/B testing, Multivariate testing, Regression analysis, Cluster analysis, CHAID, Factor analysis, Principal component analysis, Time series analysis, Survival analysis, Experimental design, SQL, R, Python, Data visualization, Presentation skills, Communication skills, Project management, Collaboration, Data quality assurance (QA/QC), Database marketing",0.4763042223901672
"Problem-solving, critical thinking, understanding of machine learning concepts","C/C++, Python, Linux-based development, VLSI CAD algorithm development, data structures, algorithms, software engineering principles, strong verbal and written communication skills, strong teamwork skills, problem-solving skills, analysis skills, programming skills, debugging skills, troubleshooting skills, statistical analysis, machine learning, deep learning, routing algorithm knowledge, quality and software processes, Unix/Linux platform experience",0.5324110096489592
"Deep learning,Gradient descent optimization,Debugging,Problem-solving","Linux/UNIX administration, Docker, docker-compose, containerized development, Ansible, Packer, Terraform, OpenShift, Kubernetes, Bash, Python, Node.js, Application clustering, load balancing, VMware vSphere API, Gitlab CI/CD, networking fundamentals, command line tools, HTTP, SSL, LDAP, SQL, HTML, XML, PostgreSQL, Keepalived, CI/CD, Blue/Green Deployments, Consul, Atomic Host, distributed computing, data systems, immutable infrastructure, serverless computing",0.3830026820199315
"Mathematical reasoning,Computational complexity analysis,Algorithm understanding","Software development, Data structures, Algorithms, C, C++, Java, JavaScript, Python, C#, Go, Web application development, Mobile application development, Unix/Linux environments, Distributed systems, Parallel systems, Machine learning, Information retrieval, Natural language processing, Networking, Large software system development, Security software development, UI development, AJAX, Embedded systems, Mobile app development (Android/iOS), Developer tools, Automated test system development, Cloud-based computing, Problem-solving, Leadership, Communication (written and verbal English)",0.4571519699612328
"Understanding of deep learning concepts,Knowledge of gradient descent,Problem-solving","financial analysis, accounting, Microsoft Excel, data analysis, financial reporting, project management, account reconciliation, communication, organization, time management, flexibility, team collaboration, influencing, KPI tracking, cash flow management",0.5039307442953912
"Deep learning,Gradient descent optimization,Debugging,Problem-solving","financial analysis, accounting, Microsoft Excel, data analysis, financial reporting, project management, account reconciliation, communication, organization, time management, flexibility, team collaboration, influencing, KPI tracking, cash flow management",0.5039307442953912
"Problem-solving, critical thinking, understanding of machine learning concepts","Linux/UNIX administration, Docker, docker-compose, containerized development, Ansible, Packer, Terraform, OpenShift, Kubernetes, Bash, Python, Node.js, Application clustering, load balancing, VMware vSphere API, Gitlab CI/CD, networking fundamentals, command line tools, HTTP, SSL, LDAP, SQL, HTML, XML, PostgreSQL, Keepalived, CI/CD, Blue/Green Deployments, Consul, Atomic Host, distributed computing, data systems, immutable infrastructure, serverless computing",0.3830026820199315
"Understanding of neural networks,Understanding of activation functions,Problem-solving,Critical thinking","Linux/UNIX administration, Docker, docker-compose, containerized development, Ansible, Packer, Terraform, OpenShift, Kubernetes, Bash, Python, Node.js, Application clustering, load balancing, VMware vSphere API, Gitlab CI/CD, networking fundamentals, command line tools, HTTP, SSL, LDAP, SQL, HTML, XML, PostgreSQL, Keepalived, CI/CD, Blue/Green Deployments, Consul, Atomic Host, distributed computing, data systems, immutable infrastructure, serverless computing",0.3830026820199315
"Conflict resolution, customer service, communication, empathy, problem-solving","SQL, RDBMS, Non-RDBMS, MySQL, PostgreSQL, MongoDB, Python, Jupyter Notebook, Inferential Statistics, Probability, ETL, Data Pipeline, Automated Reporting, Data Analysis, Statistical Modeling, Machine Learning, Git, Hive, Spark, Presto, Diagnostic Analytics, Forecasting, Big Data",0.4461664569088949


In [0]:
# Persist the transformed "similarity" column of each score table as a CSV file
# under MID_CALC_PATH. `toPandas()` collects the column to the driver first.
code_transformed_csv = os.path.join(MID_CALC_PATH, "code_questions_transformed_similarity.csv")
code_scores_after_transformation_df = code_scores_after_transformation.select("similarity").toPandas()
code_scores_after_transformation_df.to_csv(code_transformed_csv, index=False)

open_transformed_csv = os.path.join(MID_CALC_PATH, "open_questions_transformed_similarity.csv")
open_scores_after_transformation_df = open_scores_after_transformation.select("similarity").toPandas()
open_scores_after_transformation_df.to_csv(open_transformed_csv, index=False)

## Heuristic to match questions to jobs
Questions with the highest heuristic grades will be the most likely to appear in the interview.

In [0]:
from pyspark.sql.functions import col, lit, udf, abs, coalesce, when
from pyspark.sql.types import FloatType
from scipy.spatial.distance import cosine

# Numeric encoding of the question difficulty labels. Any other label (or a
# missing one) is left as null by the `when` chain below.
difficulty_map = {"Easy": 0, "Medium": 1, "Hard": 2}


def calculate_score(difficulty, similarity, acceptance):
    """Weighted heuristic: 0.3 * difficulty + 0.5 * similarity + 0.2 * acceptance.

    Only `*` and `+` are used, so the function accepts plain numbers as well
    as Spark Columns (in which case it returns a Column expression).
    """
    return 0.3 * difficulty + 0.5 * similarity + 0.2 * acceptance


# No longer used in this cell (the score is computed with native column
# arithmetic instead); kept defined in case another cell still references it.
calculate_score_udf = udf(calculate_score, FloatType())


def _column_mean(df, column_name):
    """Return the mean of `column_name` in `df` as a Python value."""
    return df.agg({column_name: "mean"}).collect()[0][0]


def _add_heuristic_score(df):
    """Append `heuristic_score` and drop the three component columns.

    The score is built from native Spark column arithmetic rather than a
    Python UDF: a null component yields a null score instead of a Python
    TypeError, and rows are not shipped to a Python worker. The cast keeps
    the FloatType that the UDF-based version produced.
    """
    score = calculate_score(
        col("difficulty_match"),
        col("similarity"),
        col("normalized_acceptance"),
    ).cast("float")
    return df.withColumn("heuristic_score", score).drop(
        "difficulty_match", "similarity", "normalized_acceptance"
    )


# ---- Code questions --------------------------------------------------------

# Change difficulty column to numeric
difficulty_numeric = (
    when(col("difficulty") == "Easy", difficulty_map["Easy"])
    .when(col("difficulty") == "Medium", difficulty_map["Medium"])
    .when(col("difficulty") == "Hard", difficulty_map["Hard"])
)

# Match question's difficulty to job posting's level. `abs` is
# pyspark.sql.functions.abs (imported at the top of this cell), not the builtin.
# Rows where the match cannot be computed get the neutral value 0.5; because of
# that fill, no later mean-based fill of `difficulty_match` is needed.
# Cached once here because several aggregations below re-read this table.
code_with_heuristic = (
    code_scores_after_transformation
    .withColumn("difficulty", difficulty_numeric)
    .withColumn("difficulty_match", 1 - abs(col("difficulty") - col("level")) / 2)
    .withColumn("difficulty_match", coalesce(col("difficulty_match"), lit(0.5)))
    .drop("difficulty")
    .cache()
)

# Normalize Acceptance for code questions by the largest observed acceptance
max_acceptance = code_with_heuristic.agg({"acceptance": "max"}).collect()[0][0]
code_with_heuristic = code_with_heuristic.withColumn(
    "normalized_acceptance", col("acceptance") / max_acceptance
).drop("acceptance")

# Fill remaining nulls with means taken from the code-question table itself
# (the similarity mean was previously read from the open-question table).
mean_similarity = _column_mean(code_with_heuristic, "similarity")
mean_acceptance = _column_mean(code_with_heuristic, "normalized_acceptance")

code_with_heuristic = _add_heuristic_score(
    code_with_heuristic
    .withColumn("similarity", coalesce(col("similarity"), lit(mean_similarity)))
    .withColumn(
        "normalized_acceptance",
        coalesce(col("normalized_acceptance"), lit(mean_acceptance)),
    )
).cache()
display(code_with_heuristic.head(70))

# ---- Open questions --------------------------------------------------------

# Open questions get the neutral value 0.5 for both the difficulty match and
# the normalized acceptance, so only similarity differentiates them.
open_with_heuristic = (
    open_scores_after_transformation
    .withColumn("difficulty_match", lit(0.5))
    .drop("difficulty")
    .withColumn("normalized_acceptance", lit(0.5))
    .cache()
)

mean_similarity = _column_mean(open_with_heuristic, "similarity")
open_with_heuristic = _add_heuristic_score(
    open_with_heuristic.withColumn(
        "similarity", coalesce(col("similarity"), lit(mean_similarity))
    )
).cache()
display(open_with_heuristic.head(70))

formatted_title,question_id,question,similar_questions,no_similar_questions,solution_URL,solution,topics,company_industry,field,level,company_name,job_summary,job_id,job_title,apply_link,post_link,skills,heuristic_score
find-the-array-concatenation-value,2562,"You are given a 0-indexed integer array nums. The concatenation of two numbers is the number formed by concatenating their numerals. For example, the concatenation of 15, 49 is 1549. The concatenation value of nums is initially equal to 0. Perform this operation until nums becomes empty: If there exists more than one number in nums, pick the first element and last element in nums respectively and add the value of their concatenation to the concatenation value of nums, then delete the first and last element from nums. If one element exists, add its value to the concatenation value of nums, then delete it. Return the concatenation value of the nums.  Example 1: Input: nums = [7,52,2,4] Output: 596 Explanation: Before performing any operation, nums is [7,52,2,4] and concatenation value is 0.  - In the first operation: We pick the first element, 7, and the last element, 4. Their concatenation is 74, and we add it to the concatenation value, so it becomes equal to 74. Then we delete them from nums, so nums becomes equal to [52,2].  - In the second operation: We pick the first element, 52, and the last element, 2. Their concatenation is 522, and we add it to the concatenation value, so it becomes equal to 596. Then we delete them from the nums, so nums becomes empty. Since the concatenation value is 596 so the answer is 596. Example 2: Input: nums = [5,14,13,8,12] Output: 673 Explanation: Before performing any operation, nums is [5,14,13,8,12] and concatenation value is 0.  - In the first operation: We pick the first element, 5, and the last element, 12. Their concatenation is 512, and we add it to the concatenation value, so it becomes equal to 512. Then we delete them from the nums, so nums becomes equal to [14,13,8].  - In the second operation: We pick the first element, 14, and the last element, 8. Their concatenation is 148, and we add it to the concatenation value, so it becomes equal to 660. 
Then we delete them from the nums, so nums becomes equal to [13].  - In the third operation: nums has only one element, so we pick 13 and add it to the concatenation value, so it becomes equal to 673. Then we delete it from nums, so nums become empty. Since the concatenation value is 673 so the answer is 673.  Constraints: 1 <= nums.length <= 1000 1 <= nums[i] <= 104",[''],0.0,https://leetcode.com/problems/find-the-array-concatenation-value/solution,,"Array, Two Pointers, Simulation","Computer Software, Internet, and Retail",Engineering,2.0,Mogul,"Location: Remote – Las Vegas At Mogul, we provide exclusive executive opportunities for top, diverse talent at the VP, SVP, EVP, President, C-Suite, or Board of Directors level across the Fortune 500 and more. Our client is a global leader in SaaS-based solutions for the Procurement industry. Our client is the nation's largest and most trusted community dedicated to sharing, rating, and reviewing deals and coupons. Think Reddit, but for deals and shopping. They are the leading social platform for shopping, where 12 million users interact to share the most up-to-date information on online shopping deals and coupons. Through the power of crowdsourcing, they have saved members more than $8.7 billion by providing a forum for communication and shopping tools such as free Android or iOS apps and browser extensions for Chrome and Edge. Summary Our client is looking for a Site Reliability Engineer (SRE) who can effectively fill a combined SysAdmin and DevOps role. The successful candidate will manage the company's Linux systems, which run predominantly open-source software and internally developed applications and tooling. The SRE team is responsible for production systems and the OS/applications running on them. This role will aid engineering and product teams by providing velocity and stability for development and product teams. 
As part of the SRE team, the successful candidate will be responsible for dealing with rare incidents and scheduled routine off-hours maintenance tasks. The role will be a significant contributor to the automated tooling that will help developers develop and the business do business. One of the first goals of the SRE will be to improve the company's monitoring platforms - moving the myriad of systems currently used into a ""single pane of glass,"" allowing the engineering and product teams to have a view more aligned to their needs. Secondarily this needs to be built in a way that will enable contribution from development teams since they are most familiar with their application. Essential Job Duties and Responsibilities The role combines aspects of cloud engineering, DevOps, and IT operations Ongoing development such as improving the automation of deploy workflows and tweaking server build cookbooks Contribute to systems that monitor servers and their logs Document and apply backup & restore procedures Keep systems up to date with software updates and patches to ensure the availability and quality of the company's online presence Serve as subject matter expert for infrastructure-related questions and issues that arise Integral in design and build of IaaS and ongoing improvements and maintenance to Kubernetes deployments Provisioning, maintaining, and improving our existing infrastructure Manage tools like Jenkins, Consul, Vault, and Git Work with Developers during the entire release process to improve release velocity Update, deploy and manage monitoring systems (Nagios, Cacti, Grafana, ELK) Manage, monitor, and improve HAProxy Load Balancers Participate in an on-call rotation Requirements 2+ years of experience in similar roles Well-versed in *nix Operating Systems (currently use CentOS and Ubuntu LTS) Good communication (and listening) skills Excellent reading comprehension and attention to detail Very comfortable with SSH, bash & sh, pipes, common UNIX tools 
Comfortable using software revision control (e.g. Git) Some experience administering Linux ""web"" servers at scale Working knowledge of DNS, HTTP, TLS, web security Experience with networking troubleshooting using tools such as tcpdump Preferred Experience: Scaling for a high traffic web/services environment Infrastructure as code (e.g. Chef/Puppet/Ansible/Terraform) Configuration management Continuous Integration and test automation frameworks MySQL in a high-performance distributed environment Containerization & virtualization Familiarity with interpreted languages Understanding of networking technologies, including design and troubleshooting Experience with on-premise cloud (IaaS) environments Testing & automation (CI/CD/Git hooks) Monitoring (e.g. ELK, Grafana) Email servers and their configuration (e.g. Postfix, SPF, DKIM, DMARC)",2675968976.0,Site Reliability Engineer at Client,,https://www.linkedin.com/jobs/view/site-reliability-engineer-at-client-at-mogul-2675968976,"Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.3444889485836029
queens-that-can-attack-the-king,1222,"On a 0-indexed 8 x 8 chessboard, there can be multiple black queens ad one white king. You are given a 2D integer array queens where queens[i] = [xQueeni, yQueeni] represents the position of the ith black queen on the chessboard. You are also given an integer array king of length 2 where king = [xKing, yKing] represents the position of the white king. Return the coordinates of the black queens that can directly attack the king. You may return the answer in any order.  Example 1: Input: queens = [[0,1],[1,0],[4,0],[0,4],[3,3],[2,4]], king = [0,0] Output: [[0,1],[1,0],[3,3]] Explanation: The diagram above shows the three queens that can directly attack the king and the three queens that cannot attack the king (i.e., marked with red dashes). Example 2: Input: queens = [[0,0],[1,1],[2,2],[3,4],[3,5],[4,4],[4,5]], king = [3,3] Output: [[2,2],[3,4],[4,4]] Explanation: The diagram above shows the three queens that can directly attack the king and the three queens that cannot attack the king (i.e., marked with red dashes).  Constraints: 1 <= queens.length < 64 queens[i].length == king.length == 2 0 <= xQueeni, yQueeni, xKing, yKing < 8 All the given positions are unique.",[''],0.0,https://leetcode.com/problems/queens-that-can-attack-the-king/solution,,"Array, Matrix, Simulation","Computer Software, Internet, and Retail",Engineering,2.0,Mogul,"Location: Remote – Las Vegas At Mogul, we provide exclusive executive opportunities for top, diverse talent at the VP, SVP, EVP, President, C-Suite, or Board of Directors level across the Fortune 500 and more. Our client is a global leader in SaaS-based solutions for the Procurement industry. Our client is the nation's largest and most trusted community dedicated to sharing, rating, and reviewing deals and coupons. Think Reddit, but for deals and shopping. 
They are the leading social platform for shopping, where 12 million users interact to share the most up-to-date information on online shopping deals and coupons. Through the power of crowdsourcing, they have saved members more than $8.7 billion by providing a forum for communication and shopping tools such as free Android or iOS apps and browser extensions for Chrome and Edge. Summary Our client is looking for a Site Reliability Engineer (SRE) who can effectively fill a combined SysAdmin and DevOps role. The successful candidate will manage the company's Linux systems, which run predominantly open-source software and internally developed applications and tooling. The SRE team is responsible for production systems and the OS/applications running on them. This role will aid engineering and product teams by providing velocity and stability for development and product teams. As part of the SRE team, the successful candidate will be responsible for dealing with rare incidents and scheduled routine off-hours maintenance tasks. The role will be a significant contributor to the automated tooling that will help developers develop and the business do business. One of the first goals of the SRE will be to improve the company's monitoring platforms - moving the myriad of systems currently used into a ""single pane of glass,"" allowing the engineering and product teams to have a view more aligned to their needs. Secondarily this needs to be built in a way that will enable contribution from development teams since they are most familiar with their application. 
Essential Job Duties and Responsibilities The role combines aspects of cloud engineering, DevOps, and IT operations Ongoing development such as improving the automation of deploy workflows and tweaking server build cookbooks Contribute to systems that monitor servers and their logs Document and apply backup & restore procedures Keep systems up to date with software updates and patches to ensure the availability and quality of the company's online presence Serve as subject matter expert for infrastructure-related questions and issues that arise Integral in design and build of IaaS and ongoing improvements and maintenance to Kubernetes deployments Provisioning, maintaining, and improving our existing infrastructure Manage tools like Jenkins, Consul, Vault, and Git Work with Developers during the entire release process to improve release velocity Update, deploy and manage monitoring systems (Nagios, Cacti, Grafana, ELK) Manage, monitor, and improve HAProxy Load Balancers Participate in an on-call rotation Requirements 2+ years of experience in similar roles Well-versed in *nix Operating Systems (currently use CentOS and Ubuntu LTS) Good communication (and listening) skills Excellent reading comprehension and attention to detail Very comfortable with SSH, bash & sh, pipes, common UNIX tools Comfortable using software revision control (e.g. Git) Some experience administering Linux ""web"" servers at scale Working knowledge of DNS, HTTP, TLS, web security Experience with networking troubleshooting using tools such as tcpdump Preferred Experience: Scaling for a high traffic web/services environment Infrastructure as code (e.g. 
Chef/Puppet/Ansible/Terraform) Configuration management Continuous Integration and test automation frameworks MySQL in a high-performance distributed environment Containerization & virtualization Familiarity with interpreted languages Understanding of networking technologies, including design and troubleshooting Experience with on-premise cloud (IaaS) environments Testing & automation (CI/CD/Git hooks) Monitoring (e.g. ELK, Grafana) Email servers and their configuration (e.g. Postfix, SPF, DKIM, DMARC)",2675968976.0,Site Reliability Engineer at Client,,https://www.linkedin.com/jobs/view/site-reliability-engineer-at-client-at-mogul-2675968976,"Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.5039038062095642
robot-return-to-origin,657,"There is a robot starting at the position (0, 0), the origin, on a 2D plane. Given a sequence of its moves, judge if this robot ends up at (0, 0) after it completes its moves. You are given a string moves that represents the move sequence of the robot where moves[i] represents its ith move. Valid moves are 'R' (right), 'L' (left), 'U' (up), and 'D' (down). Return true if the robot returns to the origin after it finishes all of its moves, or false otherwise. Note: The way that the robot is ""facing"" is irrelevant. 'R' will always make the robot move to the right once, 'L' will always make it move left, etc. Also, assume that the magnitude of the robot's movement is the same for each move.  Example 1: Input: moves = ""UD"" Output: true Explanation: The robot moves up once, and then down once. All moves have the same magnitude, so it ended up at the origin where it started. Therefore, we return true. Example 2: Input: moves = ""LL"" Output: false Explanation: The robot moves left twice. It ends up two ""moves"" to the left of the origin. We return false because it is not at the origin at the end of its moves.  Constraints: 1 <= moves.length <= 2 * 104 moves only contains the characters 'U', 'D', 'L' and 'R'.","[""'Number of Provinces'"", ""'Execution of All Suffix Instructions Staying in a Grid'"", ""'Furthest Point From Origin'""]",3.0,https://leetcode.com/problems/robot-return-to-origin/solution,,"String, Simulation","Computer Software, Internet, and Retail",Engineering,2.0,Mogul,"Location: Remote – Las Vegas At Mogul, we provide exclusive executive opportunities for top, diverse talent at the VP, SVP, EVP, President, C-Suite, or Board of Directors level across the Fortune 500 and more. Our client is a global leader in SaaS-based solutions for the Procurement industry. Our client is the nation's largest and most trusted community dedicated to sharing, rating, and reviewing deals and coupons. Think Reddit, but for deals and shopping. 
They are the leading social platform for shopping, where 12 million users interact to share the most up-to-date information on online shopping deals and coupons. Through the power of crowdsourcing, they have saved members more than $8.7 billion by providing a forum for communication and shopping tools such as free Android or iOS apps and browser extensions for Chrome and Edge. Summary Our client is looking for a Site Reliability Engineer (SRE) who can effectively fill a combined SysAdmin and DevOps role. The successful candidate will manage the company's Linux systems, which run predominantly open-source software and internally developed applications and tooling. The SRE team is responsible for production systems and the OS/applications running on them. This role will aid engineering and product teams by providing velocity and stability for development and product teams. As part of the SRE team, the successful candidate will be responsible for dealing with rare incidents and scheduled routine off-hours maintenance tasks. The role will be a significant contributor to the automated tooling that will help developers develop and the business do business. One of the first goals of the SRE will be to improve the company's monitoring platforms - moving the myriad of systems currently used into a ""single pane of glass,"" allowing the engineering and product teams to have a view more aligned to their needs. Secondarily this needs to be built in a way that will enable contribution from development teams since they are most familiar with their application. 
Essential Job Duties and Responsibilities The role combines aspects of cloud engineering, DevOps, and IT operations Ongoing development such as improving the automation of deploy workflows and tweaking server build cookbooks Contribute to systems that monitor servers and their logs Document and apply backup & restore procedures Keep systems up to date with software updates and patches to ensure the availability and quality of the company's online presence Serve as subject matter expert for infrastructure-related questions and issues that arise Integral in design and build of IaaS and ongoing improvements and maintenance to Kubernetes deployments Provisioning, maintaining, and improving our existing infrastructure Manage tools like Jenkins, Consul, Vault, and Git Work with Developers during the entire release process to improve release velocity Update, deploy and manage monitoring systems (Nagios, Cacti, Grafana, ELK) Manage, monitor, and improve HAProxy Load Balancers Participate in an on-call rotation Requirements 2+ years of experience in similar roles Well-versed in *nix Operating Systems (currently use CentOS and Ubuntu LTS) Good communication (and listening) skills Excellent reading comprehension and attention to detail Very comfortable with SSH, bash & sh, pipes, common UNIX tools Comfortable using software revision control (e.g. Git) Some experience administering Linux ""web"" servers at scale Working knowledge of DNS, HTTP, TLS, web security Experience with networking troubleshooting using tools such as tcpdump Preferred Experience: Scaling for a high traffic web/services environment Infrastructure as code (e.g. 
Chef/Puppet/Ansible/Terraform) Configuration management Continuous Integration and test automation frameworks MySQL in a high-performance distributed environment Containerization & virtualization Familiarity with interpreted languages Understanding of networking technologies, including design and troubleshooting Experience with on-premise cloud (IaaS) environments Testing & automation (CI/CD/Git hooks) Monitoring (e.g. ELK, Grafana) Email servers and their configuration (e.g. Postfix, SPF, DKIM, DMARC)",2675968976.0,Site Reliability Engineer at Client,,https://www.linkedin.com/jobs/view/site-reliability-engineer-at-client-at-mogul-2675968976,"Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.368908941745758
remove-all-occurrences-of-a-substring,2021,"Given two strings s and part , perform the following operation on s until all occurrences of the substring part are removed: Find the leftmost occurrence of the substring part and remove it from s . Return s after removing all occurrences of part . A substring is a contiguous sequence of characters in a string. Example 1: Input: s = ""daabcbaabcbc"", part = ""abc"" Output: ""dab"" Explanation : The following operations are done: - s = ""da abc baabcbc"", remove ""abc"" starting at index 2, so s = ""dabaabcbc"". - s = ""daba abc bc"", remove ""abc"" starting at index 4, so s = ""dababc"". - s = ""dab abc "", remove ""abc"" starting at index 3, so s = ""dab"". Now s has no occurrences of ""abc"". Example 2: Input: s = ""axxxxyyyyb"", part = ""xy"" Output: ""ab"" Explanation : The following operations are done: - s = ""axxx xy yyyb"", remove ""xy"" starting at index 4 so s = ""axxxyyyb"". - s = ""axx xy yyb"", remove ""xy"" starting at index 3 so s = ""axxyyb"". - s = ""ax xy yb"", remove ""xy"" starting at index 2 so s = ""axyb"". - s = ""a xy b"", remove ""xy"" starting at index 1 so s = ""ab"". Now s has no occurrences of ""xy"". 
Constraints: 1 <= s.length <= 1000 1 <= part.length <= 1000 s ​​​​​​ and part consists of lowercase English letters.",,,,"package com.fishercoder.solutions.firstthousand; import java.util.HashMap; import java.util.Map; public class _1 {  public static class Solution1 {  public int[] twoSum(int[] nums, int target) {  Map map = new HashMap();  for (int i = 0; i < nums.length; i++) {  if (map.containsKey(target - nums[i])) {  return new int[] {map.get(target - nums[i]), i};  } else {  map.put(nums[i], i);  }  }  return new int[] {-1, -1};  }  } }","String manipulation, substring identification, iterative processing, algorithm design",Hospital & Health Care,,,Renewal Rehab,"May hold an Assistant license (Physical Therapy Assistant or Certified Occupational Therapy Assistant); Must be currently licensed in Occupational Therapy,…",,Regional Director of Rehabilitation,,,"Licensed Occupational Therapist, Physical Therapy Assistant license (preferred), Occupational Therapy Assistant certification (preferred)",0.3664319217205047
making-a-large-island,827,"You are given an n x n binary matrix grid. You are allowed to change at most one 0 to be 1. Return the size of the largest island in grid after applying this operation. An island is a 4-directionally connected group of 1s.  Example 1: Input: grid = [[1,0],[0,1]] Output: 3 Explanation: Change one 0 to 1 and connect two 1s, then we get an island with area = 3. Example 2: Input: grid = [[1,1],[1,0]] Output: 4 Explanation: Change the 0 to 1 and make the island bigger, only one island with area = 4. Example 3: Input: grid = [[1,1],[1,1]] Output: 4 Explanation: Can't change any 0 to 1, only one island with area = 4.  Constraints: n == grid.length n == grid[i].length 1 <= n <= 500 grid[i][j] is either 0 or 1.",[''],0.0,https://leetcode.com/problems/making-a-large-island/solution,,"Array, Depth-First Search, Breadth-First Search, Union Find, Matrix",Defense and Space Manufacturing,Engineering,2.0,Leidos Australia,"Company Description At Leidos, we do work that really matters inspired by our mission to make the world safer, healthier, and more efficient through technology, engineering, and science. With 25 years of local experience, our over 2000 team members, work together to solve Australia’s toughest challenges in government, defence, intelligence and border protection. We’re robust and ambitious, and we empower our people to do their best work. You’ll feel inspired by what you can achieve and will be supported by an inclusive and flexible culture that genuinely cares for your wellbeing. Together, we can be the difference. Benefits We've got so much to offer at Leidos, here are a just a few of the Benefits we provide our team: 12 Extra Days Leave: Life Days are the Leidos way of recognising that we all need some extra time out to take care of life. By working slightly more than the minimum weekly hours (2 hours per week for full timers) you can accrue up to an extra 12 days of leave per year. 
Leidos Life Hub provides access to discount offers or cashback rewards with over 400 Australian and International retailers. Professional development and support to set you up for success and assist you in achieving your career aspirations. Our exciting programs at Leidos develop and support our Federal Government customers IT systems, including mission critical classified systems that cover a wide breadth of software engineering disciplines including enterprise systems, web applications, cyber security, geospatial systems and real-time signal processing. These are developed using the latest agile processes and industry best practices, leveraging leading commercial and open source frameworks and tools Job Description Design innovative software solutions to complex problems; Participate in/lead agile story mapping and estimation; Contribute to Continuous Integration processes; Integrate and deploy complex systems using SecDevOps practices; Undertake web focused User Interface development and User Experience evolution; Research, prototype, investigate and develop technologies and systems. [MS] have an understanding of and be able to apply principles of Model Based Systems Engineering (MBSE) If you have a desire to expand your career in a dynamic and collaborative workplace that encourages innovation, with a work scope that provides a real sense of purpose, then this role is for you. We are seeking talented agile software engineers who hold a current Australian Government Security clearance (NV1) Qualifications Experience with Agile, Scrum and associated methodologies and it is desirable that you have a Bachelor Degree (or higher) in Software Engineering, Computer Science or Electrical Engineering (with software engineering experience), however, we would also like to encourage candidates to apply that have a wide range of industry work experience from 2+ years. 
Experience Or Exposure To Some Of The Following Integration with external systems/applications (REST API, integration protocols, JSON/XML) Integration and customisation of open source technologies (NiFi, Kafka, Elastic (ELK) Stack, Solr) CI/CD tools and technologies and infrastructure as code (Ansible, Jenkins, git) Java and related open source frameworks Containerisation using Docker, Kubernetes JIRA and Confluence Linux and Windows We’ve been keeping Australia safer, healthier and more efficient for 25 years, here’s just three ways how We’re building the next generation secret end user environment for the Australian Department of Defence across a number of strategic locations around Australia. We deliver software development and operational support through to the integration of underwater autonomous vehicles and survey ships to meet mission objectives. We support key networks for the Australian Cyber Security Centre. Additional Information Applicants may also need to meet International Traffic in Arms Regulations (ITAR) requirements. In certain circumstances this can place limitations on persons who hold dual nationality, permanent residency or are former nationals of certain countries as per ITAR 126.1. At Leidos, we embrace diversity and are committed to creating a truly inclusive workplace. We welcome and encourage applications from Aboriginal and Torres Strait Islander peoples, culturally and linguistically diverse people, people with disabilities, veterans, neurodiverse people, and people of all genders, sexualities and age groups. 
Show more Show less",3853685723.0,Software Engineer/Developer,https://au.linkedin.com/jobs/view/software-engineer-developer-at-leidos-australia-3853685723,,"Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.5707583427429199
number-of-good-leaf-nodes-pairs,1530,"You are given the root of a binary tree and an integer distance. A pair of two different leaf nodes of a binary tree is said to be good if the length of the shortest path between them is less than or equal to distance. Return the number of good leaf node pairs in the tree.  Example 1: Input: root = [1,2,3,null,4], distance = 3 Output: 1 Explanation: The leaf nodes of the tree are 3 and 4 and the length of the shortest path between them is 3. This is the only good pair. Example 2: Input: root = [1,2,3,4,5,6,7], distance = 3 Output: 2 Explanation: The good pairs are [4,5] and [6,7] with shortest path = 2. The pair [4,6] is not good because the length of ther shortest path between them is 4. Example 3: Input: root = [7,1,4,6,null,5,3,null,null,null,null,null,2], distance = 3 Output: 1 Explanation: The only good pair is [2,5].  Constraints: The number of nodes in the tree is in the range [1, 210]. 1 <= Node.val <= 100 1 <= distance <= 10",[''],0.0,https://leetcode.com/problems/number-of-good-leaf-nodes-pairs/solution,,"Tree, Depth-First Search, Binary Tree",Defense and Space Manufacturing,Engineering,2.0,Leidos Australia,"Company Description At Leidos, we do work that really matters inspired by our mission to make the world safer, healthier, and more efficient through technology, engineering, and science. With 25 years of local experience, our over 2000 team members, work together to solve Australia’s toughest challenges in government, defence, intelligence and border protection. We’re robust and ambitious, and we empower our people to do their best work. You’ll feel inspired by what you can achieve and will be supported by an inclusive and flexible culture that genuinely cares for your wellbeing. Together, we can be the difference. 
Benefits We've got so much to offer at Leidos, here are a just a few of the Benefits we provide our team: 12 Extra Days Leave: Life Days are the Leidos way of recognising that we all need some extra time out to take care of life. By working slightly more than the minimum weekly hours (2 hours per week for full timers) you can accrue up to an extra 12 days of leave per year. Leidos Life Hub provides access to discount offers or cashback rewards with over 400 Australian and International retailers. Professional development and support to set you up for success and assist you in achieving your career aspirations. Our exciting programs at Leidos develop and support our Federal Government customers IT systems, including mission critical classified systems that cover a wide breadth of software engineering disciplines including enterprise systems, web applications, cyber security, geospatial systems and real-time signal processing. These are developed using the latest agile processes and industry best practices, leveraging leading commercial and open source frameworks and tools Job Description Design innovative software solutions to complex problems; Participate in/lead agile story mapping and estimation; Contribute to Continuous Integration processes; Integrate and deploy complex systems using SecDevOps practices; Undertake web focused User Interface development and User Experience evolution; Research, prototype, investigate and develop technologies and systems. [MS] have an understanding of and be able to apply principles of Model Based Systems Engineering (MBSE) If you have a desire to expand your career in a dynamic and collaborative workplace that encourages innovation, with a work scope that provides a real sense of purpose, then this role is for you. 
We are seeking talented agile software engineers who hold a current Australian Government Security clearance (NV1) Qualifications Experience with Agile, Scrum and associated methodologies and it is desirable that you have a Bachelor Degree (or higher) in Software Engineering, Computer Science or Electrical Engineering (with software engineering experience), however, we would also like to encourage candidates to apply that have a wide range of industry work experience from 2+ years. Experience Or Exposure To Some Of The Following Integration with external systems/applications (REST API, integration protocols, JSON/XML) Integration and customisation of open source technologies (NiFi, Kafka, Elastic (ELK) Stack, Solr) CI/CD tools and technologies and infrastructure as code (Ansible, Jenkins, git) Java and related open source frameworks Containerisation using Docker, Kubernetes JIRA and Confluence Linux and Windows We’ve been keeping Australia safer, healthier and more efficient for 25 years, here’s just three ways how We’re building the next generation secret end user environment for the Australian Department of Defence across a number of strategic locations around Australia. We deliver software development and operational support through to the integration of underwater autonomous vehicles and survey ships to meet mission objectives. We support key networks for the Australian Cyber Security Centre. Additional Information Applicants may also need to meet International Traffic in Arms Regulations (ITAR) requirements. In certain circumstances this can place limitations on persons who hold dual nationality, permanent residency or are former nationals of certain countries as per ITAR 126.1. At Leidos, we embrace diversity and are committed to creating a truly inclusive workplace. 
We welcome and encourage applications from Aboriginal and Torres Strait Islander peoples, culturally and linguistically diverse people, people with disabilities, veterans, neurodiverse people, and people of all genders, sexualities and age groups. Show more Show less",3853685723.0,Software Engineer/Developer,https://au.linkedin.com/jobs/view/software-engineer-developer-at-leidos-australia-3853685723,,"Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.4801308214664459
create-binary-tree-from-descriptions,2196,"You are given a 2D integer array descriptions where descriptions[i] = [parenti, childi, isLefti] indicates that parenti is the parent of childi in a binary tree of unique values. Furthermore, If isLefti == 1, then childi is the left child of parenti. If isLefti == 0, then childi is the right child of parenti. Construct the binary tree described by descriptions and return its root. The test cases will be generated such that the binary tree is valid.  Example 1: Input: descriptions = [[20,15,1],[20,17,0],[50,20,1],[50,80,0],[80,19,1]] Output: [50,20,80,15,17,19] Explanation: The root node is the node with value 50 since it has no parent. The resulting binary tree is shown in the diagram. Example 2: Input: descriptions = [[1,2,1],[2,3,0],[3,4,1]] Output: [1,2,null,null,3,4] Explanation: The root node is the node with value 1 since it has no parent. The resulting binary tree is shown in the diagram.  Constraints: 1 <= descriptions.length <= 104 descriptions[i].length == 3 1 <= parenti, childi <= 105 0 <= isLefti <= 1 The binary tree described by descriptions is valid.","[""'Convert Sorted List to Binary Search Tree'"", ""'Number Of Ways To Reconstruct A Tree'""]",2.0,https://leetcode.com/problems/create-binary-tree-from-descriptions/solution,,"Array, Hash Table, Tree, Depth-First Search, Breadth-First Search, Binary Tree",Defense and Space Manufacturing,Engineering,2.0,Leidos Australia,"Company Description At Leidos, we do work that really matters inspired by our mission to make the world safer, healthier, and more efficient through technology, engineering, and science. With 25 years of local experience, our over 2000 team members, work together to solve Australia’s toughest challenges in government, defence, intelligence and border protection. We’re robust and ambitious, and we empower our people to do their best work. 
You’ll feel inspired by what you can achieve and will be supported by an inclusive and flexible culture that genuinely cares for your wellbeing. Together, we can be the difference. Benefits We've got so much to offer at Leidos, here are a just a few of the Benefits we provide our team: 12 Extra Days Leave: Life Days are the Leidos way of recognising that we all need some extra time out to take care of life. By working slightly more than the minimum weekly hours (2 hours per week for full timers) you can accrue up to an extra 12 days of leave per year. Leidos Life Hub provides access to discount offers or cashback rewards with over 400 Australian and International retailers. Professional development and support to set you up for success and assist you in achieving your career aspirations. Our exciting programs at Leidos develop and support our Federal Government customers IT systems, including mission critical classified systems that cover a wide breadth of software engineering disciplines including enterprise systems, web applications, cyber security, geospatial systems and real-time signal processing. These are developed using the latest agile processes and industry best practices, leveraging leading commercial and open source frameworks and tools Job Description Design innovative software solutions to complex problems; Participate in/lead agile story mapping and estimation; Contribute to Continuous Integration processes; Integrate and deploy complex systems using SecDevOps practices; Undertake web focused User Interface development and User Experience evolution; Research, prototype, investigate and develop technologies and systems. [MS] have an understanding of and be able to apply principles of Model Based Systems Engineering (MBSE) If you have a desire to expand your career in a dynamic and collaborative workplace that encourages innovation, with a work scope that provides a real sense of purpose, then this role is for you. 
We are seeking talented agile software engineers who hold a current Australian Government Security clearance (NV1) Qualifications Experience with Agile, Scrum and associated methodologies and it is desirable that you have a Bachelor Degree (or higher) in Software Engineering, Computer Science or Electrical Engineering (with software engineering experience), however, we would also like to encourage candidates to apply that have a wide range of industry work experience from 2+ years. Experience Or Exposure To Some Of The Following Integration with external systems/applications (REST API, integration protocols, JSON/XML) Integration and customisation of open source technologies (NiFi, Kafka, Elastic (ELK) Stack, Solr) CI/CD tools and technologies and infrastructure as code (Ansible, Jenkins, git) Java and related open source frameworks Containerisation using Docker, Kubernetes JIRA and Confluence Linux and Windows We’ve been keeping Australia safer, healthier and more efficient for 25 years, here’s just three ways how We’re building the next generation secret end user environment for the Australian Department of Defence across a number of strategic locations around Australia. We deliver software development and operational support through to the integration of underwater autonomous vehicles and survey ships to meet mission objectives. We support key networks for the Australian Cyber Security Centre. Additional Information Applicants may also need to meet International Traffic in Arms Regulations (ITAR) requirements. In certain circumstances this can place limitations on persons who hold dual nationality, permanent residency or are former nationals of certain countries as per ITAR 126.1. At Leidos, we embrace diversity and are committed to creating a truly inclusive workplace. 
We welcome and encourage applications from Aboriginal and Torres Strait Islander peoples, culturally and linguistically diverse people, people with disabilities, veterans, neurodiverse people, and people of all genders, sexualities and age groups. Show more Show less",3853685723.0,Software Engineer/Developer,https://au.linkedin.com/jobs/view/software-engineer-developer-at-leidos-australia-3853685723,,"Agile methodologies, Scrum, Software Engineering, Computer Science, Electrical Engineering, REST API, JSON/XML, Open Source Technologies (NiFi, Kafka, Elastic Stack, Solr), CI/CD, Ansible, Jenkins, Git, Java, Docker, Kubernetes, JIRA, Confluence, Linux, Windows, Model Based Systems Engineering (MBSE), SecDevOps, User Interface Development, User Experience, Web Application Development, System Integration, Software Design, Problem Solving, Continuous Integration",0.4906989932060241
maximum-product-subarray,152,"Given an integer array nums, find a subarray that has the largest product, and return the product. The test cases are generated so that the answer will fit in a 32-bit integer.  Example 1: Input: nums = [2,3,-2,4] Output: 6 Explanation: [2,3] has the largest product 6. Example 2: Input: nums = [-2,0,-1] Output: 0 Explanation: The result cannot be 2, because [-2,-1] is not a subarray.  Constraints: 1 <= nums.length <= 2 * 104 -10 <= nums[i] <= 10 The product of any prefix or suffix of nums is guaranteed to fit in a 32-bit integer.","[""'Maximum Subarray'"", ""'House Robber'"", ""'Product of Array Except Self'"", ""'Maximum Product of Three Numbers'"", ""'Subarray Product Less Than K'""]",5.0,https://leetcode.com/problems/maximum-product-subarray/solution,"package com.fishercoder.solutions.firstthousand; import java.util.HashMap; import java.util.Map; public class _1 {  public static class Solution1 {  public int[] twoSum(int[] nums, int target) {  Map map = new HashMap();  for (int i = 0; i < nums.length; i++) {  if (map.containsKey(target - nums[i])) {  return new int[] {map.get(target - nums[i]), i};  } else {  map.put(nums[i], i);  }  }  return new int[] {-1, -1};  }  } }","Array, Dynamic Programming",Wine & Spirits,,,Heaven Hill Brands,"This individual will execute at the field level in on-premise accounts and will require the ability to evaluate, implement and execute such programs within our…",,Deep Eddy Vodka On Premise Channel Manager – Massachusetts,,,"Evaluation, Implementation, Execution, On-premise support, Field work",0.4753826260566711
append-k-integers-with-minimal-sum,2195,"You are given an integer array nums and an integer k. Append k unique positive integers that do not appear in nums to nums such that the resulting total sum is minimum. Return the sum of the k integers appended to nums.  Example 1: Input: nums = [1,4,25,10,25], k = 2 Output: 5 Explanation: The two unique positive integers that do not appear in nums which we append are 2 and 3. The resulting sum of nums is 1 + 4 + 25 + 10 + 25 + 2 + 3 = 70, which is the minimum. The sum of the two integers appended is 2 + 3 = 5, so we return 5. Example 2: Input: nums = [5,6], k = 6 Output: 25 Explanation: The six unique positive integers that do not appear in nums which we append are 1, 2, 3, 4, 7, and 8. The resulting sum of nums is 5 + 6 + 1 + 2 + 3 + 4 + 7 + 8 = 36, which is the minimum. The sum of the six integers appended is 1 + 2 + 3 + 4 + 7 + 8 = 25, so we return 25.  Constraints: 1 <= nums.length <= 105 1 <= nums[i] <= 109 1 <= k <= 108","[""'Remove K Digits'"", ""'Find All Numbers Disappeared in an Array'"", ""'Kth Missing Positive Number'"", ""'Maximum Number of Integers to Choose From a Range I'"", ""'Maximum Number of Integers to Choose From a Range II'""]",5.0,https://leetcode.com/problems/append-k-integers-with-minimal-sum/solution,,"Array, Math, Greedy, Sorting",Wine & Spirits,,,Heaven Hill Brands,"This individual will execute at the field level in on-premise accounts and will require the ability to evaluate, implement and execute such programs within our…",,Deep Eddy Vodka On Premise Channel Manager – Massachusetts,,,"Evaluation, Implementation, Execution, On-premise support, Field work",0.441324770450592
tweet-counts-per-frequency,1348,"A social media company is trying to monitor activity on their site by analyzing the number of tweets that occur in select periods of time. These periods can be partitioned into smaller time chunks based on a certain frequency (every minute, hour, or day). For example, the period [10, 10000] (in seconds) would be partitioned into the following time chunks with these frequencies: Every minute (60-second chunks): [10,69], [70,129], [130,189], ..., [9970,10000] Every hour (3600-second chunks): [10,3609], [3610,7209], [7210,10000] Every day (86400-second chunks): [10,10000] Notice that the last chunk may be shorter than the specified frequency's chunk size and will always end with the end time of the period (10000 in the above example). Design and implement an API to help the company with their analysis. Implement the TweetCounts class: TweetCounts() Initializes the TweetCounts object. void recordTweet(String tweetName, int time) Stores the tweetName at the recorded time (in seconds). List getTweetCountsPerFrequency(String freq, String tweetName, int startTime, int endTime) Returns a list of integers representing the number of tweets with tweetName in each time chunk for the given period of time [startTime, endTime] (in seconds) and frequency freq.  freq is one of ""minute"", ""hour"", or ""day"" representing a frequency of every minute, hour, or day respectively.  
Example: Input [""TweetCounts"",""recordTweet"",""recordTweet"",""recordTweet"",""getTweetCountsPerFrequency"",""getTweetCountsPerFrequency"",""recordTweet"",""getTweetCountsPerFrequency""] [[],[""tweet3"",0],[""tweet3"",60],[""tweet3"",10],[""minute"",""tweet3"",0,59],[""minute"",""tweet3"",0,60],[""tweet3"",120],[""hour"",""tweet3"",0,210]] Output [null,null,null,null,[2],[2,1],null,[4]] Explanation TweetCounts tweetCounts = new TweetCounts(); tweetCounts.recordTweet(""tweet3"", 0); // New tweet ""tweet3"" at time 0 tweetCounts.recordTweet(""tweet3"", 60); // New tweet ""tweet3"" at time 60 tweetCounts.recordTweet(""tweet3"", 10); // New tweet ""tweet3"" at time 10 tweetCounts.getTweetCountsPerFrequency(""minute"", ""tweet3"", 0, 59); // return [2]; chunk [0,59] had 2 tweets tweetCounts.getTweetCountsPerFrequency(""minute"", ""tweet3"", 0, 60); // return [2,1]; chunk [0,59] had 2 tweets, chunk [60,60] had 1 tweet tweetCounts.recordTweet(""tweet3"", 120); // New tweet ""tweet3"" at time 120 tweetCounts.getTweetCountsPerFrequency(""hour"", ""tweet3"", 0, 210); // return [4]; chunk [0,210] had 4 tweets  Constraints: 0 <= time, startTime, endTime <= 109 0 <= endTime - startTime <= 104 There will be at most 104 calls in total to recordTweet and getTweetCountsPerFrequency.","[""'Design Video Sharing Platform'""]",1.0,https://leetcode.com/problems/tweet-counts-per-frequency/solution,"package com.fishercoder.solutions.firstthousand; import java.util.HashMap; import java.util.Map; public class _1 {  public static class Solution1 {  public int[] twoSum(int[] nums, int target) {  Map map = new HashMap();  for (int i = 0; i < nums.length; i++) {  if (map.containsKey(target - nums[i])) {  return new int[] {map.get(target - nums[i]), i};  } else {  map.put(nums[i], i);  }  }  return new int[] {-1, -1};  }  } }","Hash Table, Binary Search, Design, Sorting, Ordered Set","Information Technology and Services, Computer Software, and Internet",Engineering and Information 
Technology,1.0,Mozilla,"Now more than ever, the Internet is a utility that facilitates modern life. At Mozilla, we take this to heart, striving to build products that keep the Internet open, accessible, and secure for everyone. We collect terabytes of data every day from millions of users to guide our decision-making processes. We need your help to enable the future of Mozilla in a way that makes us proud! As a Data Scientist within the Data Org, you will work as part of a cross-functional team that is responsible for understanding and empowering the future of Mozilla. The Role The Data Science team sits at the intersection of finance, business, operations, product, engineering, and leadership. We collaborate closely with these partners to provide meaningful insights. As a Data Scientist At Mozilla You Will Apply a variety of statistical methods including causal inference to understand the intricate ecosystem of our users, products, partners, revenue Build key data sets to empower operational and exploratory analysis Define, evaluate and report on key business metrics Generate data-informed insights and communicate these data stories to colleagues to influence decision making across Mozilla Contribute to the team through mentorship, technical methods, improvements in how we work Your Professional Profile You lead with empathy. You value teamwork and teammates. You are invested in knowledge sharing and learning from others. You contribute positively and meaningfully to cultivate an inclusive and equitable team culture. You have a background in quantitative analysis knowing how to analyze, visualize, and interpret data and have 2+ years of experience applying these skills in an industry setting. You have experience in SQL or SQL-like environments, as well as scripting languages such as Python or R and desire to gain further technical experience on the job. You have owned complex projects from inception to completion and your work has influenced entire organizations. 
You’ve contributed to and executed against data science roadmaps in collaboration with others. You take stakeholder needs into account, bridging methodology and data into meaningful strategy from your analyses. You are a communicator who crafts impactful data narratives that inform company decisions around product, business, and finance. Experience synthesizing product and revenue data is a plus! About Mozilla Mozilla exists to build the Internet as a public resource accessible to all because we believe that open and free is better than closed and controlled. When you work at Mozilla, you give yourself a chance to make a difference in the lives of Web users everywhere. And you give us a chance to make a difference in your life every single day. Join us to work on the Web as the platform and help create more opportunity and innovation for everyone online. Commitment to diversity, equity, inclusion, and belonging Mozilla understands that valuing diverse creative practices and forms of knowledge are crucial to and enrich the company’s core mission. We encourage applications from everyone, including members of all equity-seeking communities, such as (but certainly not limited to) women, racialized and Indigenous persons, persons with disabilities, and persons of all sexual orientations and gender identities and expressions. We will ensure that qualified individuals with disabilities are provided reasonable accommodations to participate in the job application or interview process, to perform essential job functions, and to receive other benefits and privileges of employment, as appropriate. Please contact us at hiringaccomodation@mozilla.com to request accommodation. We are an equal opportunity employer. 
We do not discriminate on the basis of race (including hairstyle and texture), religion (including religious grooming and dress practices), gender, gender identity, gender expression, color, national origin, pregnancy, ancestry, domestic partner status, disability, sexual orientation, age, genetic predisposition, medical condition, marital status, citizenship status, military or veteran status, or any other basis covered by applicable laws. Mozilla will not tolerate discrimination or harassment based on any of these characteristics or any other unlawful behavior, conduct, or purpose. Group: D",2642485072.0,Inference Data Scientist,,https://www.linkedin.com/jobs/view/inference-data-scientist-at-mozilla-2642485072,"Statistical methods, Causal inference, Data analysis, Data visualization, Data interpretation, SQL, Python, R, Project management, Data storytelling, Communication, Stakeholder management, Teamwork, Mentorship, Product data analysis, Revenue data analysis",0.6135098934173584


question_id,question,category,topics,company_industry,field,level,company_name,job_summary,job_id,job_title,apply_link,post_link,skills,heuristic_score
134,How would you deal with an angry or irate customer?,General,"Conflict resolution, customer service, communication, empathy, problem-solving","Information Technology and Services, Computer Software, and Financial Services",Information Technology,1.0,F5,"Job DescriptionThe Challenges You Get To Accomplish Do you take pride in excellent and quality customer success? Then this could be an exciting opportunity for you. For many of our customers, our Support Centers are the first port of call when facing an IT crisis. In 2017, F5 Networks support centers fielded over 2,600 cases per week and over 130,000 cases annually. This hub of fast and reliable F5 specialist help us keep our customer satisfaction scores consistently above 9.0 (and higher!) out of 10. We are seeking a dynamic Sr Network Support Engineer (Sr NSE) who prides themselves as a credible and knowledgeable Enterprise Support professional. You would be supporting our Japanese customers and Partners! You will be providing remote technical assistance on F5 solutions to both internal and external customers and F5 partners. You'll be handling multiple active cases of diverse scope where analysis of customer network environments and customer specific data requires a review of identifiable factors, arrive at a conclusion which can either be a workaround, restore or resolve the customer’s problem with keeping customers satisfied. You'll exercise excellent judgment within an ISO certified quality management system set of defined procedures in order to select the best approach among several possible methods and techniques, to build a plan of action and take appropriate action. Your Day-to-Day Are you a self-starter and independent thinker? Our ideal member would be someone who is creative and independent thinkers as you will receive little day to day instructions on your work, and general instructions on new projects or assignments. 
Demonstrates good judgment to select the best methods and techniques to provide a diverse scope of technical support (Level 1 to Level 3) to resolve problems and hardware and software issues on F5 product and services, based on data analysis of a complex set of customer specific factors. Proactively and effectively communicate status, plan-of-action, and resolution of issues based on an ISO Quality Management System defined a set of procedures. Provides F5 customers and partners with a consistently high-quality support experience Participates in ongoing training with F5 products and related technologies Maintains high schedule adherence (work hours and on-phone time) Effectively handles case escalations to tier 3 (Engineering Services) while maintaining customer communication, with limited assistance/mentoring from senior support personnel or management Manages multiple routine cases and prioritizes based on customer and business needs Collaborate with other Senior Network personnel and build strong working relationships with peers, Escalation Engineers, and Managers, or other related F5 departments. Superb communication with our customers, by phone, email and/or Zoom, MS Team, and accept ownership of issues until a resolution along with providing high customer satisfaction. Work hours are scheduled shifts corresponding to forecasted customer activity. The Job Description is intended to be a general representation of the responsibilities and requirements of the job. However, the description may not be all-inclusive, and responsibilities and requirements are subject to change. What You Will Bring to the Team Must be able to read, write and speak English AND Japanese fluently, including technical concepts and terminology. Multiple language skills a plus. 
Use the KCS methodology for contributing to the growth and maintenance of F5 Support's knowledge base As a valued member, you should be proficient with a number of troubleshooting tools and equipment in the course of providing resolutions to cases. Your excellent customer service skills together with experience supporting corporate customers and service providers in production environments. Hands-on technical experience preferred with internetworking/data center operations including WAN operations (e.g. DNS, Open SSL, Cryptography, virtualization, etc), Network Protocols, TCP/IP, OSI Model, UNIX or Linux (e.g. Redhat) operating systems, VMware or equivalent hypervisors and network hardware preferred. The potential for exposure to Network Security (e.g. Web Application Security, AAA, VPN, DDoS, Malware, Application Firewalls, etc) Proficient with Windows OS We seek an independent self-starter as you would work under limited or little instructions on routine work. Evidence of building strong internal/external partnership within a team environment. Experience with the main Customer Relationship Management system. Siebel experience preferred. Analytical thinker with good attention to detail Should effectively relay technical information to customers of varying skill levels, including senior technical customer levels. Qualifications Bachelor’s degree (or equivalent related experience) and minimum 5+ years of experience in a professional technical support role or equivalent experience, working with relevant technologies Must be able to read, write, and speak English and Japanese at native a speaker’s level Would Be Excellent If You Had These Additional consideration for any experience with Public/Private Cloud technologies (Microsoft, AWS, VMware, Google, Rackspace, Oracle), Programming/Scripting (JavaScript, Python, BASH,PERL, Node.JS), Containers (OpenStack, Docker, Velcro), Container Orchestration (Kubernetes) and/or Automation (Ansible, Git). 
Physical demands and work environment Duties are performed in a normal office environment while sitting at a desk or computer table. Duties require the ability to use a computer, communicate over the telephone via headset, and read printed material. Working in an environment where work hours are scheduled shifts corresponding to forecasted customers activity. This role may be required to work outside of core business hours, including early morning, late evening, overnight, weekends, and/or holidays as needed. Occasional travel may be required (5-10% of work time) The Job Description is intended to be a general representation of the responsibilities and requirements of the job. However, the description may not be all-inclusive, and responsibilities and requirements are subject to change. Phishing Alert Please note that F5 only contacts candidates through F5 email address (ending with @f5.com) or auto email notification from Yello/Workday (ending with f5.com or @myworkday.com). Equal Employment Opportunity It is the policy of F5 to provide equal employment opportunities to all employees and employment applicants without regard to unlawful considerations of race, religion, color, national origin, sex, sexual orientation, gender identity or expression, age, sensory, physical, or mental disability,marital status, veteran or military status, genetic information, or any other classification protected by applicable local, state, or federal laws.This policy applies to all aspects of employment, including, but not limited to, hiring, job assignment, compensation, promotion, benefits, training, discipline, and termination. 
Reasonable accommodation is available for qualified individuals with disabilities, upon request.",2706811200.0,Network Support Engineer (Japanese),,https://www.linkedin.com/jobs/view/network-support-engineer-japanese-at-f5-2706811200,"Troubleshooting, Customer Support, Network Engineering, WAN Operations, TCP/IP, OSI Model, Linux/UNIX, VMware, Network Security, Windows OS, Communication (written & verbal - English & Japanese), Data Analysis, Problem-solving, KCS Methodology, Siebel (CRM), Public/Private Cloud Technologies, Scripting (e.g., Python, JavaScript), Containerization (e.g., Docker), Container Orchestration (e.g., Kubernetes), Automation (e.g., Ansible), Technical Documentation, Multitasking, Prioritization, Teamwork, ISO Quality Management Systems.",0.4857658445835113
57,What problem does Bi-LSTM solve instead of only LSTM,Data Science,"Understanding of LSTMs, Understanding of Bi-LSTMs, Comparative analysis, Problem-solving","Computer & Network Security, Computer Software, and Information Technology and Services",Engineering and Information Technology,0.0,Okta,"We are looking for an experienced Site Reliability Engineering to join Okta’s Technical Operations Team. At Okta our motto is ""Always On"", and nowhere do we embrace that more than in Technical Operations. We strive to build the most reliable and performant systems on the planet through the skillful use of automation. We've created an integrated system that securely connects any person via any device to the technologies they need to do their most significant work. This SRE Monitoring role is ideal for someone who has a love of monitoring and automation technology and enjoys seeing their team grow and succeed. The monitoring team at Okta is instrumental in providing visibility into Okta's large scale production environment and helping our customers high availability. The Ideal Candidate Has a track record of working in high-performing teams whilst still being hands-on. Has production experience with AWS cloud-based infrastructure. Has operated complex custom applications on UNIX/Linux and/or Enterprise Java platforms Is passionate about monitoring, observability and actionable alerts at scale. Is a champion for automation and leveraging agile software development methodologies Has in-depth knowledge of industry standard commercial or open source monitoring tools Job Duties And Responsibilities Become a team member of experienced engineers using agile development Delivery of monitoring components: Collaborate with TPM, architects, and executive management Design and code reviews Partner with Okta security teams. 
Partner with recruiting to hire staff Continuously refine monitoring processes, thresholds, and configuration Respond to issues and escalations and participate in a management on-call rotation Work closely with our monitoring tool vendors to drive improvement and economies Minimum REQUIRED Knowledge, Skills, And Abilities Experience with Amazon Web Services and knowledge of building and configuring AWS services Experience with managing Linux Systems in production. Proficient in at least one scripting language (Bash, Perl, Ruby, Python) Production use of one of the following tools Splunk, Wavefront, Zabbix, ELK, Prometheus or Grafana Prior experience in software development, DevOps role, or SRE role Okta’s Top 5 Core Leadership Competencies are part of the deeply ingrained principles that guide all of our company’s actions. They also align strongly to our cultural cornerstones, our Okta values: love our customers, empower our people, never stop innovating, act with integrity, and maintain transparency. It’s our expectation that our managers and leaders embody these core competencies: Builds Effective Teams: Building strong-identity teams that apply their diverse skills and perspectives to achieve common goals. Demonstrates Self-Awareness (EQ): Using a combination of feedback and reflection to gain productive insight into personal strengths and weaknesses. Develops Talent: Developing people to meet both their career goals and the organization’s goals. Drives Results: Consistently achieving results, even under tough circumstances. Strategic Mindset: Seeing ahead to future possibilities and translating them into breakthrough strategies. Okta is an Equal Opportunity Employer. Okta is rethinking the traditional work environment, providing our employees with the flexibility to be their most creative and successful versions of themselves, no matter where they are located. 
We enable a flexible approach to work, meaning for roles where it makes sense, you can work from the office, or from home, regardless of where you live. Okta invests in the best technologies and provides flexible benefits and collaborative work environments/experiences, empowering employees to work productively in a setting that best and uniquely suits their needs. Find your place at Okta https://www.okta.com/company/careers/. By submitting an application, you agree to the retention of your personal data for consideration for a future position at Okta. More details about Okta’s privacy practices can be found at: https://www.okta.com/privacy-policy.",2675353052.0,Site Reliability Engineer (SRE)_Monitoring,,https://www.linkedin.com/jobs/view/site-reliability-engineer-sre-monitoring-at-okta-2675353052,"AWS, Linux, Bash/Perl/Ruby/Python, Splunk/Wavefront/Zabbix/ELK/Prometheus/Grafana, Software Development, DevOps, SRE, Monitoring, Observability, Alerting, Automation, Agile, Team Leadership, Communication, Problem-solving",0.5080862045288086
130,Tell me how you handled a difficult situation.,General,"Problem-solving, communication, critical thinking, storytelling, self-awareness",Mechanical or Industrial Engineering,"Information Technology, Business Development, and Engineering",1.0,Schaeffler,"Description Schaeffler , a global automotive and industrial supplier, is seeking a Data Scientist in the Charlotte, North Carolina area.The goal of this position is to discover the information hidden in the vast amounts of data, help business to make smarter decisions, optimize operating processes and deliver even better products. The primary focus will be in applying data mining techniques, doing advanced statistical analysis, and building high quality machine learning or deep learning models and deploying them as productive solutions to get integrated with our existing IT digital solutions and products Key responsibilities include but not limited to: Lead discovery processes with business stakeholders to identify opportunities and business problems and framing them into an IT data science/ advanced data analytics project initiative. Identification and extraction of available and relevant data from internal and external data sources to perform data science solution development. Perform data cleansing using data processing and statistical software packages. Perform data quality assessments and statistical testing to verify data quality and data integrity Exploratory data analysis for extracting business insights from large volume of data using data science toolkits and statistical packages. Translate and visualize data into information and insights to discover trends and patterns for solving the business problem and improving the Key Performance Indicators (KPIs) Develop custom data models, standard statistical models, machine learning or deep learning algorithms for building diagnostic, predictive and prescriptive data science solution. 
Coordinate with different functional agile teams such as data engineering and software development to deploy the data science solutions to production and integrate it with existing IT digital solutions and products Qualified candidates will have: Master’s degree in Applied Mathematics, Statistics, Machine Learning, Electrical Engineering, Computer Science/Information Systems or related fields or MBA with quantitative focus. 2-3 years of Data Science Experience in industry environment. Industrial research and development and advanced data analytics experience. Experience in data mining for large volumes of data, extracting insights and building prediction and system optimization models. Comprehensive statistical knowledge (regression/classification models, design of experiments, statistical testing etc.) and experience applying it to real-world projects. Strong knowledge in the application of Machine Learning Experience with common data science toolkits, such as SQL, R and/or Python with high proficiency in the use of opensource statistical and machine learning packages (numpy, pandas, scikit-learn, stats tool etc.) Experience delivering data science projects from model development to production deployment Any of the following qualifications is a plus Deep Learning algorithms (TensorFlow or Equivalent framework), ideally demonstrated by relevant industrial experience. Experience in deploying predictive or prescriptive data science solutions from any embedded electronic sensor data streams (such as IoT sensors, PLC systems, condition monitoring systems, wearables etc.) 
Schaeffler is an Equal Opportunity Employer/Minorities/Females/Disabled/Veterans",2690043448.0,Data Scientist,,https://www.linkedin.com/jobs/view/data-scientist-at-schaeffler-2690043448,"Data Mining, Statistical Analysis, Machine Learning, Deep Learning, Data Cleansing, Data Quality Assessment, Exploratory Data Analysis, Data Visualization, Model Development, Predictive Modeling, Prescriptive Modeling, SQL, R, Python, NumPy, Pandas, Scikit-learn, TensorFlow, Agile Development, Data Engineering, Software Development, Stakeholder Management, Communication, Problem-Solving, Business Acumen",0.5067728757858276
47,difference between Vanishing gradient Vs Exploding gradient,Data Science,"Understanding of deep learning concepts,Knowledge of gradient descent,Problem-solving",Manufacturing,Engineering and Information Technology,1.0,Hilo EV Ltd,"Job Title: Software Engineer Location: Cambridge. Hybrid and flexible working is ingrained into our culture, this permanent role offers a mix of office and home working. We are seeking a highly skilled and motivated Software Engineer with proven industry experience to join our team, who would relish the opportunity to be instrumental in the development of urban mobility products and technology. Your passion for innovation will drive forward-thinking electrical solutions to realise a ‘best-in-class’ product. About Us: At Hilo, we’re creating class-defining micromobility products that set new standards in safety and urban integration. We think people, and our planet, deserve better and our mission is clear – to allow people to move around in a safer, cleaner way, and to achieve greater gender equality in micromobility ridership. To achieve this, we’re bringing together the best creative talents to realise our technological innovations in compelling micromobility products and services. We’re seeking a forward-thinking Software Engineer to join our dynamic team, to help deliver our range of leading micromobility products. About the Role: We are seeking a highly skilled and motivated Software Engineer to join our team. The ideal candidate will be a qualified engineer with a strong passion for sustainability and micromobility. They must possess expertise in PCB design, software development, and coding, along with a keen understanding of safety standards relevant to the industry. A successful candidate will play a crucial role in working with suppliers, selecting components, and collaborating with clients to develop tailored solutions. 
As an instrumental part of a fast-paced innovative development programme, you’ll be comfortable quickly leading the development of electrical architectures. You’ll be highly motivated to hit key milestones by working pro-actively both autonomously and within a diverse team, both from our Cambridgeshire R&D centre and remotely. Fundamentally, you’ll be a positive, creative colleague who is a problem solver with excellent communication skills. You’ll understand the value of strategic product innovation, and how to communicate that to the consumer through intelligent, intuitive electrical solutions. Key Responsibilities: Work closely with key stakeholders and clients to understand their needs and translate them into technical specifications Design and create comprehensive schematic diagrams of the electrical systems, including understanding and implementing design best practices Translate schematics into optimized PCB layouts, considering component placement and signal routing, using industry-standard PCB design software like Altium Designer, KiCad or Eagle PCB Write and debug code for embedded systems, including firmware and software applications Collaborate with suppliers to select components that meet quality standards and performance requirements Ensure compliance with industry safety standards and regulations throughout the development process Conduct thorough testing and validation of software and hardware systems to identify and resolve any issues Qualifications: Bachelor's degree in Electrical Engineering, Computer Science, or a related field, and at least 3 years industry experience Proven experience in PCB design and layout, preferably in the electric vehicle industry Proficiency in coding languages such as C/C++ or Python Familiarity with safety standards relevant to electric vehicle manufacturing Strong problem-solving skills and attention to detail. 
Excellent communication and interpersonal skills, with the ability to work effectively in a team environment. Passion for sustainability and a genuine interest in advancing micromobility solutions. About you: Creates strong and trusted relationships Takes ownership for driving performance Demonstrates ongoing commitment to self-development to drive high performance Prioritises effectively to deliver maximum value Builds alignment that delivers outstanding team work Respects others and values their diversity Collaborates within and across the company Knows and leverages their own strengths Understands the constraints and mechanisms to overcome in bringing about change What you will get: 40 Hours per week, flexible work days and location. Completive salary, based on experience 25 days holiday Pension contribution More information: We encourage candidates to submit their applications as early as possible and not wait until the published closing date. Hilo’s recruitment periods can and may vary. We reserve the right to remove this advert or close it to further applications at any point during the recruitment process. Please send your CV and covering letter to careers@hiloev.co Application deadline: 15/04/2024 At Hilo, we work towards the highest standards in everything we do, including how we support, value and develop our people. Our aim is to encourage and support employees to thrive and be the best they can be. We celebrate the difference people can bring into our organisation, and welcome and encourage applicants with diverse experiences and backgrounds, and offer flexible and tailored support, at home and in the office. Our goal is to drive, develop and operate our business in a way that results in a more inclusive culture. All employment is decided on the basis of qualifications, the innovation from diverse teams & perspectives and business need. 
We are committed to building a workforce so we can represent the communities we serve and have a working environment in which each individual feels valued, respected, fairly treated, and able to reach their full potential. Show more Show less",3855488382.0,Software Engineer,https://uk.linkedin.com/jobs/view/software-engineer-at-hilo-ev-ltd-3855488382,,"PCB design, Software development, Coding, C/C++, Python, Embedded systems, Firmware, Altium Designer, KiCad, Eagle PCB, Electrical Engineering, Problem-solving, Communication, Teamwork, Stakeholder management, Client collaboration, Supplier management, Safety standards (electric vehicles), Sustainability, Micromobility, Strategic product innovation",0.5078698992729187
130,Tell me how you handled a difficult situation.,General,"Problem-solving, communication, critical thinking, storytelling, self-awareness","Banking, Financial Services, and Information Technology and Services",Finance and Information Technology,2.0,Phyton Talent Advisors,"Principal Data Scientist (open to Remote) ﻿ Job Summary: As part of the Enterprise Data team, the Principal Data Scientist supports development of next generation data platforms by employing data analytics and various data science capabilities (i.e. machine learning). Incumbents will be part of an innovative and energetic team that develops capabilities which will influence our business model and help Raymond James build the next generation data platform. The Principal Data Scientist will have access to the vast amount of data stored in heterogeneous formats. Will handle complex problems independently and demonstrate analytical thinking. Is able to make judgements and recommendations based on the analysis and interpretation of data. Although the role is primarily based out of our corporate headquarters in St. Petersburg, FL, we are also open to considering candidates based out of other locations in the U.S. Essential Duties and Responsibilities: • Analyze and understand the business model, profile/analyze the data stored in various heterogeneous formats and help develop data insights. • Identify patterns in the data and prepare an inventory of items which could be developed/trained using data science capabilities like machine learning. • Utilize appropriate pre built models or help develop new models to predict and derive insights from heterogeneous data stores. • Ability to explore and implement advanced technological solutions such as machine learning. • Ability to identify unique data sets and information that may be useful for model development and analysis. 
• Under limited direction, seeks to understand what moves the needle for our financial advisors and our overall business model and explores data that could be helpful. • Performs other duties and responsibilities as assigned. Qualifications Knowledge, Skills, and Abilities: Knowledge of: • Extensive experience with various statistical and machine learning techniques and methods of applied mathematics, particularly around natural language processing, time series analysis, mathematical optimization, graph theory, and quantitative finance. • Working knowledge of Python and R. • Familiarity with symbolic AI and hybrid symbolic and machine learning approaches. • Familiarity with data engineering and MLOps. • Familiarity with visualization and analytics. • Familiarity with model risk, governance, and controls. • Familiarity with cloud, version control, DevOps basics. • Experience in Financial Services or Wealth Management industries strongly preferred. Skill in: • Rigorous development of statistical and machine learning models, including model validation, monitoring, explainability, and interpretability • Reconciling cutting edge machine learning ideas, including transformers, k-shot learning, and self-supervised learning, with the requirements and limitations of an established financial services firm • Bespoke statistical inference and machine learning algorithm research and development. • Interfacing with data engineering and DevOps technical partners. • Exceptional communication and presentation skills and the ability to explain technical concepts to a non-technical audience, with demonstrated skill in managing stakeholders and connecting business insights to appropriate algorithms and methods. Ability to: • Identify and understand issues, problems and opportunities; compare data from different sources to draw conclusions. 
• Clearly convey information and ideas through a variety of media to individuals or groups in a manner that engages the audience and helps them understand and retain the message. • Use effective approaches for choosing a course of action or developing appropriate solutions; recommend or take action that is consistent with available facts, constraints and probable consequences. • Demonstrate a satisfactory level of technical and professional skill or knowledge in position-related areas; remains current with developments and trends in areas of expertise. • Develop and use collaborative relationships to facilitate the accomplishment of work goals. • Make internal and external clients and their needs a primary focus of actions; develop and sustain productive client relationships. Education/Previous Experience: • Minimum of a Master’s degree in Statistics, Mathematics, Computer Science, MIS or related degree and seven (7) years of relevant experience or combination of education, training and experience. • PhD in Statistic, Mathematics, or Computer Science and ten (10) years of experience preferred. • OR ~ • An equivalent combination of education, experience and training. 
Licenses/Certifications: • None required.",2547636001.0,Principal Data Scientist (open to Remote),,https://www.linkedin.com/jobs/view/principal-data-scientist-open-to-remote-at-phyton-talent-advisors-2547636001,"Statistical modeling, Machine learning, Natural language processing, Time series analysis, Mathematical optimization, Graph theory, Quantitative finance, Python, R, Symbolic AI, Hybrid symbolic and machine learning, Data engineering, MLOps, Data visualization, Model risk management, Model governance, Cloud computing, Version control, DevOps, Financial services, Wealth management, Model validation, Model monitoring, Model explainability, Model interpretability, Transformer models, K-shot learning, Self-supervised learning, Algorithm research, Stakeholder management, Communication, Presentation skills, Technical communication, Data analysis, Problem-solving, Decision-making, Collaboration",0.5119950771331787
47,difference between Vanishing gradient Vs Exploding gradient,Data Science,"Understanding of deep learning concepts,Knowledge of gradient descent,Problem-solving","Computer Software, Internet, and Retail",Engineering,2.0,Mogul,"Location: Remote – Las Vegas At Mogul, we provide exclusive executive opportunities for top, diverse talent at the VP, SVP, EVP, President, C-Suite, or Board of Directors level across the Fortune 500 and more. Our client is a global leader in SaaS-based solutions for the Procurement industry. Our client is the nation's largest and most trusted community dedicated to sharing, rating, and reviewing deals and coupons. Think Reddit, but for deals and shopping. They are the leading social platform for shopping, where 12 million users interact to share the most up-to-date information on online shopping deals and coupons. Through the power of crowdsourcing, they have saved members more than $8.7 billion by providing a forum for communication and shopping tools such as free Android or iOS apps and browser extensions for Chrome and Edge. Summary Our client is looking for a Site Reliability Engineer (SRE) who can effectively fill a combined SysAdmin and DevOps role. The successful candidate will manage the company's Linux systems, which run predominantly open-source software and internally developed applications and tooling. The SRE team is responsible for production systems and the OS/applications running on them. This role will aid engineering and product teams by providing velocity and stability for development and product teams. As part of the SRE team, the successful candidate will be responsible for dealing with rare incidents and scheduled routine off-hours maintenance tasks. The role will be a significant contributor to the automated tooling that will help developers develop and the business do business. 
One of the first goals of the SRE will be to improve the company's monitoring platforms - moving the myriad of systems currently used into a ""single pane of glass,"" allowing the engineering and product teams to have a view more aligned to their needs. Secondarily this needs to be built in a way that will enable contribution from development teams since they are most familiar with their application. Essential Job Duties and Responsibilities The role combines aspects of cloud engineering, DevOps, and IT operations Ongoing development such as improving the automation of deploy workflows and tweaking server build cookbooks Contribute to systems that monitor servers and their logs Document and apply backup & restore procedures Keep systems up to date with software updates and patches to ensure the availability and quality of the company's online presence Serve as subject matter expert for infrastructure-related questions and issues that arise Integral in design and build of IaaS and ongoing improvements and maintenance to Kubernetes deployments Provisioning, maintaining, and improving our existing infrastructure Manage tools like Jenkins, Consul, Vault, and Git Work with Developers during the entire release process to improve release velocity Update, deploy and manage monitoring systems (Nagios, Cacti, Grafana, ELK) Manage, monitor, and improve HAProxy Load Balancers Participate in an on-call rotation Requirements 2+ years of experience in similar roles Well-versed in *nix Operating Systems (currently use CentOS and Ubuntu LTS) Good communication (and listening) skills Excellent reading comprehension and attention to detail Very comfortable with SSH, bash & sh, pipes, common UNIX tools Comfortable using software revision control (e.g. 
Git) Some experience administering Linux ""web"" servers at scale Working knowledge of DNS, HTTP, TLS, web security Experience with networking troubleshooting using tools such as tcpdump Preferred Experience: Scaling for a high traffic web/services environment Infrastructure as code (e.g. Chef/Puppet/Ansible/Terraform) Configuration management Continuous Integration and test automation frameworks MySQL in a high-performance distributed environment Containerization & virtualization Familiarity with interpreted languages Understanding of networking technologies, including design and troubleshooting Experience with on-premise cloud (IaaS) environments Testing & automation (CI/CD/Git hooks) Monitoring (e.g. ELK, Grafana) Email servers and their configuration (e.g. Postfix, SPF, DKIM, DMARC)",2675968976.0,Site Reliability Engineer at Client,,https://www.linkedin.com/jobs/view/site-reliability-engineer-at-client-at-mogul-2675968976,"Linux System Administration, DevOps, Cloud Engineering, Automation, Bash Scripting, SSH, Git, Jenkins, Consul, Vault, Networking, TCP/IP, DNS, HTTP, TLS, Web Security, Troubleshooting, Monitoring (Nagios, Cacti, Grafana, ELK), HAProxy, Kubernetes, Infrastructure as Code (IaC), Configuration Management, CI/CD, Containerization, Virtualization, MySQL, Backup and Restore, Documentation, Communication, Problem-solving, Software Updates, High Availability (HA), Security, CentOS, Ubuntu, Python/other interpreted languages (Preferred), Email Server Administration (Preferred)",0.4693641364574432
70,Why is Rectified Linear Unit a good activation function,Data Science,"Understanding of activation functions,Knowledge of neural networks,Comparative analysis,Problem-solving",-,,,Massachusetts Bay Transportation Authority,"Salary $95,810.00 - $110,000.00 Annually Location 10 Park Plaza, Boston Job Type Full-Time Department MBTA - Commercial Strategy Programs Job Number 22-19079…",,Deputy Director of Reduced Fare Programs,,,"The provided text only gives salary, location, type, department, and job number. It offers no information about required skills. Therefore, it's impossible to infer a comma-separated list of skills.",0.4419497847557068
108,Why large filter sizes in early layers can be a bad choice How to choose filter size,Data Science,"Understanding of convolutional neural networks, critical thinking, problem-solving",,,,Infinity Direct,"Title: Data Scientist Job Classification: Full-Time, Exempt Department: Consumer Insight and Analytics Summary: The Consumer Insight and Analytics group partners with both internal and external clients, and data providers, leveraging predictive analytics and advanced statistical techniques to drive strategic thought and effective decision making. The Data Scientist is responsible for applying and interpreting various statistical and data analysis techniques in support of a variety of analytic solutions. The key objective of this position is to assist internal and external stakeholders in the development of target audience for client or prospect acquisition strategies through data modeling, profiling, and RFM analysis. Proactively promote knowledge and awareness of current and future best practices in the direct marketing arena. 
Essential Duties & Responsibilities: Provide quality, statistically valid analysis and related output Employ and interpret appropriate data analysis techniques and statistical methodologies Executing A/B and multi-variate testing, predictive modeling, marketing mix modeling and attribution modeling Strategically collaborate with Business Development in assessing prospect marketing strategies and opportunities, especially related to analytics and segmenting Take a proactive role in analysis design and execution Appropriately account for the timeliness and quality of all assignments Collaborate with cross-functional internal resources QA/QC data and analysis output to ensure accuracy Manage multiple projects concurrently Monitor project progress relative to timeline and scope Present statistical results to both technical and non-technical audience in a clear manner Minimum Qualifications: Familiarity and basic knowledge of open source statistical programs like R & Python Demonstrated proficiency in SQL programming Bachelor's degree in a quantitative discipline (e.g., statistics, economics, mathematics, marketing analytics) or significant relevant coursework Minimum of 2 years of related experience Proficiency with various statistical methodologies such as regression analysis (both linear and non-linear), cluster analysis, CHAID, factor analysis/principal component analysis, time series, survival models, experimental designs using statistical software Proficiency in MS Office; including PowerPoint, Word, Excel, Access and Outlook Desirable Qualifications: Advanced degree (Master's/PhD) in Statistics, Economics or other quantitative discipline Database marketing experience/knowledge",2670484779.0,Data Scientist,,https://www.linkedin.com/jobs/view/data-scientist-at-infinity-direct-2670484779,"Statistical analysis, Data modeling, RFM analysis, Predictive modeling, Marketing mix modeling, Attribution modeling, A/B testing, Multivariate testing, Regression analysis, 
Cluster analysis, CHAID, Factor analysis, Principal component analysis, Time series analysis, Survival analysis, Experimental design, SQL, R, Python, Data visualization, Presentation skills, Communication skills, Project management, Collaboration, Data quality assurance (QA/QC), Database marketing",0.4881521165370941
39,How to Select a Batch Size Will selecting a batch size produce better or worse results?,Data Science,"Problem-solving, critical thinking, understanding of machine learning concepts","Information Technology and Services, Computer Software, and Semiconductors",Engineering and Information Technology,1.0,Cadence Design Systems,"At Cadence, we hire and develop leaders and innovators who want to make an impact on the world of technology. At Cadence, you’ll be part of a team that’s passionate about innovation and excited to work together to make a difference. We’re looking for dynamic team players who are ready to push the limits of what’s possible. You will immediately work on real projects alongside industry experts. We empower you as a future leader to collaborate with our global teams, develop ideas, and then run with them. Key Qualifications Strong verbal and written communication skills Strong teamwork skills with the ability to collaborate with multiple geographies and cross-functionally. Experience varies depending on area of interest but may include the following: C/C++, Python, Linux-based development, VLSI CAD algorithm development, designing data structures, algorithms, and software engineering principles Description Your time at Cadence will give you everything you need to launch your career. Below are the various areas within the Digital Signoff Group you can explore: Characterization: You will be a member of an expert R&D team creating technologies and products that enable static and dynamic transistor level analysis and characterization of the most advanced custom digital and mixed-signal circuits built for communication, IOT and AI markets. Experience with quality and software processes, developing and maintaining C++ based applications on a Unix or Linux environment. Proficiency designing data structures, algorithms, and software engineering principles. 
DDI - Routing : Your focus area will be developing the next generation routing engine while working in a team environment to design, improve, and maintain core routing technology as part of Innovus Digital Implementation system. Routing algorithm knowledge is preferred to implement a robust and efficient routing engine. DDI – Synthesis : You will be working with a mentor to develop and improve global placement and detailed placement QoR in Innovus Digital Implementation System. Strong problem solving, analysis, and programming skills are required. Product Engineering: You will be part of a dynamic environment working with innovative R&D and Customer Engagement teams to influence the development of software tools for advanced chip design platforms. Designing, developing, troubleshooting and debugging software programs on Unix/Linux platforms. Supporting customers in performing successful tapeouts of their System-on-chip designs. Machine Learning: You will be part of a high energy team exploring and implementing Machine Learning and Deep Learning techniques to Electronic Design Automation [EDA] tools. You would be involved with researching and developing Machine Learning approaches to problems in the EDA and system design, as well as designing, implementing, verifying and maintaining software to address those markets. Tempus Timing Signoff: You will be responsible for designing, developing, troubleshooting and debugging software programs in the areas of static timing analysis with a focus on statistical analysis and incremental timing in presence of signal integrity effects. Experience with deep algorithmic knowledge and intent to build highly scalable solutions in C/C++, combined with passion to innovate. Verification: You will be focused on developing algorithms in the physical design space. 
You will be exposed to the development of software for Physical Verification of Integrated Circuits in a cross-geographical, multi-cultural environment and given the opportunity to learn and develop solutions for Physical Verification, which includes testing, profiling and analyzing the performance of geometric and topological functions, as well as development of prototype geometric and topological functions for improved efficiency. We’re doing work that matters. Help us solve what others can’t.",2706162033.0,"Software Engineer, New College Graduate",,https://www.linkedin.com/jobs/view/software-engineer-new-college-graduate-at-cadence-design-systems-2706162033,"C/C++, Python, Linux-based development, VLSI CAD algorithm development, data structures, algorithms, software engineering principles, strong verbal and written communication skills, strong teamwork skills, problem-solving skills, analysis skills, programming skills, debugging skills, troubleshooting skills, statistical analysis, machine learning, deep learning, routing algorithm knowledge, quality and software processes, Unix/Linux platform experience",0.5162054896354675
47,difference between Vanishing gradient Vs Exploding gradient,Data Science,"Understanding of deep learning concepts,Knowledge of gradient descent,Problem-solving","Information Technology and Services, Computer Software, and Internet",Engineering and Information Technology,1.0,Mozilla,"Now more than ever, the Internet is a utility that facilitates modern life. At Mozilla, we take this to heart, striving to build products that keep the Internet open, accessible, and secure for everyone. We collect terabytes of data every day from millions of users to guide our decision-making processes. We need your help to enable the future of Mozilla in a way that makes us proud! As a Data Scientist within the Data Org, you will work as part of a cross-functional team that is responsible for understanding and empowering the future of Mozilla. The Role The Data Science team sits at the intersection of finance, business, operations, product, engineering, and leadership. We collaborate closely with these partners to provide meaningful insights. As a Data Scientist At Mozilla You Will Apply a variety of statistical methods including causal inference to understand the intricate ecosystem of our users, products, partners, revenue Build key data sets to empower operational and exploratory analysis Define, evaluate and report on key business metrics Generate data-informed insights and communicate these data stories to colleagues to influence decision making across Mozilla Contribute to the team through mentorship, technical methods, improvements in how we work Your Professional Profile You lead with empathy. You value teamwork and teammates. You are invested in knowledge sharing and learning from others. You contribute positively and meaningfully to cultivate an inclusive and equitable team culture. You have a background in quantitative analysis knowing how to analyze, visualize, and interpret data and have 2+ years of experience applying these skills in an industry setting. 
You have experience in SQL or SQL-like environments, as well as scripting languages such as Python or R and desire to gain further technical experience on the job. You have owned complex projects from inception to completion and your work has influenced entire organizations. You’ve contributed to and executed against data science roadmaps in collaboration with others. You take stakeholder needs into account, bridging methodology and data into meaningful strategy from your analyses. You are a communicator who crafts impactful data narratives that inform company decisions around product, business, and finance. Experience synthesizing product and revenue data is a plus! About Mozilla Mozilla exists to build the Internet as a public resource accessible to all because we believe that open and free is better than closed and controlled. When you work at Mozilla, you give yourself a chance to make a difference in the lives of Web users everywhere. And you give us a chance to make a difference in your life every single day. Join us to work on the Web as the platform and help create more opportunity and innovation for everyone online. Commitment to diversity, equity, inclusion, and belonging Mozilla understands that valuing diverse creative practices and forms of knowledge are crucial to and enrich the company’s core mission. We encourage applications from everyone, including members of all equity-seeking communities, such as (but certainly not limited to) women, racialized and Indigenous persons, persons with disabilities, and persons of all sexual orientations and gender identities and expressions. We will ensure that qualified individuals with disabilities are provided reasonable accommodations to participate in the job application or interview process, to perform essential job functions, and to receive other benefits and privileges of employment, as appropriate. Please contact us at hiringaccomodation@mozilla.com to request accommodation. 
We are an equal opportunity employer. We do not discriminate on the basis of race (including hairstyle and texture), religion (including religious grooming and dress practices), gender, gender identity, gender expression, color, national origin, pregnancy, ancestry, domestic partner status, disability, sexual orientation, age, genetic predisposition, medical condition, marital status, citizenship status, military or veteran status, or any other basis covered by applicable laws. Mozilla will not tolerate discrimination or harassment based on any of these characteristics or any other unlawful behavior, conduct, or purpose. Group: D",2642485072.0,Inference Data Scientist,,https://www.linkedin.com/jobs/view/inference-data-scientist-at-mozilla-2642485072,"Statistical methods, Causal inference, Data analysis, Data visualization, Data interpretation, SQL, Python, R, Project management, Data storytelling, Communication, Stakeholder management, Teamwork, Mentorship, Product data analysis, Revenue data analysis",0.5103309750556946


In [0]:
# Select Top Questions for Each Job
from pyspark.sql import Window
from pyspark.sql.functions import row_number
import pandas as pd

# How many of the highest-scoring questions are kept for every job posting.
TOP_QUESTIONS_PER_JOB = 20

# Only the header is needed to learn the job column names, so do not parse any
# data rows (nrows=0) -- the job summaries are large multi-line quoted fields.
job_cols = pd.read_csv(
    os.path.join(JOBS_PATH, "jobs_sample.csv"), header=0, nrows=0
).columns

# Rows sharing the same values in every job column form one partition; within
# a partition rows are ordered from the highest heuristic_score to the lowest.
# NOTE(review): rows with equal heuristic_score have no secondary sort key, so
# their relative rank is whatever order Spark happens to produce.
window_spec = Window.partitionBy(*job_cols).orderBy(col("heuristic_score").desc())


def select_top_questions(scored_df, limit=TOP_QUESTIONS_PER_JOB):
    """Keep the `limit` best-scoring rows of `scored_df` for each job.

    Args:
        scored_df: Spark DataFrame containing every column in `job_cols`
            plus a `heuristic_score` column.
        limit: Maximum number of rows kept per job partition.

    Returns:
        `scored_df` with an added 1-based `rank` column (1 = highest
        `heuristic_score` within the job), filtered to `rank <= limit`.
    """
    ranked = scored_df.withColumn("rank", row_number().over(window_spec))
    return ranked.filter(col("rank") <= limit)


top_code_questions = select_top_questions(code_with_heuristic)
top_open_questions = select_top_questions(open_with_heuristic)

In [0]:
from consts import DATA_PATH


def _export_to_csv(spark_df, file_name):
    """Collect `spark_df` to pandas and write it to `file_name` under DATA_PATH.

    The pandas index is not written to the file. Returns the collected
    pandas DataFrame so it stays available to later cells.
    """
    collected = spark_df.toPandas()
    collected.to_csv(os.path.join(DATA_PATH, file_name), index=False)
    return collected


# Persist the ranked question selections, code questions first, then open ones.
top_code_questions_df = _export_to_csv(top_code_questions, "top_code_questions.csv")
top_open_questions_df = _export_to_csv(top_open_questions, "top_open_questions.csv")