In [0]:
from pyspark.sql.types import *
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, regexp_extract, explode, lower, length, trim, when, regexp_replace, collect_list, struct, concat_ws
from pyspark.sql import functions as F

import sparknlp
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.linalg import Vectors
from pyspark.ml.functions import array_to_vector

import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt

In [0]:
spark = sparknlp.start()
clusters_pd = pd.read_csv('data/clustering_4options.csv')
requirements_clusters = spark.createDataFrame(clusters_pd)

cluster_titles_pd = pd.read_csv('data/Grouped_Clusters.csv')
final_clusters = spark.createDataFrame(cluster_titles_pd)



In [0]:
# Convert string columns to arrays

def to_float_array(embedding_str):
    return [float(x) for x in embedding_str.strip("[]").split(",")]
def to_int_array(embedding_str):
    return [int(x) for x in embedding_str.strip("[]").split(",")]
def to_string_array(embedding_str):
    return [x for x in embedding_str.strip("[]").split(",")]
    
to_float_array_udf = udf(to_float_array, ArrayType(FloatType()))
requirements_clusters = requirements_clusters.withColumn("Embeddings", to_float_array_udf(col("Embeddings_Model1"))).drop("Embeddings_Model1", "Embeddings_Model2")

to_int_array_udf = udf(to_int_array, ArrayType(IntegerType()))
to_string_array_udf = udf(to_string_array, ArrayType(StringType()))
final_clusters = final_clusters.withColumn("Clusters", to_int_array_udf(col("Clusters"))).withColumn("Job_Titles", to_string_array_udf(col("Job_Titles")))


# Map cluster numbers to cluster titles
cluster_mapping = final_clusters.select(explode("Clusters").alias("Cluster"), "Cluster_Title")
requirements_clusters_new = requirements_clusters.join(
    cluster_mapping,
    requirements_clusters["Cluster_DBSCAN_Model1"] == cluster_mapping["Cluster"],
    how="left"
).drop("Cluster")

requirements_clusters_new.display()

Title,Company,Full Job Description,Search Word,Description,Description_English,Requirements_Text,Cluster_spark_KMeans_1,Cluster_spark_KMeans_2,Cluster_DBSCAN_Model1,Cluster_DBSCAN_Model2,Embeddings,Cluster_Title
Data Analytics,Apple,"Summary Posted: Nov 7, 2024 Role Number: 200577967 We're looking for an exceptional data analyst with a strong background in distributed data processing and a demonstrated ability to turn data into actionable insights. As a data analyst for our Storage Data Analytics team at Apple, you'll play a crucial role in developing the fastest and most efficient SSDs for our products, including the iPhone, iPad, Apple Watch, and new Apple Silicon Macs. If you're passionate about continuously improving the ways we use data to make Apple's products amazing, we want to hear from you! Description On the Storage Data Analytics team, we are responsible for performing ad hoc data analysis, development and maintenance of distributed data pipelines, as well as creation and support of analysis tools. Our team focuses on Apple’s storage solutions. It is small and nimble, able to quickly explore ideas and present them to the storage teams and to leadership. Minimum Qualifications +3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Opened URL: https://il.indeed.com/rc/clk?jk=0433bc7e45e694a3&bb=r6UIYw2mow7Pn8IzQwq2_7gXyX4L0a9oUPOLDIseoxAN8l0MRFFfCqpU6vSJRCmVlzxZCXcY6V-lzbB66NkEzDukGjQJgUJrBPff0HR1apFcJos6F0MbonwvLMjHhNz9&xkcb=SoDD67M33wH7H8WWaB0JbzkdCdPP&fccid=8e007f7a76f9cee5&vjs=3",data,"Summary Posted: Nov 7, 2024 Role Number: 200577967 We're looking for an exceptional data analyst with a strong background in distributed data processing and a demonstrated ability to turn data into actionable insights. As a data analyst for our Storage Data Analytics team at Apple, you'll play a crucial role in developing the fastest and most efficient SSDs for our products, including the iPhone, iPad, Apple Watch, and new Apple Silicon Macs. If you're passionate about continuously improving the ways we use data to make Apple's products amazing, we want to hear from you! Description On the Storage Data Analytics team, we are responsible for performing ad hoc data analysis, development and maintenance of distributed data pipelines, as well as creation and support of analysis tools. Our team focuses on Apple’s storage solutions. It is small and nimble, able to quickly explore ideas and present them to the storage teams and to leadership. Minimum Qualifications +3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)","Summary Posted: Nov 7, 2024 Role Number: 200577967 We're looking for an exceptional data analyst with a strong background in distributed data processing and a demonstrated ability to turn data into actionable insights. As a data analyst for our Storage Data Analytics team at Apple, you'll play a crucial role in developing the fastest and most efficient SSDs for our products, including the iPhone, iPad, Apple Watch, and new Apple Silicon Macs. If you're passionate about continuously improving the ways we use data to make Apple's products amazing, we want to hear from you! Description On the Storage Data Analytics team, we are responsible for performing ad hoc data analysis, development and maintenance of distributed data pipelines, as well as creation and support of analysis tools. Our team focuses on Apple’s storage solutions. It is small and nimble, able to quickly explore ideas and present them to the storage teams and to leadership. Minimum Qualifications +3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)","+3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)",17,23,0,0,"List(-0.008841365, -0.03922254, -0.035059825, 0.042735167, -0.07066962, -0.11702762, -0.027481457, 0.014314407, -0.104267225, 0.027926553, -0.13308541, -0.056480568, 0.08141239, -0.026536204, 0.009246755, 0.043634046, -0.012438819, -0.06463415, 0.019930793, -0.13849823, -0.06763474, -0.043868758, 0.025196284, -0.0983635, 0.046145473, -0.016239071, 0.0032367348, 0.0067900005, -0.007220647, 0.030812275, -0.0065880693, 0.040012065, -0.012177706, 0.084536985, 0.077133514, 0.05039852, 0.047842044, 0.018171517, -0.0024092682, 0.013607141, 0.04229354, -0.041193184, -0.03633726, -0.07317762, -0.03508492, -0.019576976, -0.009679355, -0.037679937, -0.009759741, 0.020858645, -0.07545148, 0.026400855, -0.010346487, 0.021871338, 0.02280509, 0.039563525, 0.048123475, 0.023863154, -0.014378908, -0.07377611, -0.047488105, -0.01346882, -0.0030843772, -0.0071597695, -0.025082337, -0.0034260238, -0.028239515, 0.0738588, 0.06699543, -0.04865767, -0.014116794, -0.023018647, -0.18436208, 0.046717994, 0.05505563, -0.037509803, 0.08014922, -0.017928418, 0.024219323, 0.04738822, 3.8910616E-4, 0.026197795, -0.095298655, 0.08441068, -0.07509214, -0.070502646, -0.048232406, 0.050662514, 0.050845366, 0.02694858, 0.09195362, -0.047451414, -0.0019253033, 0.032139335, -0.01269966, -0.0063552414, 0.06518699, 0.002008076, -0.014413211, 0.041129943, -0.023289412, -0.06809452, 0.022095388, 0.028659329, -0.040416677, 0.042018082, 0.025160188, -0.00202035, 0.019702962, 0.021777565, -0.013656149, -0.051313233, -0.04497646, -0.014631011, 0.059728637, 0.07593007, -0.082299784, 0.032355968, 0.0044729547, 0.06531633, 0.015795855, 0.084006965, -0.0068327803, -0.050892714, 0.038108964, -0.02454908, -0.06265796, 2.5220444E-33, -0.044166196, -0.001347358, 0.08235813, -0.007007566, 0.08890799, 0.010926969, 0.048537202, 0.035362706, -0.06755346, 0.08065806, -0.047143545, 0.15052986, -0.051712602, 0.007743377, 0.0065271705, 0.03629012, -0.022165097, -0.0071957675, -0.041202158, 0.05054965, 0.03288302, -0.065342665, -0.0435555, 0.08561902, 0.03559053, -0.022923121, 0.04949009, -0.0037656806, 0.12611488, 0.026593897, -0.053843573, -0.043974094, -0.11885948, -0.02735148, 0.030206485, 0.050310433, -0.07482688, -0.05851688, 0.04760424, -0.010419616, -0.06619075, 0.07070949, -0.022335932, -0.04451241, -0.04947817, 0.036794, 0.09824889, 0.01724759, 0.07204829, 0.048312318, -0.051752888, -0.09025775, 0.0916269, 0.053144947, 0.02713012, 0.07368689, 0.12561926, -0.0016340836, 0.05036303, 0.024970159, -0.044168174, -0.0017080267, 0.004947939, -0.012640446, 0.011970955, 0.011879299, 0.0017737702, 0.04202747, 0.12449005, -0.019065812, -0.082275026, 0.021578003, -0.0046439264, 0.022330984, -0.025510574, -0.029374225, -0.012013197, -0.044567946, 0.004120308, 0.048463393, -0.0022042012, 0.017926654, -0.04282157, -0.022221169, -0.019569948, 0.0037771566, 9.27978E-4, 0.0071854815, -0.03358333, -0.012537659, -0.008971462, -0.05393706, 0.047261942, 0.037615813, -0.10200891, -3.645473E-33, -0.07226257, -0.0070582414, -0.06130244, 0.0811645, 0.1045354, 0.018696334, 0.07912544, -0.03308308, 0.06324003, -0.0665069, -0.031799477, -0.021002896, 0.029073669, 0.040397428, 0.036709767, 0.044560984, -0.04978148, 0.01902109, -0.077330425, 0.0014166969, -0.0095353285, 0.05020993, -0.07146384, 0.028144993, 0.006661172, -0.04766623, -0.026359499, -0.066174455, -0.02251002, 0.0142386425, 0.003785665, 0.047709268, -0.05137375, 0.011881764, -0.013423481, -0.13021852, 0.06813814, -0.02570223, -0.0014707573, 0.02198746, 0.10670381, 0.030275151, -0.0061813854, 0.024690256, -0.016149765, 0.05734902, 0.006247445, 0.051220585, 0.04653701, -0.07878882, -0.014186172, 0.0035672425, 0.019670391, 0.009687702, 0.025794582, -0.038192537, 0.07879535, 0.021188704, -0.060337037, -0.025604641, 0.017753173, -0.019285059, 0.09721533, 0.057794902, -0.025287513, -0.018112333, -0.04367064, 0.05390888, -0.15762874, -0.06873541, 0.013214026, -0.034912545, 0.022502566, -0.040428247, -0.048058435, -0.04037447, -0.11269173, -0.00939744, -0.026796278, 0.022870356, -0.08203317, 0.0148278475, -0.025436388, 0.044117484, 0.0209005, 0.061733004, 0.017283028, -0.07416076, 0.005052495, -0.064698, -0.107965276, 0.03070776, -0.1072288, 0.025248624, 0.006587955, -4.3987804E-8, -0.014536431, 0.00945059, -0.043026436, 0.021258749, 0.0074991393, 0.026453322, -0.06734927, 0.063609846, -0.018449267, 0.035950236, 0.07701531, -0.05545728, -0.022039931, -0.033781715, 0.079458155, 0.02992643, 0.074860424, 0.035386804, -0.0042102844, 0.018032629, 0.117039256, -0.012195354, -0.07222459, 0.020477466, -0.04351911, -0.048398666, -0.007319725, -0.016831944, -0.03734885, -0.017150367, -0.012754011, -0.06360401, 0.042130403, -0.10278343, 0.015521012, -0.02966701, 0.016603023, -0.014801565, 0.028059384, 0.039389964, -0.06328157, 0.009948977, 0.0075169858, -0.010036776, -0.032425597, 0.07060074, -0.015688417, 0.051337164, 0.022686949, 0.055258285, 0.011793028, -0.0073220916, -0.026577298, -2.2336579E-5, 0.11238045, 0.06923051, -0.00418351, 0.018816656, -0.021250969, 0.04199925, 0.05988893, 0.010295226, -0.028454185, 0.0424255)",data scientist
Junior Backend Big Data Developer,ThetaRay,"Israel, Hod HaSharon Description About ThetaRay: ThetaRay is a trailblazer in AI-powered Anti-Money Laundering (AML) solutions, offering cutting-edge technology to fintechs, banks, and regulatory bodies worldwide. Our mission is to enhance trust in financial transactions, ensuring compliant and innovative business growth. Our technology empowers customers to expand into new markets and introduce groundbreaking products. Thetaray is a culture-driven company. Our values are at the heart of our success. By joining us, you'll have the opportunity to embody these values and inspire others through your actions. Why Join ThetaRay? At ThetaRay, you'll be part of a dynamic global team committed to redefining the financial services sector through technological innovation. You will contribute to creating safer financial environments and have the opportunity to work with some of the brightest minds in AI, ML, and financial technology. We offer a collaborative, inclusive, and forward-thinking work environment where your ideas and contributions are valued and encouraged. Join us in our mission to revolutionize the financial world, making it safer and more trustworthy for millions worldwide. Explore exciting career opportunities at ThetaRay – where innovation meets purpose. We are seeking a skilled Junior Backend Big Data Developer to join our global R&D department. About the position Design and develop a scalable data processing and ML pipeline using the latest big data technologies in a fast paced agile environment Investigate new technological areas and understand them in depth through rapid self-learning Demonstrates strong analytical problem-solving skills to support scalable and sustainable design solutions Ability to take ownership of product development including all life cycle stages: translating product requirements into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker Opened URL: https://il.indeed.com/rc/clk?jk=f8a4240257a1f2ce&bb=r6UIYw2mow7Pn8IzQwq2_-W1QCjSWXjTolCGGPP6r1hrgYR_04g-rduMaSln8QXPTKg72oJi8A05b61aBYrHRJnGe_P_Fe_A6kxMxp1ZSL-qNeOaC0lT-K0Lac-PNtP0&xkcb=SoB367M33wH7H8WWaB0IbzkdCdPP&fccid=69237999e36cf5e9&vjs=3",data,"Israel, Hod HaSharon Description About ThetaRay: ThetaRay is a trailblazer in AI-powered Anti-Money Laundering (AML) solutions, offering cutting-edge technology to fintechs, banks, and regulatory bodies worldwide. Our mission is to enhance trust in financial transactions, ensuring compliant and innovative business growth. Our technology empowers customers to expand into new markets and introduce groundbreaking products. Thetaray is a culture-driven company. Our values are at the heart of our success. By joining us, you'll have the opportunity to embody these values and inspire others through your actions. Why Join ThetaRay? At ThetaRay, you'll be part of a dynamic global team committed to redefining the financial services sector through technological innovation. You will contribute to creating safer financial environments and have the opportunity to work with some of the brightest minds in AI, ML, and financial technology. We offer a collaborative, inclusive, and forward-thinking work environment where your ideas and contributions are valued and encouraged. Join us in our mission to revolutionize the financial world, making it safer and more trustworthy for millions worldwide. Explore exciting career opportunities at ThetaRay – where innovation meets purpose. We are seeking a skilled Junior Backend Big Data Developer to join our global R&D department. About the position Design and develop a scalable data processing and ML pipeline using the latest big data technologies in a fast paced agile environment Investigate new technological areas and understand them in depth through rapid self-learning Demonstrates strong analytical problem-solving skills to support scalable and sustainable design solutions Ability to take ownership of product development including all life cycle stages: translating product requirements into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker","Israel, Hod HaSharon Description About ThetaRay: ThetaRay is a trailblazer in AI-powered Anti-Money Laundering (AML) solutions, offering cutting-edge technology to fintechs, banks, and regulatory bodies worldwide. Our mission is to enhance trust in financial transactions, ensuring compliant and innovative business growth. Our technology empowers customers to expand into new markets and introduce groundbreaking products. Thetaray is a culture-driven company. Our values are at the heart of our success. By joining us, you'll have the opportunity to embody these values and inspire others through your actions. Why Join ThetaRay? At ThetaRay, you'll be part of a dynamic global team committed to redefining the financial services sector through technological innovation. You will contribute to creating safer financial environments and have the opportunity to work with some of the brightest minds in AI, ML, and financial technology. We offer a collaborative, inclusive, and forward-thinking work environment where your ideas and contributions are valued and encouraged. Join us in our mission to revolutionize the financial world, making it safer and more trustworthy for millions worldwide. Explore exciting career opportunities at ThetaRay – where innovation meets purpose. We are seeking a skilled Junior Backend Big Data Developer to join our global R&D department. About the position Design and develop a scalable data processing and ML pipeline using the latest big data technologies in a fast paced agile environment Investigate new technological areas and understand them in depth through rapid self-learning Demonstrates strong analytical problem-solving skills to support scalable and sustainable design solutions Ability to take ownership of product development including all life cycle stages: translating product requirements into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker","into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker",13,13,1,0,"List(-0.015692811, -0.028329486, 0.035626475, 0.047216993, 0.022981994, -0.11717073, -0.08117962, 0.03025087, -0.12956153, -0.008425663, -0.090249896, -0.061070453, 0.015740285, -0.006715748, 0.0064129797, -0.009488731, 0.00869092, -0.042265214, 0.0186285, -0.06838546, -0.07261187, 0.014991496, -0.0065111956, -0.040677372, -0.022825506, 0.041047353, 0.014603797, -0.013530217, -0.05337878, -0.068208806, 0.026637672, 0.021555832, 0.023925427, 0.019462913, 0.059177287, 0.13524431, 0.09721672, -0.084608905, -0.073547795, -0.045055605, -0.037026055, -0.09944774, -0.017087802, -0.07676972, 0.0182997, -0.063894644, 0.015854273, -0.07988908, 0.008508865, 0.05781816, -0.07911141, -0.097126216, 0.0114401225, -0.007053282, -0.034884937, 0.024335163, 0.019481424, 0.008347702, -0.021837126, -0.043337587, -0.0048732795, -0.040264923, -0.05068365, 0.010291237, -0.06461337, -0.007531179, 0.02110967, 0.012427128, 0.05695044, -0.09337665, -0.03947586, -0.025093598, -0.121335894, -0.0432337, 0.030765442, 0.059087854, 0.074825235, -0.034777258, 0.040141042, -0.0021573375, 0.010751915, 0.043707497, -0.11472698, 0.07784606, -0.073306665, -0.006490386, -0.017797876, 0.065332055, 0.016254138, 0.005245715, 0.035583753, -0.05512072, 0.0098691275, -0.04395415, 0.048991766, 0.052130517, 0.032898843, -0.04442252, -0.0047840625, 0.017467622, -0.07730712, -0.03207672, 0.04243074, 0.0141609805, -0.025354648, 0.0435332, -0.024620825, -0.017275674, -0.024120351, 0.0029935765, -0.018975647, -0.054317497, -0.02573899, -0.016932549, 0.056291133, -0.032948785, -0.073109135, -0.07042682, -0.03462629, 0.07602297, 0.044064403, 0.057120666, 0.017709747, -0.0051781703, 0.0466919, 0.04275913, -0.07907655, 7.2677164E-33, 0.04196181, -0.021134501, 0.006225057, -0.0018035312, 0.11127587, -0.040806543, 0.02671489, 0.005469006, -0.050021, 0.020723611, -0.01119432, 0.10780331, -0.055546936, -0.030270392, 0.09289377, -0.06663051, -0.03981408, 0.029081626, -0.019867718, 0.019834546, 0.0065457798, -0.08258694, -0.04076227, 0.06465756, 0.10399452, -0.020066435, 0.073513575, 0.022595953, 0.05645048, 0.0042875246, -0.037795935, -0.037838914, -0.042587243, 0.040469456, -0.011878327, 0.037207976, -0.09614116, -0.049517337, 0.011740923, 0.052362368, -0.07851677, 0.07729907, -0.0120289065, -0.05969904, 0.014180871, 0.020616675, 0.07387737, -0.009322789, 0.053638555, 0.03127244, -0.035997476, -0.056651156, 0.09375888, 0.03310517, 0.049321804, -0.005735967, 0.0807953, -0.045382623, 0.014857416, 0.07408199, -0.102721244, 0.024392657, 0.021328839, -0.055799697, 0.008644133, -0.037168782, 0.03367818, 0.076219834, 0.035818405, 0.044819452, -0.04922159, 0.042916518, 0.053132832, 0.0042503336, 0.04929977, -0.037735056, 0.012033609, -0.065863945, 0.002653698, 0.037887335, 0.006631782, 0.039893918, -0.014556988, 0.041749217, 0.028274398, -0.019228969, -0.04122416, 0.047929123, -0.082127325, 0.027100177, -0.022910744, -0.020177146, 0.032838155, -0.016052067, 0.011877875, -7.748177E-33, -0.025731264, -0.077723145, -0.07821727, 0.09177505, 0.10496153, -0.0205363, 0.048389405, 0.050270908, 0.014893264, -0.042927865, -0.08377237, -0.016911037, 0.042133044, 0.04461997, -0.08561634, -0.033690233, -0.030168701, -0.042923167, 0.057458483, 0.027369525, -0.0076346686, 0.088060185, -0.027080974, 0.011370918, -0.049335923, -0.033380978, -0.03608549, -0.04327438, -0.08743865, -0.0117966, 0.03647189, 0.008028569, 0.0050810548, 0.02832931, 0.03634631, -0.052839976, 0.040742725, 0.02581903, -0.00570374, -0.011438812, 0.12821804, -0.02469867, -0.050316807, 0.0088443905, -0.03067933, 0.029651603, -0.0567662, 0.030260283, -0.019062286, -0.09765061, -0.06814974, 0.036949597, -0.03624523, -0.09384863, -0.030932678, -0.04056812, 0.08089578, 0.007881205, -0.05168492, 0.08807111, 0.029667474, -0.004143877, 0.11551355, 0.020692, 0.0041544526, 0.008746452, -0.0073338654, 0.03924351, -0.14935891, 0.03763239, -0.0030503282, -0.023661243, -0.04789086, -0.0033455687, 0.015315556, -0.047661662, -0.07408036, -0.032759894, 0.030323036, 0.05470475, -0.014257489, -0.0017291148, -0.03384902, 0.09104049, 0.038540542, 0.08462952, 0.022589682, 0.009037066, 0.019691424, -0.005308765, -0.07867376, 0.04842228, -0.065734304, 0.07773198, 0.044556957, -4.7468625E-8, 0.06723304, 0.0030474896, -0.0014288974, 0.039293516, 0.05814192, 0.05718914, -0.008807937, 0.07284816, 0.0075520123, 0.033381835, -0.032448873, -0.0015857236, -0.058839604, 0.03404845, 0.07996668, 0.0584914, -0.008998244, 0.11228442, -0.06580761, -0.034991574, 0.09073969, 0.07620584, -0.02298122, -0.012567871, -0.06571547, -0.06756512, 0.056285895, 0.021926958, 0.039114542, -0.025292771, 0.0020010066, -0.02117052, 0.016683312, -0.011926114, 0.0012759364, -0.09230321, 0.024454044, 0.052660096, -3.647985E-4, -0.013327576, -0.089014724, 0.08120021, 0.005625686, -0.0500305, -0.065604545, 0.044338424, -0.064484015, -0.019751353, -0.00922614, -0.011509422, 0.0067652976, -0.009845406, -0.012156925, 0.077542484, 0.13037676, 0.080940574, 0.045571014, -0.054907013, 0.035436913, 0.06997232, 0.020599622, -0.033214547, -0.00912485, 0.090733506)",junior backend
Data Annotator,Prisma Photonics,"Description: Prisma-Photonics is a rapidly growing startup company, developing the next-generation smart-infrastructure solution based on novel fiber-sensing technology (smart roads, smart cities, perimeters, grid monitoring, etc.). The company offers an award-winning disruptive solution – a “sensor free” approach to smart infrastructure. The company is VC backed and is in the revenues stage. Combining pioneering technology in optical fiber sensing with state-of-the-art machine learning, we help prevent environmental disasters, protect human lives, and keep critical energy and transportation backbones running smoothly. We are rapidly growing, looking for the best minds and spirits to join us in our journey. We know our products are only as great as the individuals building the hardware and software and harnessing data for good causes! Being a great team member means being eager to learn and grow, able to challenge others while accepting being challenged yourself, and working for the team and product with enthusiasm and passion. We are seeking a detail-oriented and highly technical Data Annotator to join our Data Operations team. The role is full time, on an hourly pay, with the aim of adjusting work hours per need. In this role, you will be responsible for annotating various events on our distinct fiber sensing data, and managing datasets to support various products and projects. You will play a crucial part in ensuring the quality and integrity of our data, contributing to multiple critical initiatives. You will help improve our annotation procedures, techniques and tooling, and work closely with ML and Algo teams as well as Data Engineering. This is a unique opportunity to gain hands-on experience in data operations within a dynamic environment using cutting edge and novel technology. You will join a talented team dedicated to leveraging data for impactful projects. If you are passionate about data and eager to contribute to exciting challenges, this might be the perfect opportunity for you. Key Responsibilities: Data Annotation – Accurately annotate datasets using internal tools, adhering to annotation guidelines and maintaining high standards for consistency and quality.. Quality Assurance – Review and validate annotated data to ensure accuracy and consistency, identify discrepancies, and suggest solutions to improve processes. Continuous improvement – Participate in the continuous improvement of our tools and data operation processes to achieve more accurate models, and more streamlined data flow between the different teams in the company. Data Management – Support the ongoing maintenance of annotated datasets, ensuring data integrity and accessibility, and implementing best-practices to create seamless data flows Requirements: A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python. Opened URL: https://il.indeed.com/rc/clk?jk=bc34db8ca4455a09&bb=r6UIYw2mow7Pn8IzQwq2_2KWxyEGo0k-OJpOJwwWN8Jm7Ix9rFlIZvMB9V3E68_9Tw5jXdxlGcRgdwn8R5bozu_4HSOWEd50RAMDfMKBbPpkx12msEPjULZyxC-ZVeo-&xkcb=SoD567M33wH7H8WWaB0PbzkdCdPP&fccid=fa9a43168015ecb7&vjs=3",data,"Description: Prisma-Photonics is a rapidly growing startup company, developing the next-generation smart-infrastructure solution based on novel fiber-sensing technology (smart roads, smart cities, perimeters, grid monitoring, etc.). The company offers an award-winning disruptive solution – a “sensor free” approach to smart infrastructure. The company is VC backed and is in the revenues stage. Combining pioneering technology in optical fiber sensing with state-of-the-art machine learning, we help prevent environmental disasters, protect human lives, and keep critical energy and transportation backbones running smoothly. We are rapidly growing, looking for the best minds and spirits to join us in our journey. We know our products are only as great as the individuals building the hardware and software and harnessing data for good causes! Being a great team member means being eager to learn and grow, able to challenge others while accepting being challenged yourself, and working for the team and product with enthusiasm and passion. We are seeking a detail-oriented and highly technical Data Annotator to join our Data Operations team. The role is full time, on an hourly pay, with the aim of adjusting work hours per need. In this role, you will be responsible for annotating various events on our distinct fiber sensing data, and managing datasets to support various products and projects. You will play a crucial part in ensuring the quality and integrity of our data, contributing to multiple critical initiatives. You will help improve our annotation procedures, techniques and tooling, and work closely with ML and Algo teams as well as Data Engineering. This is a unique opportunity to gain hands-on experience in data operations within a dynamic environment using cutting edge and novel technology. You will join a talented team dedicated to leveraging data for impactful projects. If you are passionate about data and eager to contribute to exciting challenges, this might be the perfect opportunity for you. Key Responsibilities: Data Annotation – Accurately annotate datasets using internal tools, adhering to annotation guidelines and maintaining high standards for consistency and quality.. Quality Assurance – Review and validate annotated data to ensure accuracy and consistency, identify discrepancies, and suggest solutions to improve processes. Continuous improvement – Participate in the continuous improvement of our tools and data operation processes to achieve more accurate models, and more streamlined data flow between the different teams in the company. Data Management – Support the ongoing maintenance of annotated datasets, ensuring data integrity and accessibility, and implementing best-practices to create seamless data flows Requirements: A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.","Description: Prisma-Photonics is a rapidly growing startup company, developing the next-generation smart-infrastructure solution based on novel fiber-sensing technology (smart roads, smart cities, perimeters, grid monitoring, etc.). The company offers an award-winning disruptive solution – a “sensor free” approach to smart infrastructure. The company is VC backed and is in the revenues stage. Combining pioneering technology in optical fiber sensing with state-of-the-art machine learning, we help prevent environmental disasters, protect human lives, and keep critical energy and transportation backbones running smoothly. We are rapidly growing, looking for the best minds and spirits to join us in our journey. We know our products are only as great as the individuals building the hardware and software and harnessing data for good causes! Being a great team member means being eager to learn and grow, able to challenge others while accepting being challenged yourself, and working for the team and product with enthusiasm and passion. We are seeking a detail-oriented and highly technical Data Annotator to join our Data Operations team. The role is full time, on an hourly pay, with the aim of adjusting work hours per need. In this role, you will be responsible for annotating various events on our distinct fiber sensing data, and managing datasets to support various products and projects. You will play a crucial part in ensuring the quality and integrity of our data, contributing to multiple critical initiatives. You will help improve our annotation procedures, techniques and tooling, and work closely with ML and Algo teams as well as Data Engineering. This is a unique opportunity to gain hands-on experience in data operations within a dynamic environment using cutting edge and novel technology. You will join a talented team dedicated to leveraging data for impactful projects. If you are passionate about data and eager to contribute to exciting challenges, this might be the perfect opportunity for you. Key Responsibilities: Data Annotation – Accurately annotate datasets using internal tools, adhering to annotation guidelines and maintaining high standards for consistency and quality.. Quality Assurance – Review and validate annotated data to ensure accuracy and consistency, identify discrepancies, and suggest solutions to improve processes. Continuous improvement – Participate in the continuous improvement of our tools and data operation processes to achieve more accurate models, and more streamlined data flow between the different teams in the company. Data Management – Support the ongoing maintenance of annotated datasets, ensuring data integrity and accessibility, and implementing best-practices to create seamless data flows Requirements: A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.","A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.",16,16,2,1,"List(-0.0040657767, 0.017679114, 0.041731153, 0.012855818, 0.017805379, -0.06119407, -0.030116705, 0.019684222, -0.045797404, 0.039286718, -0.09790244, -0.11022979, -0.059792973, 0.029107831, 0.01671864, 0.08185745, 0.0073288395, 0.002775802, 0.028403582, -0.096111335, -0.08645158, -0.015235563, 0.09157352, -0.023835555, 0.011230137, -0.006649349, 0.0014633923, 0.038815457, 0.019313227, -0.0035079701, 0.041546475, 0.06795711, 0.07808947, 0.02944012, 0.060184706, 0.033948597, 0.06679215, 0.043108754, -0.04713535, 0.04241199, -0.010429698, -0.043665, 0.11074899, 0.010370648, 0.009733539, -0.036504775, 0.063275866, -0.07695126, 0.036232684, 0.04581675, -0.025949754, -0.03896995, 0.008721179, 0.012558545, -0.014602689, 0.11828587, 0.051638134, 0.008400843, -0.061458237, -0.012024004, -0.0024436412, -0.03605075, 0.012936688, 0.015985463, 0.013752847, -0.032885116, -0.04918419, 0.10108121, 0.08913325, -0.102398716, 0.03978789, -9.640103E-5, -0.050447773, 0.048032187, 0.056648113, -0.052285656, 0.032763906, -0.012037704, 0.044823114, -0.026153443, -0.004274314, 0.03341474, -0.05551652, 0.024325399, 0.011141037, -0.0650513, -0.022619652, 0.066010244, 0.004543516, 0.054653686, 0.051995102, -0.08621523, -0.0013644263, 0.034796342, -0.0010543942, 0.0036792709, 0.001885116, -0.056991268, 0.010527659, 0.014941485, 0.0064730244, -0.0081208525, -0.031581428, 0.08685977, -0.03433713, 0.007249538, -0.05683966, 0.053659666, 0.028998736, 0.013727496, 0.033244558, -0.003574537, -0.06585004, 0.05054785, 0.06123076, 0.0017836845, -0.058439918, -0.0037832803, 0.038946737, -0.048002023, -0.0025970263, 0.04569991, -0.040129393, -0.056413483, -0.002254004, -0.04257622, -0.027514426, 2.0127626E-33, 0.04332194, 0.09417905, -0.023120936, 0.052348867, -0.028113741, 0.03515906, -0.03747603, 0.029539978, -0.06952784, 0.04025994, 0.001068162, 0.100418895, -0.05735717, 0.053812392, 0.005824162, -0.020422684, -0.049470842, 0.03398937, -0.08347106, 0.03355433, -0.046673983, -0.012638478, -0.05342446, 0.02848572, 0.053941756, 0.028447734, -0.061261214, -0.009505381, 0.068914756, -0.006899278, -0.06208463, -0.059812643, -0.053845856, -0.042468786, 0.09391362, -0.0037409002, -0.024071138, -0.034509696, 0.059408303, 0.019726168, -0.0024248997, 0.039799664, 0.075241886, -0.05802253, 0.028931107, 0.020839581, 0.078829356, 0.03860783, 0.04169907, 0.021131312, -0.018827394, -0.08564968, 0.015514691, -0.017165285, 0.0582361, 0.07146377, 0.04309963, 0.008822628, -0.014008673, -0.07121279, -0.06314407, 0.024597034, -0.081949584, -6.906209E-4, -0.04536596, -0.0010788789, 0.020072475, 0.040485296, 0.1352587, 0.0034666988, -0.04801153, 0.038204707, -0.037616916, 0.019307166, -0.09549462, 0.0016246688, -0.022879373, -0.10227457, 0.0760777, 0.1071858, -0.0139708165, -0.038280778, -0.026711617, -0.035896134, 0.07029063, 0.039683193, 2.107197E-4, -0.046765987, 0.025296565, -0.032681935, 0.015952421, -3.4323338E-4, -0.061319258, 0.003586926, -0.029790802, -3.3751874E-33, -0.05670592, 0.028115993, -0.098433, 0.039319534, 0.027114578, 0.018771902, 0.054271985, 0.05157081, -0.0075439177, -0.021052133, -0.013031787, 0.007269853, -0.026724689, -0.019659005, -0.055605482, -0.044391654, -0.08707739, 0.0065559573, -0.043585002, 0.022036256, 0.014812071, 0.050427645, -0.13530545, 0.027719682, 0.051597044, -0.00915395, -0.12206269, -0.063832164, -0.032956447, -0.021301735, 0.008188977, -0.011893273, -0.04042016, -0.09282374, -0.0033726078, -0.019319754, 0.10379529, -0.06536912, -0.017002918, 0.05981278, -0.0053258194, 0.0393764, -0.04811393, -0.041251488, -0.05775458, 0.005146791, 0.018443095, 0.083703846, 0.041360237, -0.070317715, -0.061997887, -0.00781513, 0.080040075, -0.01528788, 0.030083055, -0.14804415, 0.03735725, -0.037927467, -0.044190925, -0.04353146, -0.040721808, -0.020632606, 0.07759672, 0.09718638, -0.023545295, -0.08321662, -0.06464335, 0.050135396, -0.17115715, -0.032820847, 0.09385349, 0.024056016, 0.029000584, 0.00811139, -0.075912274, -0.052334823, -0.028505284, -0.04010161, 0.0057029384, -0.0017755671, -0.034470014, -0.085441865, 0.046151128, 0.041437592, 0.047802307, 0.07677224, -0.021489445, -0.08434696, 0.078568265, -0.10439219, -0.03949089, 0.025072804, -0.1433754, 0.070930004, -0.019150272, -4.4411546E-8, -0.06652616, 0.007400297, -0.040025044, -0.042448975, -0.041396607, -0.013382399, -0.06905934, 0.012856292, -0.012991169, -0.030382566, -0.0087731965, -0.010933297, -0.077104434, -0.0125383455, 0.106025845, 0.006116161, 0.1543986, 0.053072315, 0.0019448269, 0.06666058, 0.037559293, 0.02238489, -0.095956646, -0.0051855203, -0.045420304, 0.006382786, 0.047549043, -0.003457956, 0.032434657, 0.055207916, -0.053996578, 0.0041210297, 0.028560407, -0.030582793, 0.012572053, 0.045727488, 0.04974453, -0.06703446, 1.609263E-4, 0.019711088, -0.046236303, 0.08758971, -0.05706937, -0.022936558, 0.032675173, 0.010718025, 0.03787768, 0.06283102, -0.037419487, 0.049007796, 0.043448914, -0.022825116, -0.07723533, -0.009807111, 0.06144693, 0.03800597, 0.05066732, -3.299691E-4, -0.039032925, 0.045859043, 0.043475702, 0.013899119, 0.022938227, 0.018276945)",data analyst
Marketing Data Analyst,Plarium,"The Business Performance department at Plarium is in charge of all the analytical aspects of our game operations and strategic planning: From game design, to user acquisition, player behavior, live-ops, and game optimization (such as monetization, retention, etc). We are looking for a data analyst for our Marketing Analytics team. The scope of responsibility of this team includes budget optimization for user acquisition for our games at the scope of tens of million dollars per game, analyses of game profitability and long term potential, analysis of the players behavior, player game funnel and reaction to new game features and offers, and a lot more. Our goal is making sure Plarium brings the best and most relevant players to its games in the most efficient and effective way. Responsibilities Be a focal point for anything related to new players performance and KPIs across our range of games, strategic planning, user segmentation, tests and growth opportunities identification Daily support of ongoing analytical needs of the marketing department managers & teams Establish and monitor success metrics that measure and explain the impact of the marketing activities Design and initiate new features and reports to monitor & give insights on the marketing activity, using the marketing in-house BI system (for example, LTV, ROI forecasting, automatic rules) Run ad-hoc and in-depth analyses of the different Marketing activities and trends Support decision making based on data Collaborate with the game analysis team and the game studios to monitor and optimize our players experience, maximizing their retention in our games What we expect B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first! Opened URL: https://il.indeed.com/rc/clk?jk=2b03298659a436f7&bb=r6UIYw2mow7Pn8IzQwq2_wWeK1_evLckj39fLIfBgJ2KNH0z3augA147ODzOJXM4VPM1yn4wZeNukSlYyB-0ehIHUhqYKXcTKfcnYDIqLgoQwawaxa9eNtYV3Qk82DTa&xkcb=SoBN67M33wH7H8WWaB0ObzkdCdPP&fccid=f24b7ff57afa3405&vjs=3",data,"The Business Performance department at Plarium is in charge of all the analytical aspects of our game operations and strategic planning: From game design, to user acquisition, player behavior, live-ops, and game optimization (such as monetization, retention, etc). We are looking for a data analyst for our Marketing Analytics team. The scope of responsibility of this team includes budget optimization for user acquisition for our games at the scope of tens of million dollars per game, analyses of game profitability and long term potential, analysis of the players behavior, player game funnel and reaction to new game features and offers, and a lot more. Our goal is making sure Plarium brings the best and most relevant players to its games in the most efficient and effective way. Responsibilities Be a focal point for anything related to new players performance and KPIs across our range of games, strategic planning, user segmentation, tests and growth opportunities identification Daily support of ongoing analytical needs of the marketing department managers & teams Establish and monitor success metrics that measure and explain the impact of the marketing activities Design and initiate new features and reports to monitor & give insights on the marketing activity, using the marketing in-house BI system (for example, LTV, ROI forecasting, automatic rules) Run ad-hoc and in-depth analyses of the different Marketing activities and trends Support decision making based on data Collaborate with the game analysis team and the game studios to monitor and optimize our players experience, maximizing their retention in our games What we expect B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!","The Business Performance department at Plarium is in charge of all the analytical aspects of our game operations and strategic planning: From game design, to user acquisition, player behavior, live-ops, and game optimization (such as monetization, retention, etc). We are looking for a data analyst for our Marketing Analytics team. The scope of responsibility of this team includes budget optimization for user acquisition for our games at the scope of tens of million dollars per game, analyses of game profitability and long term potential, analysis of the players behavior, player game funnel and reaction to new game features and offers, and a lot more. Our goal is making sure Plarium brings the best and most relevant players to its games in the most efficient and effective way. Responsibilities Be a focal point for anything related to new players performance and KPIs across our range of games, strategic planning, user segmentation, tests and growth opportunities identification Daily support of ongoing analytical needs of the marketing department managers & teams Establish and monitor success metrics that measure and explain the impact of the marketing activities Design and initiate new features and reports to monitor & give insights on the marketing activity, using the marketing in-house BI system (for example, LTV, ROI forecasting, automatic rules) Run ad-hoc and in-depth analyses of the different Marketing activities and trends Support decision making based on data Collaborate with the game analysis team and the game studios to monitor and optimize our players experience, maximizing their retention in our games What we expect B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!","B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!",0,8,3,0,"List(-0.017894428, 0.012737444, 0.012829153, 0.059998777, -0.006030504, 0.039236054, 0.052049745, -0.008832954, -0.013833111, -0.053027686, 0.01050116, -0.073226064, 0.042602498, -0.04779995, 0.05228456, 0.011208763, 0.09763997, -0.032732088, -0.056132864, -0.07322719, -0.068949856, -0.041666865, 0.069628015, 0.03135208, 0.031939633, 0.0371354, 0.091081224, 0.015997406, -0.034322016, -0.06000969, -0.0036450534, 0.040159225, 0.059343204, 0.006162782, 0.0067362804, 0.027964339, 0.0070072566, -0.026612828, -0.017288763, 0.0108869225, -0.081482, -0.103326716, 0.029542023, 0.022504646, 0.0438849, -0.024581827, -0.067578204, -0.0056427093, 0.019385377, 0.1022777, -0.14948642, 5.980743E-5, 0.002258151, 0.023782605, 0.009178827, 0.083475254, -0.07112794, -0.041816443, -0.011541809, 0.021825992, 0.0075950795, -0.06431769, 0.01125684, 0.07768633, -0.023176, -0.0451983, -0.12153217, 0.054225236, -0.03285786, -0.060991153, -0.05259051, -0.06919808, -0.06595257, 0.077649444, 0.023701187, 0.013511327, -0.015493101, -0.045621376, 0.0061544417, 0.008160444, 0.016661894, 0.06106316, -0.037329268, 0.112437926, -0.02693654, -0.075766735, 0.027077649, 0.024732938, 0.007608673, -0.021988994, 0.01579799, 0.02747372, -0.03706465, 0.0018398839, 0.0033813303, 0.010791719, -0.06428251, -0.047921386, 0.04916679, 0.039186124, -0.0050397227, 0.082556196, 0.0037290906, 0.03708842, -0.13536118, -0.07866484, 0.033948276, 0.081934296, 0.10820316, 0.0060873744, -0.014928359, -0.017974503, -0.0631752, -0.024705637, 0.069618486, 0.016213339, -0.023727382, -0.0019474629, 0.09605722, 0.07730291, 0.026685886, 0.050097547, 0.03128373, -0.06852652, -0.041564494, -0.03702442, -0.097936176, 6.533329E-33, -0.03123079, 0.039205886, 0.08126642, 0.04339143, 0.009506729, -0.031308148, -0.052199174, -0.018353138, -0.027868047, 0.047694765, -0.0462894, 0.10106232, -0.031539254, 0.035818253, 0.04540186, 0.01788256, -0.06900698, 0.08153136, -0.04630739, 0.054978468, 0.035987493, -0.1336168, -0.012978637, 0.04969282, -0.0015150962, 0.024029419, 0.08838012, 0.03167883, 0.078877196, -0.0038257216, -0.037384648, -0.007068904, -0.099537894, -0.07782257, -0.05579099, -0.028185006, -0.07732149, -0.07588411, 0.031434234, 0.014811613, -0.12312831, 0.039128635, 0.025822675, 0.006400575, -0.0018100663, 0.069708124, 0.04364665, 0.033617705, 0.050622888, -0.02785873, -0.09444704, -0.054945126, 0.08236348, -0.010668572, -0.06494866, 0.0065162843, 0.0027106965, 0.009167541, -0.043427266, 0.018856596, -0.006305124, 0.063681304, -0.06780075, -0.07036117, -0.05243099, 0.028502235, 0.009436756, -0.003611055, 0.08348707, 0.013653134, 0.047744606, 0.05512058, 0.09674741, -0.0019218422, 0.015517268, -3.3380798E-4, -0.045721006, -0.0037597385, -0.0017957219, 0.056897014, 0.06924747, 0.025977321, 0.0018257988, -0.030956207, 0.046697345, 0.015506195, 0.035036072, -0.08195947, -0.044286944, 0.023566736, -0.052486245, 0.017004583, -0.0013699415, 0.09311329, -0.1434528, -7.451294E-33, -0.026255237, 0.018667528, -0.0103491, 0.051478326, 0.12754056, 0.007743376, 0.02169416, -0.029252013, 0.053391427, -0.015727779, 0.022447193, 0.049147088, 0.033572096, 0.005275048, -0.0041146292, 0.048147913, 0.016829029, -0.0039198366, -0.15608035, -0.010838831, -0.00965925, 0.12805597, -0.10219767, 0.013205358, -0.00724953, 0.08680388, -0.031549126, 0.040899713, -0.02863756, 0.021655297, -0.051818468, -0.04530617, -0.073348, -0.025021905, 0.025824614, -0.057493087, 0.013506886, -0.09936218, -0.021031566, 0.07866322, 0.09826391, -0.020942532, -7.582768E-4, 0.0058034617, 0.012349522, -0.017652541, -6.669065E-4, -0.061432265, 0.0075210636, -0.075548336, 0.0013502307, 0.04893764, -0.048626196, -0.020567238, 0.0021450126, -0.00425449, 0.01671126, -0.028539626, -0.030099744, -0.007513882, -0.07506431, 0.01612059, 0.052880656, 0.091003716, -0.00799477, -0.042347286, 0.05145995, 0.011385633, -0.0757549, -0.045910712, -0.035774227, -0.0019703738, 0.033937614, -0.0292455, -0.081281856, -0.040770758, 0.0068796626, -0.03883625, -0.061082356, 0.022245442, -0.054002102, -0.07773013, 0.015929207, 0.0016569851, 0.0648359, 0.018250529, -0.03226749, 0.026299376, 0.0011846529, 0.059231054, -0.08674133, 0.039645545, -0.09982557, 0.05272522, -0.021291682, -6.7520716E-8, 0.013175189, -0.0820163, 0.032193866, 0.045808002, 0.039719846, -0.03508876, -0.07549243, 0.054251865, 0.029592877, -0.0071590734, -0.0038353065, 0.014041573, -0.06863335, 0.047337417, -0.0017542979, -7.35556E-4, 0.04058605, 0.093951724, -0.02071865, -0.03784846, 0.08307618, -0.020910017, 0.0029789554, 0.023814365, -0.022687916, -0.006334474, -0.036646772, 0.048715223, 0.046712715, -0.030475438, 0.030692615, -0.03383613, 0.042252317, -0.052727383, -0.027032759, -0.053016398, 0.05841245, -0.033590555, -0.0060484703, 0.05181813, -0.12456911, -0.055980366, -0.015851444, 0.028470721, -0.010241989, 0.011468757, -0.09357705, -0.012805069, 0.004053726, 0.039242562, -0.04382419, -0.0790641, 0.0349892, -0.021157993, 0.029430967, 0.05034667, -0.0012822036, 0.011528633, -0.006602682, 0.043173414, 0.09628778, -0.056996785, -0.055068288, 0.026252504)",data analyst
Junior Data Analyst,aiOla,"aiOla is a high-growth, product-led SaaS startup backed by $45 million in funding from premier investors such as New Era Capital Partners and Hamilton Lane. Our mission is to revolutionize efficiency, intelligence, collaboration, and safety in the industrial sector through our proprietary speech-powered AI technology. Our team spans the US and Israel, bringing together experts in Product, Data Science, Data Engineering, Analytics, Marketing, and Customer Success. At aiOla, we thrive on innovation, collaboration, and a commitment to excellence. We are seeking forward-thinking professionals passionate about pushing the limits of AI and making an impact. Role Overview: We are seeking a passionate and innovative Data Analyst to join our growing Analytics team. This is an exceptional opportunity for a detail-oriented professional to not only analyze data but also play a pivotal role in shaping and enhancing our cutting-edge AI models and products. As a Data Analyst at aiOla, you will dive into complex datasets, uncover actionable insights, and contribute directly to the improvement and monitoring of our proprietary AI technologies. If you thrive in a dynamic environment, this role is perfect for you! Requirements: B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making. Opened URL: https://il.indeed.com/rc/clk?jk=8c0efb63a00ef0d5&bb=r6UIYw2mow7Pn8IzQwq2_xdQrNIprFLEwcJQe8rzWVWb1_NSvbpLXcpcn5TXeAHedsgImdoi_2ey__b10Ys3nn9vr-SRmf6buKLYOFDHfBc8ksA0lyjjiA%3D%3D&xkcb=SoDQ67M33wH7H8WWaB0NbzkdCdPP&fccid=ba07516c418dda52&vjs=3",data,"aiOla is a high-growth, product-led SaaS startup backed by $45 million in funding from premier investors such as New Era Capital Partners and Hamilton Lane. Our mission is to revolutionize efficiency, intelligence, collaboration, and safety in the industrial sector through our proprietary speech-powered AI technology. Our team spans the US and Israel, bringing together experts in Product, Data Science, Data Engineering, Analytics, Marketing, and Customer Success. At aiOla, we thrive on innovation, collaboration, and a commitment to excellence. We are seeking forward-thinking professionals passionate about pushing the limits of AI and making an impact. Role Overview: We are seeking a passionate and innovative Data Analyst to join our growing Analytics team. This is an exceptional opportunity for a detail-oriented professional to not only analyze data but also play a pivotal role in shaping and enhancing our cutting-edge AI models and products. As a Data Analyst at aiOla, you will dive into complex datasets, uncover actionable insights, and contribute directly to the improvement and monitoring of our proprietary AI technologies. If you thrive in a dynamic environment, this role is perfect for you! Requirements: B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.","aiOla is a high-growth, product-led SaaS startup backed by $45 million in funding from premier investors such as New Era Capital Partners and Hamilton Lane. Our mission is to revolutionize efficiency, intelligence, collaboration, and safety in the industrial sector through our proprietary speech-powered AI technology. Our team spans the US and Israel, bringing together experts in Product, Data Science, Data Engineering, Analytics, Marketing, and Customer Success. At aiOla, we thrive on innovation, collaboration, and a commitment to excellence. We are seeking forward-thinking professionals passionate about pushing the limits of AI and making an impact. Role Overview: We are seeking a passionate and innovative Data Analyst to join our growing Analytics team. This is an exceptional opportunity for a detail-oriented professional to not only analyze data but also play a pivotal role in shaping and enhancing our cutting-edge AI models and products. As a Data Analyst at aiOla, you will dive into complex datasets, uncover actionable insights, and contribute directly to the improvement and monitoring of our proprietary AI technologies. If you thrive in a dynamic environment, this role is perfect for you! Requirements: B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.","B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.",0,23,0,0,"List(-0.037287023, -0.07398327, -0.054673623, 0.030411635, -0.044928666, -0.032182064, 0.03975411, 0.0077564884, -0.09049783, -0.020759799, -0.09696329, -0.065157376, 0.045826644, 0.009563188, 0.0055521927, 0.061755788, -0.030428987, -0.07342802, -0.022181576, -0.15916513, -0.043986794, 0.078852236, 0.0083882, -0.034474812, 0.016227113, 0.0073856427, 0.007023919, -0.06962306, -0.03739581, -0.020242993, -0.0069017946, 0.030975534, 0.034613274, 0.07292392, -0.018641574, 0.0067003705, 0.061191514, 0.014426827, 0.017397853, -0.040446427, -0.038757034, -0.057834934, -0.023426706, -0.053488396, -0.01680438, -0.033263225, -0.022913303, -0.045943614, 0.024324812, 0.07109617, -0.10092588, -0.019158375, -0.010015787, 0.045092583, -0.0034992138, 0.06907416, 0.012234393, -0.022890309, 0.023438549, -0.054052833, -0.027747467, -0.05128041, 0.062802896, 0.042801768, -0.00853864, -0.0044930847, -0.090706445, 0.062262543, 0.0017800888, -0.042966165, -0.052493423, -0.05159628, -0.083503716, 0.045893155, 0.0019868263, 0.009562043, 0.055444855, -0.032001574, 0.10372618, -0.02030679, -0.014590537, 0.023627598, -0.026220225, 0.13911903, -0.007233985, -0.09953399, 0.00739656, -0.013099278, -0.007353466, 0.0021552541, 0.05015036, -0.07196758, 0.042247005, 0.0014463156, -0.021692038, 0.035989463, -0.013515302, -0.026625404, 0.034635387, 0.071509145, -0.035363633, 0.014647127, 0.007384675, 0.030352646, -0.14437681, 2.3125426E-4, 0.03792587, 0.01884975, 0.03255542, 0.04197407, 0.021850577, -0.011269225, -0.040187914, -0.029170172, 0.06922653, -8.7717525E-4, -0.11097726, -0.0074424758, -0.0047952714, 0.07505412, -0.021625303, 0.11920328, 0.015033977, 0.033476397, 0.04277799, 0.07728909, -0.049087293, 2.6989091E-33, -0.035499044, 0.020894825, 0.015919149, -0.005891474, 0.029036157, -0.018817201, 0.035394922, 0.08169106, -0.059781406, 0.008579, -0.026460715, 0.16244847, -0.031243952, 0.04269737, 0.026557626, 0.01584251, -0.008830952, 0.031292554, -0.03430443, 0.01053923, 0.053488623, -0.073560834, 0.009251504, 0.10656604, 0.0930761, -0.043360546, 0.06769818, 0.026333379, 0.05969688, 0.036186997, -0.041616432, -0.03063421, -0.0860299, -0.026864892, 0.005191372, -0.032981955, -0.05450798, -0.03549523, 0.016327368, 0.03436284, -0.031146795, 0.037175294, -0.035013467, -0.045778744, -0.06695162, 0.039712198, 0.057169378, 0.03864619, 0.061850905, 0.02112041, -0.08222113, -0.11123935, 0.07245237, 0.06121903, 0.030611094, 0.0715514, 0.09411301, -0.085837714, 0.056785356, 0.028338814, -0.07642616, 0.05238451, -0.016956136, -0.016138954, -0.022817405, 0.029223805, 0.04604621, 0.105707504, 0.0770988, -0.10050472, -0.044681802, -9.7315287E-4, 0.102357596, 0.0034900056, -0.008390357, -0.017045982, -0.037866056, -0.052283246, 0.009469511, 0.05312355, 0.03473828, 0.0026033132, 0.017525233, -0.054748952, -8.0572785E-4, -0.059415244, 0.017231202, -0.02417769, -0.021948634, 0.014702881, -0.056114335, 0.010968902, -0.0336422, 0.08547326, -0.08721248, -4.0607743E-33, -0.05021597, 0.011040055, 0.0037645148, 0.055818416, 0.0813529, 0.012710698, 0.078061625, -0.042578463, 0.055024344, -0.0526079, -0.041990295, -0.008668065, 0.027699145, -0.021075137, 0.032750025, 0.020331888, -0.014119808, -0.008853348, -0.035131976, 0.030254478, -0.029052796, 0.07844592, -0.074813545, -0.0063241348, -0.02072651, -0.0035919405, -0.038977362, -0.04051989, 0.008001175, 0.06966162, -0.017910583, 0.022466786, -0.03250431, -0.02178496, -0.04614627, -0.07106092, 0.043010037, -0.051326305, -0.009509707, -0.013487641, 0.10680661, 0.064919725, -0.05610044, -0.004317043, 0.01828332, 0.033453457, -0.013806745, 0.062920704, -0.008407928, -0.09281314, -0.021064458, 0.011327074, 0.044690724, -0.010319928, -0.03232999, -0.034762684, 0.0818405, -0.030743603, -0.08806379, 0.022189902, 0.007312463, 0.022809183, 0.11901186, -0.0057119513, -0.0053649894, -0.057203356, -0.019017918, 0.0400593, -0.075743414, -0.07724041, 0.038626015, -0.014589495, 0.036966775, -0.022367142, -0.066347964, -0.023473257, -0.09278659, -0.12666872, -0.053917807, 0.017317604, -0.05018779, 0.0036016952, 0.030801743, 0.039352622, 0.009718344, 0.0847866, -0.033974066, -0.022320306, 0.057994008, -0.03606053, -0.15004873, -0.03329851, -0.071301006, 0.054737374, 0.005558137, -4.973067E-8, -0.046776555, 0.013852712, 0.04653769, 0.038755316, 0.01822366, -0.051886152, -0.06768714, 0.04790629, 0.06025569, -0.023531571, 0.055956148, -0.06353358, -0.08414057, 0.023000997, 0.064272575, 0.019047337, 0.069121905, 0.07577943, -0.003137615, -0.04612232, 0.10488167, -0.017142365, 0.010635332, 0.01847603, -0.010105538, -0.08723894, -0.039309822, 0.0456472, -0.011109873, -0.036587488, -0.058120545, -0.03003252, 0.02329143, -0.044460863, 0.07615767, -0.062027253, 0.04619925, -0.03462876, 0.017586559, 0.062781624, -0.06459568, 0.102278985, -0.03666302, 0.004824432, -0.021321004, 0.022894291, -0.08579399, -0.020938845, 0.020436872, 0.048236497, 0.05259596, -0.035174683, 0.0037756772, 0.06756369, 0.079703696, 0.10410282, 0.008534739, -0.025091998, 0.0018273814, 0.04984422, 0.033157874, -0.027284535, -0.040033337, 0.013530128)",data scientist
Data Engineer,Meta,"As a Data Engineer at Meta, you will shape the future of people-facing and business-facing products we build across our entire family of applications (Facebook, Instagram, Messenger, WhatsApp, Reality Labs, Threads). Your technical skills and analytical mindset will be utilized designing and building some of the world's most extensive data sets, helping to craft experiences for billions of people and hundreds of millions of businesses worldwide.In this role, you will collaborate with software engineering, data science, and product management teams to design/build scalable data solutions across Meta to optimize growth, strategy, and user experience for our 3 billion plus users, as well as our internal employee community.You will be at the forefront of identifying and solving some of the most interesting data challenges at a scale few companies can match. By joining Meta, you will become part of a vibrant community dedicated to skill development and career growth in data engineering and beyond.Data Engineering: You will guide teams by building optimal data artifacts (including datasets and visualizations) to address key questions. You will refine our systems, design logging solutions, and create scalable data models. Ensuring data security and quality, and with a strong focus on efficiency, you will suggest architecture and development approaches and data management standards to address complex analytical problems.Product leadership: You will use data to shape product development, identify new opportunities, and tackle upcoming challenges. You'll ensure our products add value for users and businesses, by prioritizing projects, and driving innovative solutions to respond to challenges or opportunities.Communication and influence: You won't simply present data, but tell data-driven stories. You will convince and influence your partners using clear insights and recommendations. You will build credibility through structure and clarity, and be a trusted strategic partner. Data Engineer Responsibilities: Conceptualize and own the data architecture for multiple large-scale projects, while evaluating design and operational cost-benefit tradeoffs within systems Create and contribute to frameworks that improve the efficacy of logging data, while working with data infrastructure to triage issues and resolve Collaborate with engineers, product managers, and data scientists to understand data needs, representing key data insights in a meaningful way Define and manage Service Level Agreements for all data sets in allocated areas of ownership Determine and implement the security model based on privacy requirements, confirm safeguards are followed, address data quality issues, and evolve governance processes within allocated areas of ownership Design, build, and launch collections of sophisticated data models and visualizations that support multiple use cases across different products or domains Solve our most challenging data integration problems, utilizing optimal Extract, Transform, Load (ETL) patterns, frameworks, query techniques, sourcing from structured and unstructured data sources Assist in owning existing processes running in production, optimizing complex code through advanced algorithmic concepts Optimize pipelines, dashboards, frameworks, and systems to facilitate easier development of data artifacts Influence product and cross-functional teams to identify data opportunities to drive impact Mentor team members by giving/receiving actionable feedback Minimum Qualifications: Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta. Opened URL: https://il.indeed.com/rc/clk?jk=332ce6560e8316dc&bb=r6UIYw2mow7Pn8IzQwq2_562YukhS6UN3gQsCZxi2egxfUB-GUkmkThkhz4gjqWPCW22mO4kb3LIM8QmQyIpRZmtmdc9AupjQYBTLVDksxFngEUdv3hLf8xgn6T3kHo6&xkcb=SoBk67M33wH7H8WWaB0MbzkdCdPP&fccid=4d28265c01696bc0&vjs=3",data,"As a Data Engineer at Meta, you will shape the future of people-facing and business-facing products we build across our entire family of applications (Facebook, Instagram, Messenger, WhatsApp, Reality Labs, Threads). Your technical skills and analytical mindset will be utilized designing and building some of the world's most extensive data sets, helping to craft experiences for billions of people and hundreds of millions of businesses worldwide.In this role, you will collaborate with software engineering, data science, and product management teams to design/build scalable data solutions across Meta to optimize growth, strategy, and user experience for our 3 billion plus users, as well as our internal employee community.You will be at the forefront of identifying and solving some of the most interesting data challenges at a scale few companies can match. By joining Meta, you will become part of a vibrant community dedicated to skill development and career growth in data engineering and beyond.Data Engineering: You will guide teams by building optimal data artifacts (including datasets and visualizations) to address key questions. You will refine our systems, design logging solutions, and create scalable data models. Ensuring data security and quality, and with a strong focus on efficiency, you will suggest architecture and development approaches and data management standards to address complex analytical problems.Product leadership: You will use data to shape product development, identify new opportunities, and tackle upcoming challenges. You'll ensure our products add value for users and businesses, by prioritizing projects, and driving innovative solutions to respond to challenges or opportunities.Communication and influence: You won't simply present data, but tell data-driven stories. You will convince and influence your partners using clear insights and recommendations. You will build credibility through structure and clarity, and be a trusted strategic partner. Data Engineer Responsibilities: Conceptualize and own the data architecture for multiple large-scale projects, while evaluating design and operational cost-benefit tradeoffs within systems Create and contribute to frameworks that improve the efficacy of logging data, while working with data infrastructure to triage issues and resolve Collaborate with engineers, product managers, and data scientists to understand data needs, representing key data insights in a meaningful way Define and manage Service Level Agreements for all data sets in allocated areas of ownership Determine and implement the security model based on privacy requirements, confirm safeguards are followed, address data quality issues, and evolve governance processes within allocated areas of ownership Design, build, and launch collections of sophisticated data models and visualizations that support multiple use cases across different products or domains Solve our most challenging data integration problems, utilizing optimal Extract, Transform, Load (ETL) patterns, frameworks, query techniques, sourcing from structured and unstructured data sources Assist in owning existing processes running in production, optimizing complex code through advanced algorithmic concepts Optimize pipelines, dashboards, frameworks, and systems to facilitate easier development of data artifacts Influence product and cross-functional teams to identify data opportunities to drive impact Mentor team members by giving/receiving actionable feedback Minimum Qualifications: Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.","As a Data Engineer at Meta, you will shape the future of people-facing and business-facing products we build across our entire family of applications (Facebook, Instagram, Messenger, WhatsApp, Reality Labs, Threads). Your technical skills and analytical mindset will be utilized designing and building some of the world's most extensive data sets, helping to craft experiences for billions of people and hundreds of millions of businesses worldwide.In this role, you will collaborate with software engineering, data science, and product management teams to design/build scalable data solutions across Meta to optimize growth, strategy, and user experience for our 3 billion plus users, as well as our internal employee community.You will be at the forefront of identifying and solving some of the most interesting data challenges at a scale few companies can match. By joining Meta, you will become part of a vibrant community dedicated to skill development and career growth in data engineering and beyond.Data Engineering: You will guide teams by building optimal data artifacts (including datasets and visualizations) to address key questions. You will refine our systems, design logging solutions, and create scalable data models. Ensuring data security and quality, and with a strong focus on efficiency, you will suggest architecture and development approaches and data management standards to address complex analytical problems.Product leadership: You will use data to shape product development, identify new opportunities, and tackle upcoming challenges. You'll ensure our products add value for users and businesses, by prioritizing projects, and driving innovative solutions to respond to challenges or opportunities.Communication and influence: You won't simply present data, but tell data-driven stories. You will convince and influence your partners using clear insights and recommendations. You will build credibility through structure and clarity, and be a trusted strategic partner. Data Engineer Responsibilities: Conceptualize and own the data architecture for multiple large-scale projects, while evaluating design and operational cost-benefit tradeoffs within systems Create and contribute to frameworks that improve the efficacy of logging data, while working with data infrastructure to triage issues and resolve Collaborate with engineers, product managers, and data scientists to understand data needs, representing key data insights in a meaningful way Define and manage Service Level Agreements for all data sets in allocated areas of ownership Determine and implement the security model based on privacy requirements, confirm safeguards are followed, address data quality issues, and evolve governance processes within allocated areas of ownership Design, build, and launch collections of sophisticated data models and visualizations that support multiple use cases across different products or domains Solve our most challenging data integration problems, utilizing optimal Extract, Transform, Load (ETL) patterns, frameworks, query techniques, sourcing from structured and unstructured data sources Assist in owning existing processes running in production, optimizing complex code through advanced algorithmic concepts Optimize pipelines, dashboards, frameworks, and systems to facilitate easier development of data artifacts Influence product and cross-functional teams to identify data opportunities to drive impact Mentor team members by giving/receiving actionable feedback Minimum Qualifications: Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.","Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.",22,1,4,2,"List(-0.014569731, -0.0027316588, 0.0027245558, -0.021502957, 0.023312023, -0.058448993, 0.013240254, 0.0501527, -0.008100762, -0.009443254, -0.06558906, 0.0062356587, -0.011787309, 0.017015379, 0.05598374, -0.018151809, 0.017162018, -0.13633077, -0.04679079, -0.037920553, -0.012636417, -0.043245587, 0.017345423, -0.03991703, 0.032438796, -7.67807E-6, -0.05454916, 0.009187075, -0.022273546, -0.004657205, 0.027621513, 0.060435403, -0.033522595, 0.07913052, -0.08080712, -0.030519534, 0.030191787, 0.02975833, -0.049625862, -0.040831275, -0.038711313, -0.089742385, -0.033248156, -0.0014651035, 0.047972456, 0.021975733, 0.04444758, -0.013460473, -0.032662608, -4.137189E-5, -0.078513294, -0.03938488, -0.0050460766, 0.11512905, -0.05526738, 0.055840977, 0.015988482, 0.026846813, 0.0077817272, 0.043348726, -0.015949879, -0.026970496, -0.010987052, 0.050916344, 0.030270342, 0.043654256, -0.07340496, -0.042266972, 0.009989929, -0.090677336, -0.044257216, -9.060939E-4, -0.07212267, 0.0798737, 0.11449272, -0.016844332, 0.007605203, 0.013431289, 0.102018654, -0.0020263789, 0.12403749, 0.055362232, -0.03636972, 0.04654642, -0.047701046, -0.09944168, -0.024254078, 0.04484393, -0.017830059, -0.002660413, -0.0025948745, 0.039080806, -0.0129607115, 0.07916704, -0.04570746, -0.007826936, -0.042280823, -0.019293508, 0.0029678303, 0.0488284, -0.04515944, -0.010521086, 0.0077978554, 0.04940712, -0.022720175, 0.06785523, 0.02696165, 0.088459685, 0.02006731, 0.08765735, -0.013130547, 0.01674992, -0.04195382, -0.059509054, 0.040252525, 0.036201358, -0.08315725, 0.05668881, 0.18694079, 0.003464829, 0.058152072, 0.107166395, -0.022522448, -0.047381964, -0.0151257925, -0.038645245, -0.0746716, 2.9676639E-33, 0.027492953, 0.12076994, 0.043325067, 0.07199055, 0.040429298, 0.025340872, 0.017111972, 0.03205723, -0.023674857, -0.025892647, 0.028465996, 0.045716498, -0.024890717, 0.02355838, 0.0919558, -0.0019829068, -0.011703319, -0.0108266855, 0.01604132, 0.030193472, 0.06613411, -0.109983765, -0.025392503, 0.06934669, 0.03327995, 0.04427709, -0.0072461492, 0.07932057, 0.05982988, -0.0070845447, -0.043919493, 0.010918162, 0.008445716, -0.029934563, 0.051785808, -0.04585149, 0.07637133, -0.097971104, 0.042859122, 0.017442767, -0.04254071, 0.07618353, -0.021218613, -0.04999468, -0.07618031, -0.012566857, 0.015949294, -0.03909986, -0.01106342, 0.09472451, -0.036397398, -0.081687, -0.054550327, 0.026773278, -0.008727571, 0.013402576, -0.011092619, -0.022939434, -0.03742841, -1.4294298E-4, -1.0812202E-4, -0.025563115, -0.023527127, 0.036644302, -0.035400566, 0.037638914, 0.06575509, 0.021362612, 0.017633982, 0.0016583258, -0.078075014, 0.020473402, -0.012578775, -0.06771739, -0.10982496, 0.04879945, -0.014361157, -0.10060391, -0.05611007, 0.10146884, -0.0038307833, 0.061487798, -0.006319861, 0.035261437, -0.0013988804, -0.01347279, 0.0018570087, -0.04753364, 0.05656096, -0.0033380832, -0.014000219, 0.016231881, 0.051920254, 0.09321461, -0.06308184, -3.1534083E-33, -0.05880869, 0.043238513, -0.008446956, 0.024557944, 0.07636342, -0.04056913, 0.1059635, -0.0011248739, -0.004493334, -0.008657642, -0.09426293, -0.04460641, -0.0031485234, 0.02701041, -0.017334761, 0.02501122, 0.024152828, -0.11732177, -0.085070156, 0.016098332, 0.045175288, 0.052800294, -0.09619915, 0.041623726, 0.076176174, -0.016056689, 0.025755681, -0.032240845, 0.035515387, 0.067999616, 0.04881451, -0.0055498634, -0.018688112, -0.05517317, 0.022433339, 0.013971256, 0.003699086, -0.06639759, 0.026685974, -0.023337903, 0.046309374, -0.039520103, -0.080494866, -0.023597663, -0.032502104, 0.06102885, -0.016811099, 0.009127947, 8.890342E-4, -0.08899484, 0.06565417, -0.0076345084, 0.019789936, -0.053609792, -0.045730595, -0.1211179, 0.05883217, 0.009449286, -0.03550878, -0.026180578, 0.06644181, -0.02522991, 0.0055731055, -0.011688366, -0.042880915, -0.005258507, -0.0036846448, -2.5634521E-5, -0.103383146, 0.0014713851, 0.1168455, -0.00956363, -0.04660713, -0.07204612, -0.043807566, 0.05543066, 0.037685376, 0.003328165, -0.11114134, 0.08960624, -0.027348906, 0.052483473, 0.0675371, -0.06725671, 0.0140198935, 0.012073311, -0.06695529, 0.030247187, -0.060765024, -0.044796176, -0.06880697, -0.010565617, -0.10105792, 0.05056165, -0.010299375, -5.7499363E-8, -0.061373804, -0.053195156, -0.052486684, 0.02030919, -0.04045937, 0.05059021, 0.009640813, -0.043339126, 0.14123955, 0.008740152, -0.087393194, -0.032828968, 0.0150799, 0.09205539, 0.11338101, 0.04493351, 0.058680862, -0.03479871, -0.0012381461, 0.042563178, 0.028203806, 3.8820048E-4, -0.08665946, -0.09355354, 0.03367173, -0.05869033, -0.028949354, 0.05433084, -0.03564613, 0.081336476, -0.0457933, -0.024583997, 0.081049316, -0.09419612, 0.050358318, -0.09259081, 0.035082318, -0.05814931, -0.0331346, 0.0035738728, 0.052738443, 0.044070937, 0.0626025, 0.014275892, -0.01628397, 0.025435515, -0.064047314, 0.027015518, -0.012051084, 0.019308874, 0.005480344, -0.015071906, -0.023302784, -0.06071374, -0.017482737, 0.0076499023, 0.053340256, 0.031732745, -0.024206284, 0.0065780347, 0.061242573, -0.037663817, -0.025295947, 0.018979099)",data scientist
Junior Data Engineer,Aquant,"At Aquant, we empower field service professionals to become heroes using our personalized generative AI platform, Service Co-Pilot. Using historical service data, Aquant equips service leaders, field technicians, and customer service representatives with the precise information they need when they need it—enabling them to make decisions with confidence, reduce costs, and optimize service delivery. Recognized as one of Fast Company's Next Big Things in Tech of 2024 , we are committed to providing cutting-edge AI solutions that tackle the most pressing challenges in manufacturing. Ready to join a team that values innovation, excellence, and above all, its people? Apply today! Aquant's Data Analytics team is looking for a highly skilled Data Engineer who is comfortable with big data and complex data models. As a member of our Data Engineering team, you will: Work with large amounts of data and develop Python code to streamline processes Analyze different data structures and create the required reports out of them Collaborate \ connect to existing codes Build tools to track data quality on large DB with complicated schemas Monitor the integrity and validity of the customer service data Deep dive into the service data and learn the service world Required skills & background: 2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce. Opened URL: https://il.indeed.com/rc/clk?jk=ae2c3d8865d70a20&bb=r6UIYw2mow7Pn8IzQwq2_-VNnSBFDXo8Ynf-6utD7gBjMIDitHzsM1MRE1_A3cWHXQijHHfglfmBI95kAdDXDG8bj1QKQcCe9jGagYeS_BwRE1OhyZy2w5SIATwSfwc1&xkcb=SoCN67M33wH7H8WWaB0DbzkdCdPP&fccid=ba03cd038fd30855&vjs=3",data,"At Aquant, we empower field service professionals to become heroes using our personalized generative AI platform, Service Co-Pilot. Using historical service data, Aquant equips service leaders, field technicians, and customer service representatives with the precise information they need when they need it—enabling them to make decisions with confidence, reduce costs, and optimize service delivery. Recognized as one of Fast Company's Next Big Things in Tech of 2024 , we are committed to providing cutting-edge AI solutions that tackle the most pressing challenges in manufacturing. Ready to join a team that values innovation, excellence, and above all, its people? Apply today! Aquant's Data Analytics team is looking for a highly skilled Data Engineer who is comfortable with big data and complex data models. As a member of our Data Engineering team, you will: Work with large amounts of data and develop Python code to streamline processes Analyze different data structures and create the required reports out of them Collaborate \ connect to existing codes Build tools to track data quality on large DB with complicated schemas Monitor the integrity and validity of the customer service data Deep dive into the service data and learn the service world Required skills & background: 2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.","At Aquant, we empower field service professionals to become heroes using our personalized generative AI platform, Service Co-Pilot. Using historical service data, Aquant equips service leaders, field technicians, and customer service representatives with the precise information they need when they need it—enabling them to make decisions with confidence, reduce costs, and optimize service delivery. Recognized as one of Fast Company's Next Big Things in Tech of 2024 , we are committed to providing cutting-edge AI solutions that tackle the most pressing challenges in manufacturing. Ready to join a team that values innovation, excellence, and above all, its people? Apply today! Aquant's Data Analytics team is looking for a highly skilled Data Engineer who is comfortable with big data and complex data models. As a member of our Data Engineering team, you will: Work with large amounts of data and develop Python code to streamline processes Analyze different data structures and create the required reports out of them Collaborate \ connect to existing codes Build tools to track data quality on large DB with complicated schemas Monitor the integrity and validity of the customer service data Deep dive into the service data and learn the service world Required skills & background: 2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.","2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.",8,11,5,3,"List(-0.014761563, 0.047748502, -0.00393963, 0.019807676, -0.025821341, -0.01565448, 0.03149938, -0.030830542, -0.049643174, 0.056058507, -0.021059705, -0.037397634, 0.049674686, 0.0059348843, 0.003793617, 0.15428844, -0.007823648, -0.027342863, -0.105259605, -0.05339775, -0.061655283, -0.0420146, 0.019377477, 0.0080760745, -0.02205881, -1.12436275E-4, -0.001879923, -0.01893828, -0.05183868, -0.07014931, -0.053242058, -0.03918744, 0.036981873, 0.034749918, 0.058307223, 0.067802995, 0.048621472, -0.034694992, 0.017374609, -0.048519656, 0.01589422, -0.04302871, 0.027137859, 0.0017406226, -0.011287892, -0.017558724, 0.0046721823, 0.007008367, 0.07685408, 0.048049536, -0.123370506, -0.056550648, -0.03526917, 0.005422226, 0.0029274381, 0.0554066, 0.044308327, -0.011024317, 0.014206263, -0.13321082, 0.012318238, -0.04493213, -8.4100786E-4, 0.06835014, 0.007536691, 0.026606144, -0.033712804, 0.03557254, -0.037511963, 0.032694746, -0.062503435, -0.03104403, -0.076861925, 0.06471701, 0.04596618, 0.028779455, -0.013185177, -0.028531441, 0.04281249, 0.067146555, -0.017359937, 0.02800867, -0.049473867, 0.16009037, -0.012161724, -0.09916391, 0.063368544, 0.08005397, -5.5956014E-4, 0.02329878, 0.0016420513, 0.02943613, 0.03590133, 0.013347638, -0.0029638987, 0.0059200823, -0.061069537, -0.05434134, -0.012414084, 0.060803078, -4.6608277E-4, 0.04611405, -0.0048391866, -0.050998814, -0.0770966, 0.036091566, 0.031604584, 0.025599288, 0.0046789944, -0.02404874, -0.027946781, 0.015965708, -0.086123556, -0.027689775, 0.092712015, 0.024762338, -0.080303796, 0.025195746, -0.013346541, 0.021999862, 0.051988184, 0.056256257, 0.0016659686, -0.06601861, -0.021399898, 0.050053906, 0.02957061, 5.999078E-33, 0.017503662, 0.08195619, 0.041907575, 0.06775165, 0.06794867, -0.0102062635, 0.053090136, 0.036018513, -0.092946365, 0.058733325, -0.035117574, 0.08123424, 0.016447768, 0.0026198602, 0.028984083, -0.036343265, -0.046531774, -0.025985373, 0.005056949, 0.08434616, 0.086050585, -0.11257075, -0.01408033, 0.044403262, 0.049747918, -0.040310886, 0.09084196, -0.019521555, 0.03573244, 0.002995621, -0.065423556, -0.012188956, -0.07475543, -0.028716357, -0.03202195, -0.021485852, -0.09258298, -0.050497286, 0.062632926, 0.040181667, -0.028465511, 0.041921787, -0.045310564, -0.0028517474, 0.03746027, 0.015839806, 0.014399702, -0.024239102, 0.03141996, 0.044210274, -0.04968937, -0.08684466, 0.08878507, 0.015702982, -0.0809371, 0.006911549, 0.07864827, -0.07369867, -0.029470079, -0.03618605, 0.042435385, -0.065932736, -0.055752408, 0.022963194, -0.047769744, -0.026709367, -0.020276558, -0.011858379, 0.10217248, -0.058773085, 0.006474436, -0.03227743, 0.06976102, -0.04281328, -0.030429667, -0.004639518, -0.037671667, -0.07155388, 0.06950644, 0.03888417, -0.037524424, 0.02996194, -0.032692645, -0.06222585, -0.04010435, 7.7371456E-4, 0.043386523, 0.009971999, -0.026574304, 0.05716679, -0.042788003, -0.0026117503, 0.034828532, 0.011036778, -0.054618683, -5.6570974E-33, 0.033734627, 0.04706463, 9.395892E-4, 0.031947136, 0.082056254, -0.048001785, -0.024561815, -0.011534042, 0.08179777, -0.025343873, 0.011523633, 0.023365708, 0.050804153, 0.016673772, -0.030246016, -0.04862066, 0.058785994, -0.009239955, -0.026462791, -0.04894127, -0.05390194, 0.13010707, -0.067833446, 0.008711416, -0.015609286, 0.023155443, -0.023049384, -0.04091871, 0.035168163, 0.01528941, -0.07531592, 0.026935067, 7.823763E-4, 0.015398595, 0.005148816, -0.05194725, 0.063587256, -0.097156644, -0.006940776, 0.0701076, 0.080812514, -0.0371197, -0.06592668, -0.060599517, 0.01015478, 0.03837239, 0.015612155, 0.009440392, -0.053161975, 0.0014406864, -0.04349989, 0.0635051, -0.021798102, 0.0075255134, 0.039204974, -0.054732725, 0.076294445, -0.07565721, -0.041352354, -0.007302369, -0.082008444, 0.049987186, 0.052801847, 0.057957057, 0.043567754, 0.013887798, -0.0116774095, 0.021747267, -0.114398286, -0.029457143, -0.13284689, 0.016481852, 0.012113824, -0.015820777, -0.036318175, -0.109639175, -0.12617554, -0.108476, -0.06279991, 0.020054227, -0.060890734, 0.021905264, -0.02525467, 0.027319878, -0.0067995004, 0.09437736, 0.02689794, -0.012346243, 0.022319058, 0.02740696, -0.16335729, 0.016589703, -0.031073002, 0.018398847, -0.017372696, -6.459109E-8, -0.03303134, 0.080173604, 0.01996942, 0.052886855, 0.011824334, -0.023035962, -0.10974015, 0.10030125, 0.06627146, 0.020776844, 0.015437614, -0.05777865, -0.021743372, 0.037025962, 0.056709472, 0.04281026, 0.043240286, 0.051464472, -0.043739706, -0.08010013, 0.043483708, -0.032440376, -0.036657076, -0.0010089801, -0.01931215, 0.0133964475, -0.03181043, 0.056289546, 0.020530818, -0.008533792, -0.011573621, -0.05548158, 0.022240698, -0.06686532, -0.02619683, -0.010625606, 0.077594355, -0.03263886, 0.09563383, 0.07019017, -0.061582997, 0.111086845, 0.018408855, 0.025843037, -0.05274545, -0.013058331, -0.09153101, 0.047762245, -0.031924, 0.014453264, 0.028841615, -0.037048295, -0.025147654, 0.11156594, 0.03581136, 0.074793525, 0.030695178, 0.0099180285, 0.03241941, 0.08678527, 0.068691306, -0.012274801, -0.12135923, -0.009479558)",data engineer
Data Scientist,Rubrik Job Board,"About Team & About Role: Rubrik is seeking a Data Scientist to join our Data Research team, part of the Security Apps organization responsible for developing Rubrik’s suite of security products. This is the first data scientist position on the team, requiring a high degree of independence and ownership. You’ll spearhead research and development of ML/AI models that power the features of our security offerings. What You’ll Do: Research and develop ML/AI models in the NLP domain Develop advanced anomaly detection models for threat detection Work closely with data analysts on research, pattern analysis, etc. Work closely with developers on implementation of models as part of features in the product Work with global teams to continuously push our ML infrastructure forward Experience You’ll Need: Bachelor’s degree in Mathematics, Computer Science, or other related field 3 years of experience in data science or a related field Proficiency in programming languages, especially Python Experience with common data science toolkits, such as Jupyter Notebook, Pandas, NumPy, Matplotlib, etc. Demonstrated experience in implementing ML algorithms in production environments, with a focus on anomaly detection and natural language processing. Strong problem-solving skills and analytic capability to develop insights and recommendations Excellent communication skills in Hebrew and English Preferred Qualifications: Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment. Opened URL: https://il.indeed.com/rc/clk?jk=ad3b38d6d843bb02&bb=r6UIYw2mow7Pn8IzQwq2_yYq9SF7G8SWRQOq2tS0E5s2WTIxqyX5UvFT8u-SbU_SASUW0mVJ_I7cT2iX1zPkHRDbfnLFNxLKcDajKYoqI5cz62T43EBWZ9Z5mO0gfo-W&xkcb=SoA567M33wH7H8WWaB0CbzkdCdPP&fccid=c4e7a75c6cddb35a&vjs=3",data,"About Team & About Role: Rubrik is seeking a Data Scientist to join our Data Research team, part of the Security Apps organization responsible for developing Rubrik’s suite of security products. This is the first data scientist position on the team, requiring a high degree of independence and ownership. You’ll spearhead research and development of ML/AI models that power the features of our security offerings. What You’ll Do: Research and develop ML/AI models in the NLP domain Develop advanced anomaly detection models for threat detection Work closely with data analysts on research, pattern analysis, etc. Work closely with developers on implementation of models as part of features in the product Work with global teams to continuously push our ML infrastructure forward Experience You’ll Need: Bachelor’s degree in Mathematics, Computer Science, or other related field 3 years of experience in data science or a related field Proficiency in programming languages, especially Python Experience with common data science toolkits, such as Jupyter Notebook, Pandas, NumPy, Matplotlib, etc. Demonstrated experience in implementing ML algorithms in production environments, with a focus on anomaly detection and natural language processing. Strong problem-solving skills and analytic capability to develop insights and recommendations Excellent communication skills in Hebrew and English Preferred Qualifications: Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.","About Team & About Role: Rubrik is seeking a Data Scientist to join our Data Research team, part of the Security Apps organization responsible for developing Rubrik’s suite of security products. This is the first data scientist position on the team, requiring a high degree of independence and ownership. You’ll spearhead research and development of ML/AI models that power the features of our security offerings. What You’ll Do: Research and develop ML/AI models in the NLP domain Develop advanced anomaly detection models for threat detection Work closely with data analysts on research, pattern analysis, etc. Work closely with developers on implementation of models as part of features in the product Work with global teams to continuously push our ML infrastructure forward Experience You’ll Need: Bachelor’s degree in Mathematics, Computer Science, or other related field 3 years of experience in data science or a related field Proficiency in programming languages, especially Python Experience with common data science toolkits, such as Jupyter Notebook, Pandas, NumPy, Matplotlib, etc. Demonstrated experience in implementing ML algorithms in production environments, with a focus on anomaly detection and natural language processing. Strong problem-solving skills and analytic capability to develop insights and recommendations Excellent communication skills in Hebrew and English Preferred Qualifications: Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.","Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.",18,17,6,4,"List(-0.07289213, -0.030340372, -0.05981637, 0.0065679974, 0.0400004, -0.044599734, 0.04036256, -0.017003872, 0.0074327313, 0.05728951, -0.035771415, 0.054015167, 0.13501756, -0.020386264, -0.039017048, 0.04988803, 0.008080918, -0.013436759, -0.06938608, -0.034345537, -0.099463746, -0.055192128, -0.037190937, -0.01884894, -0.04213097, 0.0058876406, 0.02865954, -0.023633955, -0.04426467, -0.09277752, -7.7801803E-4, -0.042138413, -0.03550346, 0.115184695, -0.03370891, 0.06498506, -0.0054475255, 0.035087533, -0.036142766, -0.0122957975, -0.008162714, -0.07490176, -0.07230306, -0.016296223, 0.015962979, 0.03722575, 0.0284575, -0.078981146, -0.009736331, -0.02624088, -0.08058972, -0.03454171, 0.005524349, 0.06996719, -0.020686228, -0.009180767, 0.05449644, 0.006162978, 0.014225302, -0.0042977226, 0.020088326, -0.068702206, -0.015601614, 0.029444218, 0.075774506, 0.02124625, -0.027471082, 0.105055094, -0.033724662, -0.018189644, 0.07469808, -0.03796302, -0.113674246, 0.09655993, -0.022184994, -0.0065475367, -0.013761155, 0.0027958273, 0.06523008, 0.03823709, 0.055957273, 0.023804443, -0.0576508, 0.13591085, -0.057902783, 0.01001421, 0.018008217, -0.016461775, 0.06540777, 6.925706E-4, 0.018430619, 0.061112557, 0.06985165, 0.009995331, 0.04858914, -0.042218007, 0.0044896277, -0.005997536, -0.05338316, 0.07298752, -0.041667152, 0.00891381, -0.029491594, -0.03575739, 0.015153735, -0.0037409582, 0.020239748, 0.019305153, 0.02208365, -0.0049091927, -0.022433205, 0.0051460215, -0.09826178, -0.053939372, 0.05534406, -0.035578918, -0.08876322, 0.037632033, 0.010103798, 0.024539717, 0.06350928, 0.05122902, 0.02177254, -0.05097138, -0.0046913642, -0.061871856, -0.056224722, 6.098462E-33, 0.013381439, 0.06222028, -0.012046396, -0.04829028, 0.03778995, -0.02556679, -0.041738424, -0.0057326322, -0.06616455, 0.06276906, -0.06295241, 0.12878166, -0.010765186, -0.004588837, 0.023189303, 0.048034027, 0.047034286, 0.023608709, 0.026752438, -0.018723223, 0.089523114, -0.036197342, 0.01691886, 0.011478776, 0.06829055, -0.022548521, 0.05742319, 0.008319291, 0.103013635, 0.023801962, -0.028299987, 0.059575956, -0.030750694, 0.020505933, 0.05444873, -0.018156487, -0.07629525, -0.053061794, 0.010241013, -0.01776243, 6.6904165E-4, 0.015582273, 0.01544982, 0.02172715, 0.04542658, 0.02777145, 0.049802367, -0.015880823, 0.029966535, -0.008710416, -0.03229327, 0.00391692, 0.048388906, -0.017954055, -0.069141544, -0.0376994, 0.048655633, -0.04981254, -0.018785603, 0.0028481148, -0.050060734, -0.032038573, -0.041938066, -0.026226034, -0.062011145, -0.06226167, 0.008167414, 0.021215715, 0.10000681, -0.031998474, -0.01157814, 0.04659558, 0.0413425, 0.0120909875, -0.0059630997, -0.061113574, 0.015056668, 0.002046312, -0.007555149, 0.0065465234, -0.054647226, -0.010591014, 0.024643635, 0.02409728, -0.05319144, 0.012096853, 0.016166653, -0.029348109, -0.038792126, 0.047292944, -0.05190577, -0.03647121, 0.0417786, 0.058436934, -0.13710886, -6.167406E-33, -0.093684874, -0.090784356, -0.08963656, 0.14384626, 0.0502114, -0.005334812, 8.3816104E-4, 0.006448003, 0.018632542, 0.034852687, -0.03479257, -0.048031557, 0.07179026, -0.0044782595, 0.01159033, -0.018867213, -0.017753063, -0.023828123, -0.07721065, -0.037043437, -0.01898055, 0.08908281, -0.07103741, 0.055065993, 0.06964937, 0.06269305, 5.5833134E-5, -0.0075071887, 0.03857863, 0.07049997, 0.015468139, -0.008204014, -0.050700102, 0.030330477, -0.09295292, -0.027916519, 0.0945187, 0.025767062, -0.041009303, 0.005181734, 0.03695329, 0.04110959, -0.09496379, -0.064138755, 0.0043951822, -0.022017656, -0.006610499, 0.061162565, -0.008899818, -0.07595088, 0.03497511, 0.0223138, 0.037258834, -0.022569474, 0.02746722, -0.03961029, 0.040118903, 0.070141874, -0.015555283, 0.09874891, 0.0070778886, -0.06419266, 0.037072252, 0.10482665, -0.012456829, 0.04716042, 0.011621758, 0.055096082, -0.10871013, -0.050731946, 0.047308262, -0.027373847, -0.08163774, -0.057446472, 0.01690613, -0.03626415, -0.03104146, -0.07683744, -0.08939056, 0.041512787, -0.0064674346, 0.012051834, -0.044039972, 0.057913017, 0.09773524, 0.016597683, 0.08293302, -0.035249997, 0.01334753, -0.018245352, -0.13362627, -0.06774388, -0.1371837, 0.011445112, -0.08287677, -5.844996E-8, -0.0074923313, 0.047607984, -0.008574294, 0.04932184, 0.09548793, -0.022991665, -0.062390525, 0.0011072892, -0.046930786, 0.04392187, 0.039935052, -0.06669634, -0.08296595, -0.047279626, 0.09021157, 0.06746137, 0.021236284, -0.012537146, -0.0245657, 0.010611118, 0.07947723, 0.04050398, -0.10445365, -0.016352464, 0.03460021, 0.0025310926, 8.754946E-4, 0.053973865, 0.029708631, 0.017548995, -0.06966576, -0.08306728, 0.063008994, -0.082673915, -0.031218115, 0.043465804, 0.046377912, -0.015291857, 0.069226116, 0.027207697, -0.05170797, 0.033593338, 0.06664541, 0.0024743988, -0.09755682, -0.021687943, -0.03488433, -0.02378787, 0.0019476991, -0.0034185168, 0.019504573, -0.07545233, 0.014551777, 0.10775465, 0.089483455, -0.07307995, 0.030512756, -0.047035377, 0.022304554, 0.11683317, 0.073730156, -0.07675579, -0.024696965, 0.013263086)",data scientist
data scientist!,Horizon Technologies,"לארגון גדול ומוביל טכנולוגית בירושלים דרוש/ה data scientist! עבודה במתכונת היברידית מרובה! תיאור התפקיד: איש מדע נתונים והתממה ימלא תפקיד חשוב במחלקת ביג דאטה ויהיה אחראי על שורה של פעילויות שיאפשרו מחקרי נתונים על מאגרי המשרד וארגונים שעובדים עם המחלקה. יישום מדיניות ההתממה של מחלקת ביג דאטה באמצעות עבודה צמודה עם מהנדסי נתונים שיבצעו את פעולות ההתממה ואף באמצעות ביצוע פעולות אלה בעצמו בכלי ניתוח המוגדרים בתמנ""ע, כגון SQL, PYTHON, דאטאיקו ואחרים. כמו כן יהיה אחראי על תהליכי בדיקת איכות (QA) ההתממה בסביבה בה בוצעו האנליזות והעיבודים על הנתונים, או בליווי מהנדס נתונים שיבצע פעולות אלה. המערכות שבשימוש SQL, PYTHON, DATAIKU, CLOUDERA, לא מוגבלות לשימוש רק באלה. דרישות: 3 שנות ניסיון ומעלה בתפקידי דאטה מתוכן שנה אחת לפחות ב- data science (תפקידי דאטה: אנליטיקה של נתונים, הנדסת נתונים, מדע נתונים, פיתוח BI) ניסיון בהפעלת כלי התממה טבלאיים (למשל Privitar, Dataiku) - מינימום שנה ניסיון, מינימום עבודה על 30 טבלאות ניסיון בהתממה בתחום הבריאות - חובה. ניסיון בהתממת נתונים טבלאיים חובה. ⁠התממת טקסט חופשי - חובה. התממת טקסט חופשי בעברית - יתרון משמעותי. ניסיון בעבודה על התממה מול חוקרים באקדמיה - יתרון Opened URL: https://il.indeed.com/rc/clk?jk=f90aa7093544f010&bb=r6UIYw2mow7Pn8IzQwq2_xyaGdaoejrhyKNPDULo5mrjaIrPYiIvCcF52gDTKS5ckQ4Tob9j7S8rgIEWt6fB4Q2dVUswzFz8eib5DhxHjVmhwfPeuvoDONJ99fqrpBqv&xkcb=SoCk67M33wH7H8WWaB0BbzkdCdPP&fccid=e32508b84ec45c2b&vjs=3",data,"לארגון גדול ומוביל טכנולוגית בירושלים דרוש/ה data scientist! עבודה במתכונת היברידית מרובה! תיאור התפקיד: איש מדע נתונים והתממה ימלא תפקיד חשוב במחלקת ביג דאטה ויהיה אחראי על שורה של פעילויות שיאפשרו מחקרי נתונים על מאגרי המשרד וארגונים שעובדים עם המחלקה. יישום מדיניות ההתממה של מחלקת ביג דאטה באמצעות עבודה צמודה עם מהנדסי נתונים שיבצעו את פעולות ההתממה ואף באמצעות ביצוע פעולות אלה בעצמו בכלי ניתוח המוגדרים בתמנ""ע, כגון SQL, PYTHON, דאטאיקו ואחרים. כמו כן יהיה אחראי על תהליכי בדיקת איכות (QA) ההתממה בסביבה בה בוצעו האנליזות והעיבודים על הנתונים, או בליווי מהנדס נתונים שיבצע פעולות אלה. המערכות שבשימוש SQL, PYTHON, DATAIKU, CLOUDERA, לא מוגבלות לשימוש רק באלה. דרישות: 3 שנות ניסיון ומעלה בתפקידי דאטה מתוכן שנה אחת לפחות ב- data science (תפקידי דאטה: אנליטיקה של נתונים, הנדסת נתונים, מדע נתונים, פיתוח BI) ניסיון בהפעלת כלי התממה טבלאיים (למשל Privitar, Dataiku) - מינימום שנה ניסיון, מינימום עבודה על 30 טבלאות ניסיון בהתממה בתחום הבריאות - חובה. ניסיון בהתממת נתונים טבלאיים חובה. ⁠התממת טקסט חופשי - חובה. התממת טקסט חופשי בעברית - יתרון משמעותי. ניסיון בעבודה על התממה מול חוקרים באקדמיה - יתרון","A large, technologically leading organization in Jerusalem needs a data scientist! Work in a hybrid multi-tasking format! Job description: A data scientist and data scientist will play an important role in the Big Data department and will be responsible for a series of activities that will enable data research on the office's databases and organizations that work with the department. Implementing the Big Data department's data mining policy by working closely with data engineers who will perform the mining operations and even by performing these operations themselves in analysis tools defined in the Data Mining Act, such as SQL, PYTHON, Dataiku, and others. He will also be responsible for the quality assurance (QA) processes of the mining in the environment where the analyses and processing of the data were performed, or accompanied by a data engineer who will perform these operations. The systems used are SQL, PYTHON, DATAIKU, CLOUDERA, but are not limited to these. Requirements: 3 years or more of experience in data roles, including at least one year in data science (data roles: data analytics, data engineering, data science, BI development) Experience operating tabular mining tools (e.g. Privitar, Dataiku) - minimum one year of experience, minimum work on 30 tables Experience in mining in the healthcare field - mandatory. Experience in tabular data mining is mandatory. ⁠Free text mining - mandatory. Free text mining in Hebrew - Significant advantage. Experience working on Tama with researchers in academia - an advantage","3 years or more of experience in data roles, including at least one year in data science (data roles: data analytics, data engineering, data science, BI development) Experience operating tabular mining tools (e.g. Privitar, Dataiku) - minimum one year of experience, minimum work on 30 tables Experience in mining in the healthcare field - mandatory. Experience in tabular data mining is mandatory. ⁠Free text mining - mandatory. Free text mining in Hebrew - Significant advantage. Experience working on Tama with researchers in academia - an advantage",1,0,7,0,"List(0.0048395903, 0.013474981, -4.6108465E-4, 0.033169035, -0.0015383915, -0.03589451, -0.025359144, -0.04857117, -0.086602196, 0.03455871, -0.009233468, -0.060507506, 0.07188401, 0.008356633, -0.011003715, 0.06613946, -0.043422654, -0.03669183, 0.020024369, -0.11106292, -0.07324923, -0.035057608, 0.030420432, -0.0674165, 0.06169187, 0.06367073, 0.04096362, -0.025267603, 0.017368084, 0.0045006434, -0.021369968, 0.057924293, 0.052214414, 0.032727815, -0.063253954, 1.8203558E-4, 0.0074331937, 0.027551843, -0.055652414, 0.031571854, -0.117418915, -0.070105344, 0.02659749, 2.555617E-4, 0.04942464, -0.039711464, -0.016701607, -0.065611586, -0.011126336, 0.079763584, -0.09025868, 0.0030708879, 0.02391268, 0.024948454, 0.0038221711, -0.0024449953, -0.032904543, -0.045291156, -0.08935426, 0.006932749, -0.049685556, -0.043432415, 0.01114476, 4.895808E-4, 0.017004572, -0.039009545, 0.0011858903, -0.0019739994, -0.0037432362, -0.07639301, -0.047218323, 0.016359175, -0.05237865, 0.08992956, 0.039049022, 0.004011312, 0.08741041, 0.010110999, 0.06951277, -0.08430587, -0.002419618, 0.07324384, -0.03994256, 0.1310975, 0.004337059, -0.05777602, -0.08485005, 0.05382131, -0.057732508, 0.020484613, 0.14757474, -0.026841344, 0.021605369, -0.0034469718, 0.011984436, -0.009784261, 0.02196769, 0.04573576, 0.027811766, 0.018153084, -0.077609055, -0.030114781, 0.008694176, 0.11804576, -0.08375799, 0.0042285165, 0.020608911, -0.036467444, 0.014224153, -0.005334336, -0.006303663, -0.025486343, -0.01343423, -0.021703634, -0.013443224, 0.053187545, -0.05610276, -0.021196323, 0.06565742, 0.12380341, -0.038632285, 0.06607127, -0.03991126, -0.039314065, 0.03028141, 0.017697236, -0.06416469, 3.947448E-33, 0.045040336, -0.011378046, 0.020678809, 0.003652075, 0.009944002, -1.213742E-4, -0.021094672, -0.02392269, 0.0109726405, 0.011006044, -0.033641826, 0.11744101, -0.026485754, 0.0017525661, 0.10647632, 0.0010257988, 0.020566013, 0.0030210796, -0.04632511, -0.031165773, 0.05611223, -0.12323962, -0.0014521166, 0.10868754, 0.0013452282, 0.06591871, 0.034194715, 1.13058035E-4, 0.12684053, 0.03164963, -0.019945454, 0.0028874825, -0.07254802, -0.05893657, 0.042369943, 0.05748708, -0.015217194, 0.011801982, 0.037131585, 0.015884494, -0.0535933, 0.007103825, 0.05014516, -0.025902018, -0.05786789, -0.033407208, 0.058746543, -0.009930774, 0.033465922, 0.03136015, 9.5801236E-4, -0.10124956, 0.035412762, 0.018191293, 0.06427571, 0.11864245, 0.0039787595, -0.056581117, 0.009389266, 0.024724603, -0.029441549, 0.00514175, -0.028353598, 0.009766329, -0.049671624, 0.011742192, 0.017105294, -0.016483339, 0.12561902, -0.009663994, -0.030089494, 0.057191275, -0.081567675, -0.014387297, 0.0065987436, -0.0019839073, 0.032353934, -0.12630259, -0.0015982754, 0.024749344, 0.0016531039, 0.012488304, 0.01809853, -0.0022873622, -0.0017408885, 0.019560993, 0.03306641, -0.07904336, -0.046832807, 0.021329502, -0.11068023, 0.04484829, -0.011406869, 0.059663642, 0.042238202, -4.5442602E-33, -0.06374157, -0.031289194, 0.03526373, 0.048994467, 0.10186561, -7.9740526E-4, -0.027864385, 0.074478, 0.08457226, -0.042474817, -0.020645905, 0.013882689, 0.06272642, 0.019521348, 0.008821202, 0.07089933, -0.0829732, 0.0132857105, -0.092786245, 0.061982147, 0.012436218, 0.01598615, -0.07818111, -0.01502369, 0.010456022, -0.013016361, -0.029534943, -0.08279785, -0.045115203, 0.04009825, -0.06112738, -0.01617418, -0.05570779, -0.062831655, -0.033876404, -0.105197005, 0.048326932, -0.012153526, 0.014713715, 0.06811071, 0.033023596, 0.052606855, -0.046515793, -0.011061293, -0.0381001, -0.028592523, 0.027974276, -0.039428204, 0.049554825, -0.05499338, -0.002596004, -0.010957713, 0.03739715, -0.0389681, 0.026186004, -0.016037755, 0.05132767, 0.008604449, -0.1006004, 0.026933169, 0.02411355, 0.024413206, 0.068233304, 0.030705495, 2.8217628E-4, -0.04568935, 0.082492895, 0.0768759, -0.09907363, -0.042688787, 0.021241862, 0.0016781096, -0.097942844, 0.0024164114, -0.02217947, -0.07845772, -0.06643217, -0.017172974, -0.0708999, -0.037034377, 0.0026208414, -0.0458625, -0.033781674, -0.013137021, 0.019449322, 0.038254574, 0.0675316, -0.060759615, -0.016831232, -0.078328565, -0.067000635, -0.015942914, -0.136819, 0.059363566, -0.003658252, -4.202063E-8, -0.022479307, -0.039796926, 0.002340997, -0.0010500224, 0.110276274, 0.024905324, -0.06215274, 0.08433261, 0.016623728, 0.011312068, 0.091683835, 0.06498514, -0.052895166, -0.06171946, 0.082970105, 0.055608585, 0.07369733, 0.02402375, -0.024252389, 0.008873651, 0.07395524, 0.034142863, -0.05994689, -0.028322771, -0.023399577, -0.047040652, -0.004887025, 0.0032866092, -0.015483495, -0.062443383, -0.0038231127, -0.0031799208, 0.041988455, -0.113730125, 3.6035693E-4, -0.0073135206, 0.064032674, -0.08367593, -0.08863313, -0.040098447, -0.002110123, 0.1027698, 0.05202178, -0.032298986, -0.07098642, 0.020638833, -0.02499058, -0.006376706, 0.037952263, 0.012169182, 0.050350707, 0.008455765, 0.009524714, -0.0148132, 0.043597065, 0.14221296, 0.089372315, 0.0035243714, -0.024033424, 0.013684534, 0.09931712, -0.048276927, -0.027133787, 0.051545847)",data scientist
Data Scientist,Ticomsoft,"jb-19 מספר משרה : עם ניסיון ב Data Scientist דרוש איש/ת פיתוח Evaluating state of art statistical modeling and machine Learning. Big data analysis, Modeling and Reporting. Job Requirements:Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. השם שלך מספר המשרה(חובה) העלאת קו""ח שתף את המשרה ותן לחברים שלך לעזור Opened URL: https://il.indeed.com/rc/clk?jk=d93f96511474983d&bb=r6UIYw2mow7Pn8IzQwq2_5aHKW3wZhMixewaX6PYd3gfA9lT-ljRyZagyU-B6VRsq5WetHFTiwUEwJhRVi0EoeZ2l1gDnl0VKDEJDOrEiNtxwcGFGMy1pCHdHBhSYAxX&xkcb=SoAQ67M33wH7H8WWaB0AbzkdCdPP&fccid=9c02dc25b0c9c8ac&vjs=3",data,"jb-19 מספר משרה : עם ניסיון ב Data Scientist דרוש איש/ת פיתוח Evaluating state of art statistical modeling and machine Learning. Big data analysis, Modeling and Reporting. Job Requirements:Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. השם שלך מספר המשרה(חובה) העלאת קו""ח שתף את המשרה ותן לחברים שלך לעזור","jb-19 Job Number: With experience in Data Scientist Need a Development Person Evaluating state of art statistical modeling and machine Learning. Big data analysis, Modeling and Reporting. Job Requirements:Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. Your Name Job Number (Required) Upload Resume Share the job and let your friends help","Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. Your Name Job Number (Required) Upload Resume Share the job and let your friends help",1,10,0,0,"List(-0.04092557, -0.020160701, 1.7924055E-4, 0.012505377, -0.06535141, -0.11063129, -0.045046557, -0.050744884, -0.16898018, -0.02009028, -0.1333408, -0.075374976, 0.062356886, 0.013289067, -0.020168647, 0.024086565, 0.032737125, -0.030495089, 0.012634427, -0.14221705, -0.0064802836, 0.008082138, 0.044152442, -0.11083116, 0.0076307636, 9.839776E-4, 0.027103202, -0.0018489517, 0.0077641485, 0.015572382, 0.079635575, -0.037088837, 0.05225636, 0.029349996, 0.034570467, 0.093341455, 0.0355956, -0.065314144, 0.017861785, 0.040672973, 0.0060955747, -0.021015655, 6.763247E-4, -0.07784023, -0.0076990263, 0.027043324, 0.04104211, -0.009585866, 1.2408217E-4, 0.004802105, -0.0739171, -0.020141084, -0.011455317, -0.009942873, -0.053470224, 0.017779863, 0.016332572, 0.01626273, -0.039531425, -0.07934174, -0.038194586, -0.033280995, 0.0054489453, 0.021027436, 0.0082057575, 0.026223373, -0.04945125, 0.04250433, 0.027058892, -0.022017818, 0.005690158, 0.014736115, -0.09864827, 0.0879799, 0.042158235, -0.030810224, 0.103938065, -0.035973977, 0.028027406, 0.054565694, 0.028340539, -0.0012473143, -0.10793344, 0.09957721, -0.05848859, -0.07053785, -0.060091436, 0.036417186, -0.015993739, -0.026940381, 0.1207756, -0.01842962, -0.041186746, 0.03275695, 0.023144055, -0.015868168, 0.024496483, -0.01040622, -0.049238987, 0.008982613, -0.042768177, -0.08531911, 0.05319909, 0.07982203, -0.05343941, 0.07815546, 0.034868468, 0.029319895, 0.031012354, -0.011618789, -0.016687442, 0.002124714, -0.06373557, 0.0072545162, 0.059200477, 0.021319171, -0.1046346, 0.009483932, -0.00326053, 0.052100997, -0.031879395, 0.06572616, -0.023424476, -0.0228157, 0.02230857, -0.030649547, -0.070466615, 3.3767875E-33, -0.08676833, 0.046337795, 0.043504402, 0.007715151, 0.034666196, 0.0030803855, 0.0069413637, 0.04458905, -0.024270609, 0.044773415, -0.03406988, 0.050026886, -0.043561783, 0.029541278, 0.038764745, 0.040831435, 0.019838838, -0.0011742288, -0.01863255, -0.036313213, 0.016313422, -0.044315614, -0.044724356, 0.039594933, -0.016577164, 0.058962613, 0.045881134, -0.044143368, 0.13863884, -0.018361691, -0.016324745, -0.0022603818, -0.10374386, -0.034652095, 0.08838591, 0.040363226, -0.027754169, -0.05755519, 0.051701754, 0.038307022, -0.033531744, 0.07027368, 0.05773323, -0.052248288, -0.046753522, -0.016747637, 0.06961775, 0.006034486, 0.06329993, 0.055357784, -0.039535128, -0.08234962, 0.038335264, 0.003930382, 0.008496996, 0.104660824, 0.07433959, 0.001733802, -0.0396726, 0.05600437, -0.035259448, -0.07486254, -0.0037910682, -0.027716631, -0.049563557, -0.029013705, 0.07500893, 0.046639487, 0.119694285, 0.058798354, -0.06819337, -0.029676678, 0.008414186, 0.025281841, 0.016699243, 0.043609098, 0.03639196, -0.09565923, 0.04194374, 0.06695692, -0.007192127, 0.04533081, 0.014562074, -0.047925178, 0.07053153, 0.025163846, -0.007166438, -0.04669039, -0.05631646, 0.038601853, -0.02606758, -0.02425626, -0.011777362, 0.07616436, -0.045216665, -4.6347454E-33, -0.11076515, 0.011923874, -0.026820773, 0.053641893, 0.10110205, 0.020879531, 0.034375228, -0.0011270402, 0.05120444, 0.021814391, 0.021788074, 0.026547976, 0.050953094, 0.057450503, -0.044540677, 0.009504678, -0.076478824, 0.021436336, -0.036079403, 0.008197549, -0.0061922055, 0.053777914, -0.09410696, 0.017305123, 0.059407048, -0.026153013, -0.024485955, -0.060062647, 0.032768242, 0.030451417, -0.0053938627, 0.025910513, -0.07462083, 0.008180911, 0.047502875, -0.036398552, 0.041836835, -0.044949643, 0.013445011, 0.099063605, 0.07412437, 0.05381197, -0.031229671, -0.051814687, -0.06031427, 0.0015208415, 0.050162237, 0.04456532, 0.058834013, -0.09713462, -0.01480485, 0.019529901, 0.047347892, 0.013847136, -0.008941478, -0.047565673, 8.989993E-4, 0.00430485, 0.060513042, -0.047589228, -0.061253183, -0.0022376676, 0.048190035, 0.053239778, -0.00816062, -0.06505503, -0.023944901, 0.061506916, -0.11576777, -0.059143733, -0.02167941, 0.02494902, 0.06601477, 0.033002708, -0.16577312, -0.050804224, -0.06872538, -0.012432318, -0.01074412, 0.021279823, -0.034875195, 0.019605944, -0.064350285, -0.0010019243, 0.033660606, 0.060277753, 0.034921035, -0.10596398, 0.034974623, -0.13422464, -0.061964855, 0.060254633, -0.06021089, 0.0203073, -0.08177953, -4.300369E-8, -0.04447207, 0.013056857, 0.014242517, -0.024192598, 0.052242618, 0.086567536, -0.06901299, -4.6104155E-4, -0.022093572, -0.012083832, 0.029385826, -0.05850592, -0.025987452, 0.02960585, 0.02754474, 0.021982728, 0.056204222, 0.029171973, 0.033118814, 0.026825486, 0.05082226, -0.023952767, -0.04513667, 0.003287919, 0.019777445, -0.12197201, 0.022636348, -0.054477375, -0.039094195, 0.07143915, -0.043646403, -0.08131345, 0.062699676, -0.07265724, 0.011062325, 0.08267833, 0.035745293, -0.03102297, -0.043386567, -0.03843673, -0.0348406, 0.13857208, 0.005230959, -0.0109945685, 0.017254714, 0.017511647, 0.025063336, -0.0030690136, 0.014449242, 0.05182806, 0.034470394, -0.045593277, -0.018597424, -0.013150741, 0.0843246, 0.0036557189, 0.021021336, -0.008411995, -0.021962907, 0.07081728, 0.07152822, 0.032463443, -0.041099526, 0.039364435)",data scientist


# Skill List

## N-grams

In [0]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [0]:
from pyspark.ml.feature import NGram, Tokenizer
from pyspark.ml import Pipeline
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))
words_out = ['year', 'experience', 'degree', 'b.sc', 'bachelor', 'url', 'skill', 'bsc', 'salary', 'qualification']

tokenizer = Tokenizer(inputCol="Requirements_Text", outputCol="words")
tokenized = tokenizer.transform(requirements_clusters_new)
bigram = NGram(n=2, inputCol="words", outputCol="bigrams")
ngram_data = bigram.transform(tokenized)
trigram = NGram(n=3, inputCol="words", outputCol="trigrams")
ngram_data = trigram.transform(ngram_data)
quadrigram = NGram(n=4, inputCol="words", outputCol="quads")
ngram_data = quadrigram.transform(ngram_data)

# Function to clean individual tokens
def clean_tokens(token, ngram=False):
    tokens = token.split('/')
    tokens = [re.sub(r'[^a-zA-Z\+\-]', '', t).strip() for t in tokens if not any(w in t for w in words_out)]
    tokens = [t for t in tokens if t]  # Remove empty strings or tokens that are just spaces
    
    if ngram:
        return ' or '.join(tokens)
    tokens = [t for t in tokens if any(char.isalpha() for char in t)]  # Keep tokens with at least one letter
    return tokens

def clean_unigram(tokens):
    cleaned = []
    for token in tokens:
        for w in clean_tokens(token.lower()):
            if w in stop_words:
                continue
            cleaned.append(w)
    return cleaned

# Function to clean n-grams
def clean_ngram(ngrams, n=2):
    cleaned = []
    for x in ngrams:
        tokens = x.split()
        if not x or not tokens or len(tokens) < n or any(w in x.lower() for w in words_out):     # Accounted for in other columns
            continue
        # Remove if there are unwanted symbols - all n
        if any(sym in x for sym in ['_', '. ']) or x.count('"') % 2 == 1 or (x.count('(') + x.count(')')) % 2 == 1:
            continue
        # Remove if there are unwanted symbols - n = 2, 3
        if n <= 3 and (any(sym in x for sym in [', ', '- ', ' -', ' (']) or any(sym in tokens[0] for sym in [')'])):
            continue
        # Clean each token in the n-gram separately
        words = [clean_tokens(t.lower(), ngram=True) for t in tokens]
        words = [w for w in words if w]
        if len(words) < len(tokens) or (len(words) <= 2 and any(w in stop_words for w in words)) or (len(words) > 2 and (words[-1] in stop_words or words[0] in stop_words)):
            continue
        cleaned.append(' '.join(words))
    return cleaned

# Clean n-grams
clean_bigram_udf = udf(lambda bigram: clean_ngram(bigram, n=2), ArrayType(StringType()))
cleaned_ngram = ngram_data.withColumn("cleaned_bigrams", clean_bigram_udf(col("bigrams")))
clean_trigram_udf = udf(lambda trigram: clean_ngram(trigram, n=3), ArrayType(StringType()))
cleaned_ngram = cleaned_ngram.withColumn("cleaned_trigrams", clean_trigram_udf(col("trigrams")))
clean_quadrigram_udf = udf(lambda quad: clean_ngram(quad, n=4), ArrayType(StringType()))
cleaned_ngram = cleaned_ngram.withColumn("cleaned_quads", clean_quadrigram_udf(col("quads")))

# Clean individual words
cleaned_tokens_udf = udf(clean_unigram, ArrayType(StringType()))
cleaned_ngram = cleaned_ngram.withColumn("cleaned_unigrams", cleaned_tokens_udf(col("words")))


# cleaned_ngram.select("Requirements_Text", "cleaned_unigrams", "cleaned_bigrams", "cleaned_trigrams", "cleaned_quads").limit(10).display()
cleaned_ngram.select("Requirements_Text", "cleaned_trigrams", "cleaned_quads").limit(50).display()

Requirements_Text,cleaned_trigrams,cleaned_quads
"+3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)","List(ability to lead, lead data investigations, investigations and analysis, projects with ambiguous, ambiguous requirements advanced, requirements advanced statistic, statistic and modeling, visualizing and presenting, analyses in tools, tools like jupyter, jupyter and tableau, programming in python, professional setting familiarity, familiarity with distributed, databases and sql, sql highly organized, passionate about achieving, achieving results curious, analytical and problem, familiarity with distributed)","List(ability to lead data, data investigations and analysis, investigations and analysis projects, analysis projects with ambiguous, projects with ambiguous requirements, ambiguous requirements advanced statistic, advanced statistic and modeling, statistic and modeling knowledge, visualizing and presenting analyses, presenting analyses in tools, analyses in tools like, like jupyter and tableau, python in a professional, setting familiarity with distributed, familiarity with distributed computation, computation storage and workflow, storage and workflow management, splunk spark kubernetes kafka, spark kubernetes kafka hadoop, kubernetes kafka hadoop mapreduce, kafka hadoop mapreduce airflow, relational databases and sql, databases and sql highly, sql highly organized creative, highly organized creative motivated, creative motivated and passionate, passionate about achieving results, curious and have excellent, excellent analytical and problem, analytical and problem solving, written verbal and data, verbal and data communication, familiarity with distributed computation, computation storage and workflow, storage and workflow management, splunk spark kubernetes kafka, spark kubernetes kafka hadoop, kubernetes kafka hadoop mapreduce, kafka hadoop mapreduce airflow)"
"into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker","List(designs and tasks, production troubleshooting requirements, troubleshooting requirements -, developing data-oriented products, data-oriented products preferably, products preferably using, preferably using python or java, data processing fw, developing microservices-based architecture, working in container-based, container-based environments using, environments using tools, agile environment nice, machine learning fws, redhat openshift dev, openshift dev environment, dev environment knowledge, environment knowledge git)","List(actionable designs and tasks, designs and tasks development, development ut and production, ut and production troubleshooting, production troubleshooting requirements -, developing data-oriented products preferably, data-oriented products preferably using, products preferably using python or java, fw such as spark, spark pandas hadoop airflow, working in container-based environments, container-based environments using tools, tools such as ks, working as a software, developer in an agile, fws such as sklearn, redhat openshift dev environment, openshift dev environment knowledge, dev environment knowledge git, environment knowledge git jenkins, knowledge git jenkins docker)"
"A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.","List(attention to detail, commitment to maintaining, maintaining high-quality annotation, high-quality annotation standards, written and verbal, ability to work, group of individuals, availability to work, work full time, time when needed, familiarity with data, data annotation processes, especially non-structured data, working with seismic, familiarity with sql, sql and data, data visualization tools, matlab and python)","List(stem fields an advantage, strong attention to detail, detail and a commitment, commitment to maintaining high-quality, maintaining high-quality annotation standards, good written and verbal, ability to work effectively, effectively with a diverse, diverse group of individuals, availability to work full, full time when needed, nice-to-haves familiarity with data, familiarity with data annotation, data annotation processes especially, annotation processes especially non-structured, processes especially non-structured data, data images video audio, working with seismic data, seismic data heat maps, heat maps or spectrograms, maps or spectrograms familiarity, spectrograms familiarity with sql, sql and data visualization, coding languages preferably matlab, preferably matlab and python)"
"B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!","List(highly quantitative field, mathematics industrial engineering, industrial engineering economics, engineering economics statistics, statistics or equivalent, equivalent academic knowledge, knowledge at least, hands-on data analysis, analysis and story-telling, measuring online success, success and identifying, identifying optimization opportunities, optimization opportunities excellent, opportunities excellent understanding, understanding of online, online marketing metrics, marketing metrics high, metrics high proficiency, proficiency in sql, sql with demonstrated, querying large complex, large complex datasets, complex datasets strong, datasets strong excel, data visualization tools, sense and looker, looker a solid, grasp of statistical, statistical significance models, models and tools, tools desired marketing, desired marketing metrics, metrics of user, user acquisition kpis, offer a workplace, comfortable environment ergonomic, environment ergonomic chairs, chairs and desks, desks to support, support your health, beautifully designed spaces, spaces that inspire, on-site amenities gym, room for relaxation, relaxation and rejuvenation, delicious perks -star, perks -star breakfast, breakfast to kickstart, kickstart your day, freshly made coffee, wellness full health, full health insurance, health insurance coverage, peace of mind, growth opportunities learning, learning and training, programs to enhance, enhance your personal, personal and professional, vibrant culture happy, culture happy hours, plenty of opportunities, opportunities to connect, connect with colleagues, enjoyment come first)","List(field such as mathematics, mathematics industrial engineering economics, industrial engineering economics statistics, economics statistics or equivalent, statistics or equivalent academic, academic knowledge at least, data analysis and story-telling, online success and identifying, success and identifying optimization, identifying optimization opportunities excellent, optimization opportunities excellent understanding, excellent understanding of online, understanding of online marketing, online marketing metrics high, marketing metrics high proficiency, high proficiency in sql, querying large complex datasets, large complex datasets strong, complex datasets strong excel, tools such as qlik, qlik sense and looker, looker a solid grasp, solid grasp of statistical, grasp of statistical significance, significance models and tools, models and tools desired, tools desired marketing metrics, marketing metrics of user, metrics of user funnel, user funnel user acquisition, funnel user acquisition kpis, kpis what we offer, offer a workplace designed, designed for your well-being, well-being - comfortable environment, - comfortable environment ergonomic, comfortable environment ergonomic chairs, ergonomic chairs and desks, designed spaces that inspire, spaces that inspire creativity, - on-site amenities gym, on-site amenities gym yoga, amenities gym yoga room, yoga room and music, room and music room, music room for relaxation, - delicious perks -star, delicious perks -star breakfast, -star breakfast to kickstart, freshly made coffee shakes, coffee shakes and afternoon, shakes and afternoon salads, salads by our in-house, wellness full health insurance, full health insurance coverage, coverage for your peace, - growth opportunities learning, opportunities learning and training, learning and training programs, training programs to enhance, - vibrant culture happy, vibrant culture happy hours, culture happy hours team, happy hours team events, team events and plenty, workplace where your well-being, well-being growth and enjoyment, growth and enjoyment come)"
"B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.","List(proficiency in python, focus on libraries, libraries like pandas, pandas for data, knowledge of sql, sql for querying, querying and managing, managing large datasets, bi tools like, tools like tableau, tableau to create, dashboards and reports, fluency in english, written and verbal, data analyst role-, familiarity with snowflake, snowflake or aws, web or app product analytics, product analytics tools, knowledge of data, data science concepts, concepts and metrics, including speech ai, large language models, natural language understanding, automatic speech recognition, data to uncover, provide recommendations based, based on data, analysis to support, support business decisions, decisions and drive, drive product improvements, develop and maintain, dashboards and reports, reports using bi, using bi tools, optimize data workflows, workflows and pipelines, pipelines using python, python and sql, collaborate with cross-functional, support data-driven decision-making)","List(industrial engineering data analytics, engineering data analytics statistics, statistics or a related, python with a strong, strong focus on libraries, focus on libraries like, like pandas for data, pandas for data manipulation, solid knowledge of sql, querying and managing large, bi tools like tableau, like tableau to create, tableau to create dashboards, create dashboards and reports, english written and verbal, data analyst role- advantage, analyst role- advantage nice, nice to have familiarity, snowflake or aws athena, web or app product analytics tools, tools such as mixpanel, knowledge of data science, science concepts and metrics, concepts and metrics including, metrics including speech ai, including speech ai large, speech ai large language, ai large language models, large language models llms, language models llms natural, models llms natural language, llms natural language understanding, natural language understanding nlu, understanding nlu and automatic, nlu and automatic speech, automatic speech recognition asr, extract clean and analyze, clean and analyze data, analyze data to uncover, data to uncover trends, trends patterns and actionable, patterns and actionable insights, recommendations based on data, based on data analysis, data analysis to support, analysis to support business, business decisions and drive, decisions and drive product, develop and maintain dashboards, maintain dashboards and reports, dashboards and reports using, reports using bi tools, tools such as tableau, tableau or other platforms, data workflows and pipelines, workflows and pipelines using, using python and sql, collaborate with cross-functional teams, cross-functional teams including product, teams including product data, including product data science, data science and engineering, engineering to support data-driven)"
"Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.","List(relevant technical field, primary responsibility involves, responsibility involves working, working with data, could include roles, similar positions +, minimum of +, least one programming, one programming language, meta meta builds, meta builds technologies, technologies that help, help people connect, changed the way, way people connect, apps like messenger, instagram and whatsapp, whatsapp further empowered, empowered billions around, around the world, meta is moving, beyond d screens, screens toward immersive, augmented and virtual, reality to help, build the next, evolution in social, people who choose, choose to build, build their careers, careers by building, building with us, us at meta, meta help shape, shape a future, take us beyond, beyond what digital, digital connection makes, connection makes possible, makes possible todaybeyond, todaybeyond the constraints, constraints of screens, limits of distance, even the rules, rules of physics, compensation is determined, compensation details listed, reflect the base, equity or sales, addition to base, meta offers benefits, benefits at meta)","List(computer science engineering relevant, science engineering relevant technical, engineering relevant technical field, technical field or equivalent, field or equivalent +, primary responsibility involves working, involves working with data, roles such as data, data analyst data scientist, analyst data scientist data, data scientist data engineer, data engineer or similar, engineer or similar positions, sql etl data modeling, modeling and at least, least one programming language, python c++ c scala, meta meta builds technologies, builds technologies that help, technologies that help people, help people connect find, people connect find communities, find communities and grow, communities and grow businesses, changed the way people, apps like messenger instagram, messenger instagram and whatsapp, whatsapp further empowered billions, billions around the world, meta is moving beyond, moving beyond d screens, beyond d screens toward, like augmented and virtual, augmented and virtual reality, virtual reality to help, reality to help build, help build the next, build the next evolution, next evolution in social, evolution in social technology, us at meta help, help shape a future, future that will take, us beyond what digital, beyond what digital connection, digital connection makes possible, connection makes possible todaybeyond, possible todaybeyond the constraints, individual compensation is determined, listed in this posting, posting reflect the base, monthly rate or annual, bonus equity or sales, equity or sales incentives, sales incentives if applicable, addition to base compensation, base compensation meta offers, compensation meta offers benefits, learn more about benefits)"
"2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.","List(industrial or information systems engineering, statistics or equivalent, excel + sql, advantage a team, ability to deliver, deliver high-quality results, high-quality results fluent, results fluent english, share our values, values these guiding, guiding values inspire, values inspire every, inspire every aquant, every aquant employee, operate and enable, enable our company, impact and achieve, achieve our vision, vision be humble, humble and respectful, important than individual, treat your colleagues, colleagues with respect, respect and empathy, act with good, value everyones contributions, curious keep learning, questions to understand, understand complex situations, embrace new challenges, keep an open, receptive to feedback, take ownership focus, focus on achieving, measurable and meaningful, take decisive action, solve problems proactively, ownership of projects, projects and results, act with integrity, act with integrity, customers and colleagues, vacation your way, way we want, want to make, time to meet, meet your personal, needs with generous, pto and flexible, flexible vacation planning, owner we want, everyone to feel, equity to full-time, build your career, career every aquant, every aquant employee, must be authorized, authorized to work, unable to sponsor, sponsor or take, take over sponsorship, aquant is committed, committed to hiring, hiring a diverse, diverse and talented)","List(industrial or information systems engineering statistics, engineering statistics or equivalent, statistics or equivalent hands-on, excel + sql -, + sql - must, sql - must hands-on, azure services - advantage, - advantage a team, advantage a team player, team player with excellent, player with excellent collaboration, communicator with the ability, ability to deliver high-quality, deliver high-quality results fluent, high-quality results fluent english, english its not enough, cool product join us, us because you share, values these guiding values, guiding values inspire every, values inspire every aquant, inspire every aquant employee, company as a whole, greater impact and achieve, respectful we are one, one team where collective, team where collective success, success is more important, important than individual ego, respect and empathy act, empathy act with good, act with good intent, good intent and value, intent and value everyones, keep learning and asking, learning and asking questions, asking questions to understand, questions to understand complex, keep an open mind, mind and be receptive, ownership focus on achieving, focus on achieving measurable, achieving measurable and meaningful, measurable and meaningful outcomes, take decisive action solve, decisive action solve problems, action solve problems proactively, problems proactively and take, proactively and take ownership, take ownership of projects, act with integrity act, integrity act with integrity, thing for your customers, benefits vacation your way, want to make sure, sure you have time, meet your personal needs, personal needs with generous, needs with generous pto, generous pto and flexible, pto and flexible vacation, everyone is an owner, owner we want everyone, want everyone to feel, everyone to feel ownership, ownership over their work, offer equity to full-time, equity to full-time employees, build your career every, career every aquant employee, employee is in control, control of their career, development with our learning, stipends tools and training, applicants must be authorized, work for any employer, employer in the israel, sponsorship of an employment, visa at this time, diverse and talented workforce)"
"Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.","List(related field familiarity, familiarity with cloud, working with llms, security industry join, industry join us, us in securing, securing the worlds, worlds data rubrik, mission to secure, secure the worlds, zero trust data, trust data security, help organizations achieve, organizations achieve business, achieve business resilience, resilience against cyberattacks, rubrik security cloud, powered by machine, secures data across, data across enterprise, help organizations uphold, organizations uphold data, uphold data integrity, deliver data availability, availability that withstands, withstands adverse conditions, continuously monitor data, monitor data risks, risks and threats, data when infrastructure, infrastructure is attacked, rubrik at rubrik, committed to building, building and sustaining, sustaining a culture, culture where people, backgrounds are valued, know they belong, hire and promote, promote the best, matter their background, rubrik is committed, committed to correcting, correcting systemic processes, processes and cultural, prevented equal representation, means we review, review our current, intent to offer, offer fair hiring, opportunities to people, people from historically, historically underrepresented communities, strive to create, create a company, bring their authentic, selves to work, dei strategy focuses, focuses on three, three core areas, business and culture, culture our company, build a diverse, company that provides, provides equitable access, access to growth, growth and success, create an inclusive, environment where authenticity, thrives and people, backgrounds feel like, like they belong, expand our commitment, commitment to diversity, within and beyond, beyond our company, walls to invest, invest in future, generations of underrepresented, talent and bring, equal opportunity employer or veterans or disabled, opportunity employer or veterans or disabled rubrik, equal opportunity employer, applicants will receive, consideration for employment, employment without regard, regard to race, protected veteran status, basis of disability, rubrik provides equal, provides equal employment, equal employment opportunities, employees and applicants, applicants for employment, employment without regard, regard to race, disability or genetics, addition to federal, federal law requirements, complies with applicable, state and local, local laws governing, laws governing nondiscrimination, nondiscrimination in employment, employment in every, company has facilities, terms and conditions, conditions of employment, leaves of absence, compensation and training, federal law requires, law requires employers, employers to provide, provide reasonable accommodation, accommodation to qualified, individuals with disabilities, please contact us, us at hrrubrikcom, require a reasonable, accommodation to apply, perform your job, examples of reasonable, reasonable accommodation include, accommodation include making, making a change, process or work, using a sign, sign language interpreter, using specialized equipment)","List(science or other related, field familiarity with cloud, familiarity with cloud platforms, platforms such as aws, google cloud or azure, working with llms previous, security industry join us, join us in securing, securing the worlds data, data rubrik nyse rbrk, secure the worlds data, zero trust data security, data security we help, security we help organizations, help organizations achieve business, organizations achieve business resilience, business resilience against cyberattacks, resilience against cyberattacks malicious, malicious insiders and operational, insiders and operational disruptions, rubrik security cloud powered, cloud powered by machine, powered by machine learning, machine learning secures data, learning secures data across, secures data across enterprise, data across enterprise cloud, enterprise cloud and saas, cloud and saas applications, help organizations uphold data, organizations uphold data integrity, uphold data integrity deliver, data integrity deliver data, integrity deliver data availability, data availability that withstands, availability that withstands adverse, withstands adverse conditions continuously, adverse conditions continuously monitor, conditions continuously monitor data, continuously monitor data risks, data risks and threats, threats and restore businesses, businesses with their data, rubrik we are committed, people of all backgrounds, backgrounds are valued know, valued know they belong, believe they can succeed, goal is to hire, promote the best person, person for the job, committed to correcting systemic, systemic processes and cultural, processes and cultural norms, norms that have prevented, review our current efforts, efforts with the intent, intent to offer fair, offer fair hiring promotion, hiring promotion and compensation, promotion and compensation opportunities, compensation opportunities to people, people from historically underrepresented, underrepresented communities and strive, create a company culture, culture where all employees, feel they can bring, bring their authentic selves, authentic selves to work, work and be successful, strategy focuses on three, focuses on three core, areas of our business, culture our company build, company build a diverse, build a diverse company, diverse company that provides, company that provides equitable, equitable access to growth, success for all employees, culture create an inclusive, create an inclusive environment, inclusive environment where authenticity, environment where authenticity thrives, authenticity thrives and people, people of all backgrounds, feel like they belong, communities expand our commitment, commitment to diversity equity, inclusion within and beyond, beyond our company walls, company walls to invest, invest in future generations, future generations of underrepresented, generations of underrepresented talent, underrepresented talent and bring, talent and bring innovation, innovation to our clients, equal opportunity employer or veterans or disabled rubrik, rubrik is an equal, qualified applicants will receive, applicants will receive consideration, receive consideration for employment, consideration for employment without, without regard to race, regard to race color, race color religion sex, color religion sex sexual, religion sex sexual orientation, sex sexual orientation gender, sexual orientation gender identity, orientation gender identity national, gender identity national origin, national origin or protected, origin or protected veteran, rubrik provides equal employment, provides equal employment opportunities, equal employment opportunities eeo, eeo to all employees, applicants for employment without, without regard to race, regard to race color, race color religion sex, color religion sex national, religion sex national origin, sex national origin age, national origin age disability, age disability or genetics, addition to federal law, federal law requirements rubrik, law requirements rubrik complies, rubrik complies with applicable, complies with applicable state, applicable state and local, state and local laws, local laws governing nondiscrimination, governing nondiscrimination in employment, employment in every location, applies to all terms, conditions of employment including, employment including recruiting hiring, including recruiting hiring placement, recruiting hiring placement promotion, hiring placement promotion termination, placement promotion termination layoff, promotion termination layoff recall, termination layoff recall transfer, layoff recall transfer leaves, transfer leaves of absence, leaves of absence compensation, absence compensation and training, federal law requires employers, requires employers to provide, employers to provide reasonable, reasonable accommodation to qualified, accommodation to qualified individuals, qualified individuals with disabilities, contact us at hrrubrikcom, hrrubrikcom if you require, require a reasonable accommodation, reasonable accommodation to apply, apply for a job, job or to perform, examples of reasonable accommodation, reasonable accommodation include making, include making a change, change to the application, application process or work, process or work procedures, work procedures providing documents, documents in an alternate, format using a sign, using a sign language, language interpreter or using, interpreter or using specialized)"
"3 years or more of experience in data roles, including at least one year in data science (data roles: data analytics, data engineering, data science, BI development) Experience operating tabular mining tools (e.g. Privitar, Dataiku) - minimum one year of experience, minimum work on 30 tables Experience in mining in the healthcare field - mandatory. Experience in tabular data mining is mandatory. ⁠Free text mining - mandatory. Free text mining in Hebrew - Significant advantage. Experience working on Tama with researchers in academia - an advantage","List(including at least, roles data analytics, operating tabular mining, tabular mining tools, tabular data mining, mining is mandatory, free text mining, free text mining, mining in hebrew, working on tama, tama with researchers, researchers in academia)","List(roles including at least, including at least one, roles data analytics data, data analytics data engineering, analytics data engineering data, data engineering data science, engineering data science bi, operating tabular mining tools, mining in the healthcare, healthcare field - mandatory, data mining is mandatory, free text mining -, text mining - mandatory, text mining in hebrew, mining in hebrew -, hebrew - significant advantage, researchers in academia -, academia - an advantage)"
"Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. Your Name Job Number (Required) Upload Resume Share the job and let your friends help","List(requirements a minimum, computer science applied, science applied math, applied math statistics, strong algorithm development, machine learning algorithms, supervised and unsupervised, name job number, upload resume share, share the job, job and let, let your friends)","List(requirements a minimum first, computer science applied math, science applied math statistics, one of the following, following python r matlab, creative results driven quick, results driven quick learner, big data distributed processing, data distributed processing nosql, distributed processing nosql databases, name job number required, job number required upload, number required upload resume, required upload resume share, resume share the job, let your friends help)"


In [0]:
from pyspark.sql.functions import collect_list, array_distinct
from collections import Counter

# Collect cleaned n-grams per cluster
clustered_data = cleaned_ngram.groupBy("Cluster_Title").agg(
    collect_list("cleaned_unigrams").alias("unigrams"),
    collect_list("cleaned_bigrams").alias("bigrams"),
    collect_list("cleaned_trigrams").alias("trigrams"),
    collect_list("cleaned_quads").alias("quads")
)

# Create bag of words for each cluster
def create_cluster_bag(unigrams, bigrams, trigrams, quads):
    # all_words = set(unigrams + bigrams + trigrams + quads)
    all_ngrams = [word for sublist in unigrams + bigrams + trigrams + quads for word in sublist]
    ngram_counts = Counter(all_ngrams)
    filtered_ngrams = [ngram for ngram, count in ngram_counts.items() if count > 15]
    return filtered_ngrams

cluster_bag_udf = udf(create_cluster_bag, ArrayType(StringType()))
clustered_data = clustered_data.withColumn("skills_bag", cluster_bag_udf(
    col("unigrams"), col("bigrams"), col("trigrams"), col("quads")
))

clustered_data.select("Cluster_Title", "skills_bag").limit(10).display()


Cluster_Title,skills_bag
data scientist,"List(ability, lead, data, analysis, projects, requirements, advanced, modeling, knowledge, tools, like, tableau, programming, python, professional, setting, familiarity, distributed, computation, storage, workflow, management, eg, splunk, spark, kubernetes, kafka, hadoop, mapreduce, airflow, etc, relational, databases, sql, highly, organized, creative, motivated, passionate, results, curious, excellent, analytical, problem, written, verbal, communication, masters, computer, science, preferred, industrial, engineering, analytics, statistics, related, field, proficiency, strong, focus, libraries, manipulation, querying, managing, large, datasets, bi, create, dashboards, reports, english, analyst, advantage, snowflake, aws, web, product, concepts, including, ai, language, models, llms, understanding, trends, patterns, actionable, insights, provide, recommendations, based, support, business, decisions, drive, develop, using, platforms, optimize, pipelines, collaborate, cross-functional, teams, data-driven, decision-making, relevant, technical, equivalent, responsibility, working, include, roles, scientist, engineer, similar, minimum, phd, etl, least, one, c++, c, scala, meta, technologies, help, people, connect, find, communities, grow, businesses, instagram, around, world, beyond, screens, build, social, technology, careers, building, us, future, take, digital, connection, makes, possible, even, physics, individual, compensation, location, details, posting, base, rate, annual, bonus, equity, applicable, addition, offers, benefits, learn, mathematics, cloud, google, azure, previous, security, industry, join, securing, worlds, rubrik, nyse, rbrk, mission, secure, zero, trust, organizations, achieve, resilience, cyberattacks, malicious, insiders, operational, disruptions, powered, machine, learning, secures, across, enterprise, saas, applications, uphold, integrity, deliver, availability, withstands, adverse, conditions, continuously, monitor, risks, threats, restore, infrastructure, attacked, linkedin, x, formerly, twitter, rubrikcom, diversity, inclusion, committed, sustaining, culture, backgrounds, valued, know, belong, believe, succeed, rubriks, goal, hire, promote, best, person, job, matter, background, correcting, systemic, processes, cultural, norms, prevented, equal, representation, means, review, current, efforts, intent, offer, fair, hiring, promotion, opportunities, historically, underrepresented, strive, company, employees, feel, bring, authentic, selves, work, successful, dei, strategy, focuses, three, core, areas, diverse, provides, equitable, access, growth, success, globally, inclusive, environment, authenticity, thrives, expand, commitment, within, walls, invest, generations, talent, innovation, clients, opportunity, employer, veterans, disabled, qualified, applicants, receive, consideration, employment, without, regard, race, color, religion, sex, sexual, orientation, gender, identity, national, origin, protected, veteran, status, discriminated, basis, disability, eeo, age, genetics, federal, law, complies, state, local, laws, governing, nondiscrimination, every, facilities, policy, applies, terms, recruiting, placement, termination, layoff, recall, transfer, leaves, absence, training, requires, employers, reasonable, accommodation, individuals, disabilities, please, contact, hrrubrikcom, require, apply, perform, examples, making, change, application, process, procedures, providing, documents, alternate, format, sign, interpreter, specialized, equipment, development, operating, tabular, mining, privitar, dataiku, tables, healthcare, mandatory, free, text, hebrew, significant, researchers, academia, first, applied, math, algorithm, proven, algorithms, supervised, unsupervised, optimization, following, r, matlab, driven, quick, learner, big, processing, nosql, name, number, required, upload, share, let, friends, graduate, must, exceptional, high, attention, detail, multiple, interpersonal, self-learning, problem-solving, mssql, hands-on, git, emphasis, stock, prediction, statistical, languages, manipulate, draw, sets, variety, techniques, clustering, decision, tree, artificial, neural, networks, auto, sklearn, catboost, arima, lstm, deep, gan, coding, javascript, mongodb, team, developer, another, financial, services, forensic, customers, scripting, pyspark, well, ds, mlflow, linux, os, fluent, spoken, competencies, able, solve, tech, problems, improvise, cope, unexpected, challenges, internal, functions, partners, self-starter, demonstrate, design, solutions, complex, independently, minimal, supervision, relationship, execute, key, tight, deadlines, title, junior, marketing, customer, economics, developing, sense, advantages, banking, systems, creating, platform, code, player, abilities, new, translate, needs, quickly, identify, reporting, time, ml, performance, position, comprehensive, control, technological, automation, actively, manage, environments, pipeline, deploying, good, warehousing, demonstrated, adapt, understand, clear, tasks, things, global, computing, role, frameworks, practices, pytorch, tensorflow, production, vision, architecture, passion, expertise, interest, type, full-time, pay, per, visualization, nlp, preferably, gcp, present, non-technical, stakeholders, real-time, bigquery, looker, generous, sick, days, accessible, state-of-the-art, made, vertex, msc, segmentation, cleaning, feature, products, youre, impact, wed, love, hear, methods, findings, power, exposure, devops, communicate, concise, manner, latest, framework, standards, research, quantitative, toolkits, end, model, testing, innovative, great, li-hybrid, real-world, scikit-learn, medical, used, regulations, studies, approaches, align, initiatives, organization, collaborative, relationships, program, effectively, different, desirable, health, various, disease, make, programs, effective, delivering, quality, compliance, governance, clinical, area, team-oriented, approach, live, enjoy, life, united, part, transformation, collaboration, ready, difference, plus, deployment, experts, leveraging, cloud-based, independent, mindset, prior, fiverr, search, members, improvement, ideas, group, prioritize, workplace, citizenship, characteristic, need, media, fast-paced, career, parental, leave, remote, expression, continuous, integration, impactful, ensuring, portfolio, accommodations, candidates, personal, discrimination, assistance, information, notice, meet, discipline, essential, database, improve, redshift, github, flexible, competitive, considered, creed, pregnancy, marital, ancestry, military, physical, legally, ensure, states, standard, requirement, strongly, office, ways, education, paid, care, matching, k, employee, plan, dental, insurance, dun, bradstreet, found, page, use, genetic, poster, transparency, participate, others, request, basic, user, range, java, pto, dedicated, may, fit, startup, software, gain, enhance, months, contribute, mentor, contributing, designated, phone, internet, screen, reliable, helper, bees, day, interview, aware, factors, specific, package, specifications, retirement, schedule, eligible, also, restful, feedback, tools like, distributed computation, workflow management, relational databases, excellent written, computer science, industrial engineering, data analytics, related field, data manipulation, like tableau, excellent communication, data analyst, data science, data analysis, cross-functional teams, technical field, data scientist, data engineer, data modeling, least one, one programming, programming language, people connect, base compensation, field familiarity, cloud platforms, google cloud, llms previous, security industry, industry join, join us, worlds data, data rubrik, nyse rbrk, zero trust, trust data, data security, help organizations, organizations achieve, achieve business, business resilience, malicious insiders, operational disruptions, rubrik security, security cloud, machine learning, secures data, data across, across enterprise, saas applications, organizations uphold, uphold data, data integrity, deliver data, data availability, withstands adverse, adverse conditions, continuously monitor, monitor data, data risks, restore businesses, formerly twitter, rubrikcom diversity, rubriks goal, best person, correcting systemic, systemic processes, cultural norms, prevented equal, equal representation, current efforts, offer fair, fair hiring, compensation opportunities, historically underrepresented, underrepresented communities, company culture, employees feel, authentic selves, dei strategy, strategy focuses, three core, core areas, company build, diverse company, provides equitable, equitable access, employees globally, culture create, inclusive environment, authenticity thrives, backgrounds feel, feel like, communities expand, inclusion within, company walls, future generations, underrepresented talent, bring innovation, equal opportunity, opportunity employer, qualified applicants, receive consideration, employment without, without regard, sexual orientation, gender identity, national origin, protected veteran, veteran status, rubrik provides, provides equal, equal employment, employment opportunities, federal law, law requirements, rubrik complies, applicable state, local laws, laws governing, governing nondiscrimination, every location, policy applies, including recruiting, law requires, requires employers, provide reasonable, reasonable accommodation, qualified individuals, please contact, contact us, accommodation include, include making, application process, work procedures, providing documents, alternate format, sign language, language interpreter, using specialized, specialized equipment, data roles, data engineering, operating tabular, mining tools, minimum one, minimum work, healthcare field, tabular data, data mining, free text, text mining, significant advantage, minimum first, science applied, applied math, math statistics, strong algorithm, algorithm development, learning algorithms, optimization algorithms, following python, strong communication, results driven, quick learner, big data, distributed processing, nosql databases, name job, job number, friends help, highly motivated, multiple projects, strong interpersonal, strong problem-solving, product development, development advantage, using statistical, statistical computer, computer languages, manipulate data, draw insights, large data, data sets, learning techniques, decision tree, tree learning, artificial neural, neural networks, auto sklearn, sklearn catboost, catboost arima, arima lstm, lstm deep, deep learning, learning gan, gan coding, coding knowledge, javascript python, product management, management team, another related, proven professional, financial services, strong data, financial customers, scripting languages, ds workflow, management tools, tools including, including mlflow, linux os, fluent english, core competencies, competencies able, solve tech, tech problems, customers improvise, improvise cope, unexpected challenges, challenges able, drive relevant, relevant internal, internal functions, demonstrate high, high analytical, design creative, creative solutions, complex requirements, requirements ability, lead projects, projects independently, minimal supervision, interpersonal relationship, relationship building, building ability, execute multiple, drive key, key business, business results, tight deadlines, title data, title junior, analyst company, scientist company, junior data, engineer company, learn independently, data infrastructure, learn new, relevant field, statistical analysis, data tools, data warehousing, multiple tasks, strong understanding, best practices, computer vision, data visualization, present complex, non-technical stakeholders, data processing, statistical modeling, feature engineering, using data, wed love, programming languages, like python, technical solutions, learning frameworks, visualization tools, power bi, devops practices, concise manner, science toolkits, complex data, develop innovative, innovative solutions, healthcare data, translate complex, excellent problem-solving, strong attention, work effectively, different teams, excellent verbal, written communication, work environment, team members, characteristic protected, parental leave, using git, data pipelines, remote work, marital status, citizenship status, legally protected, hiring process, united states, strong knowledge, paid time, vision insurance, job posting, employment opportunity, disability status, genetic information, current poster, pay transparency, time management, database management, management systems, professional development, ai tools, software development, impactful solutions, months contribute, helper bees, reasonable accommodations, work location, technical specifications, strongly preferred, familiarity with distributed, databases and sql, proficiency in python, knowledge of sql, tools like tableau, knowledge of data, python and sql, least one programming, one programming language, related field familiarity, familiarity with cloud, working with llms, security industry join, industry join us, us in securing, securing the worlds, worlds data rubrik, mission to secure, secure the worlds, zero trust data, trust data security, help organizations achieve, organizations achieve business, achieve business resilience, resilience against cyberattacks, rubrik security cloud, powered by machine, secures data across, data across enterprise, help organizations uphold, organizations uphold data, uphold data integrity, deliver data availability, availability that withstands, withstands adverse conditions, continuously monitor data, monitor data risks, risks and threats, data when infrastructure, infrastructure is attacked, rubrik at rubrik, committed to building, building and sustaining, sustaining a culture, culture where people, backgrounds are valued, know they belong, hire and promote, promote the best, matter their background, rubrik is committed, committed to correcting, correcting systemic processes, processes and cultural, prevented equal representation, means we review, review our current, intent to offer, offer fair hiring, opportunities to people, people from historically, historically underrepresented communities, strive to create, create a company, bring their authentic, selves to work, dei strategy focuses, focuses on three, three core areas, business and culture, culture our company, build a diverse, company that provides, provides equitable access, access to growth, growth and success, create an inclusive, environment where authenticity, thrives and people, backgrounds feel like, like they belong, expand our commitment, commitment to diversity, within and beyond, beyond our company, walls to invest, invest in future, generations of underrepresented, talent and bring, equal opportunity employer, applicants will receive, consideration for employment, employment without regard, regard to race, protected veteran status, basis of disability, rubrik provides equal, provides equal employment, equal employment opportunities, employees and applicants, applicants for employment, disability or genetics, addition to federal, federal law requirements, complies with applicable, state and local, local laws governing, laws governing nondiscrimination, nondiscrimination in employment, employment in every, company has facilities, terms and conditions, conditions of employment, leaves of absence, compensation and training, federal law requires, law requires employers, employers to provide, provide reasonable accommodation, accommodation to qualified, individuals with disabilities, please contact us, us at hrrubrikcom, require a reasonable, accommodation to apply, perform your job, examples of reasonable, reasonable accommodation include, accommodation include making, making a change, process or work, using a sign, sign language interpreter, using specialized equipment, including at least, tabular data mining, free text mining, mining in hebrew, researchers in academia, requirements a minimum, computer science applied, science applied math, applied math statistics, strong algorithm development, machine learning algorithms, supervised and unsupervised, name job number, share the job, job and let, let your friends, engineering and management, attention to detail, ability to work, interpersonal and communication, emphasis on product, product development advantage, using statistical computer, statistical computer languages, data and draw, insights from large, large data sets, variety of machine, machine learning techniques, decision tree learning, artificial neural networks, auto sklearn catboost, sklearn catboost arima, catboost arima lstm, arima lstm deep, lstm deep learning, deep learning gan, learning gan coding, gan coding knowledge, product management team, equivalent in mathematics, science or another, another related field, analytics for financial, services with strong, strong data mining, mining and forensic, working with customers, must have hands-on, knowledge of scripting, well as ds, ds workflow management, workflow management tools, management tools including, tools including mlflow, written and spoken, core competencies able, able to solve, solve tech problems, problems for customers, customers improvise cope, cope with unexpected, unexpected challenges able, able to drive, drive relevant internal, relevant internal functions, demonstrate high analytical, analytical and technical, ability to design, design creative solutions, solutions for complex, complex requirements ability, ability to learn, learn and lead, lead projects independently, work with minimal, supervision with customers, strong interpersonal relationship, interpersonal relationship building, relationship building ability, ability to execute, execute multiple projects, projects and drive, drive key business, key business results, results under tight, data analyst company, title data scientist, data scientist company, title junior data, data engineer company, title data engineer, sense of responsibility, independently and strong, world of banking, ability to translate, mining is required, using data science, love to hear, proficiency in programming, requirements into technical, familiarity with data, data visualization tools, visualization tools like, insights to non-technical, clear and concise, data science toolkits, end to end, develop innovative solutions, machine learning frameworks, relationships with stakeholders, effectively with different, teams and multiple, verbal and written, familiarity with cloud-based, committed to providing, protected by law, identity or expression, sql and python, equal employment opportunity, ability to communicate, database management systems, tools for coding, coding and development, effectively with cross-functional, must be able, based on race, familiarity with distributed computation, computation storage and workflow, storage and workflow management, splunk spark kubernetes kafka, spark kubernetes kafka hadoop, kubernetes kafka hadoop mapreduce, kafka hadoop mapreduce airflow, least one programming language, science or other related, field familiarity with cloud, familiarity with cloud platforms, platforms such as aws, google cloud or azure, working with llms previous, security industry join us, join us in securing, securing the worlds data, data rubrik nyse rbrk, secure the worlds data, zero trust data security, data security we help, security we help organizations, help organizations achieve business, organizations achieve business resilience, business resilience against cyberattacks, resilience against cyberattacks malicious, malicious insiders and operational, insiders and operational disruptions, rubrik security cloud powered, cloud powered by machine, powered by machine learning, machine learning secures data, learning secures data across, secures data across enterprise, data across enterprise cloud, enterprise cloud and saas, cloud and saas applications, help organizations uphold data, organizations uphold data integrity, uphold data integrity deliver, data integrity deliver data, integrity deliver data availability, data availability that withstands, availability that withstands adverse, withstands adverse conditions continuously, adverse conditions continuously monitor, conditions continuously monitor data, continuously monitor data risks, data risks and threats, threats and restore businesses, businesses with their data, rubrik we are committed, people of all backgrounds, backgrounds are valued know, valued know they belong, believe they can succeed, goal is to hire, promote the best person, person for the job, committed to correcting systemic, systemic processes and cultural, processes and cultural norms, norms that have prevented, review our current efforts, efforts with the intent, intent to offer fair, offer fair hiring promotion, hiring promotion and compensation, promotion and compensation opportunities, compensation opportunities to people, people from historically underrepresented, underrepresented communities and strive, create a company culture, culture where all employees, feel they can bring, bring their authentic selves, authentic selves to work, work and be successful, strategy focuses on three, focuses on three core, areas of our business, culture our company build, company build a diverse, build a diverse company, diverse company that provides, company that provides equitable, equitable access to growth, success for all employees, culture create an inclusive, create an inclusive environment, inclusive environment where authenticity, environment where authenticity thrives, authenticity thrives and people, feel like they belong, communities expand our commitment, commitment to diversity equity, inclusion within and beyond, beyond our company walls, company walls to invest, invest in future generations, future generations of underrepresented, generations of underrepresented talent, underrepresented talent and bring, talent and bring innovation, innovation to our clients, rubrik is an equal, qualified applicants will receive, applicants will receive consideration, receive consideration for employment, consideration for employment without, without regard to race, regard to race color, race color religion sex, color religion sex sexual, religion sex sexual orientation, sex sexual orientation gender, sexual orientation gender identity, orientation gender identity national, gender identity national origin, national origin or protected, origin or protected veteran, rubrik provides equal employment, provides equal employment opportunities, equal employment opportunities eeo, eeo to all employees, applicants for employment without, color religion sex national, religion sex national origin, sex national origin age, national origin age disability, age disability or genetics, addition to federal law, federal law requirements rubrik, law requirements rubrik complies, rubrik complies with applicable, complies with applicable state, applicable state and local, state and local laws, local laws governing nondiscrimination, governing nondiscrimination in employment, employment in every location, applies to all terms, conditions of employment including, employment including recruiting hiring, including recruiting hiring placement, recruiting hiring placement promotion, hiring placement promotion termination, placement promotion termination layoff, promotion termination layoff recall, termination layoff recall transfer, layoff recall transfer leaves, transfer leaves of absence, leaves of absence compensation, absence compensation and training, federal law requires employers, requires employers to provide, employers to provide reasonable, reasonable accommodation to qualified, accommodation to qualified individuals, qualified individuals with disabilities, contact us at hrrubrikcom, hrrubrikcom if you require, require a reasonable accommodation, reasonable accommodation to apply, apply for a job, job or to perform, examples of reasonable accommodation, reasonable accommodation include making, include making a change, change to the application, application process or work, process or work procedures, work procedures providing documents, documents in an alternate, format using a sign, using a sign language, language interpreter or using, interpreter or using specialized, roles including at least, including at least one, analytics data engineering data, data engineering data science, engineering data science bi, mining in the healthcare, text mining in hebrew, requirements a minimum first, computer science applied math, science applied math statistics, one of the following, following python r matlab, creative results driven quick, results driven quick learner, big data distributed processing, data distributed processing nosql, distributed processing nosql databases, name job number required, job number required upload, let your friends help, industrial engineering and management, strong interpersonal and communication, emphasis on product development, using statistical computer languages, statistical computer languages python, languages python to manipulate, python to manipulate data, manipulate data and draw, data and draw insights, draw insights from large, insights from large data, knowledge of a variety, variety of machine learning, decision tree learning artificial, tree learning artificial neural, learning artificial neural networks, auto sklearn catboost arima, sklearn catboost arima lstm, catboost arima lstm deep, arima lstm deep learning, lstm deep learning gan, deep learning gan coding, learning gan coding knowledge, equivalent in mathematics statistics, mathematics statistics data science, data science or another, science or another related, data analytics for financial, analytics for financial services, financial services with strong, services with strong data, data mining and forensic, knowledge of scripting languages, well as ds workflow, ds workflow management tools, workflow management tools including, management tools including mlflow, tools including mlflow airflow, including mlflow airflow etc, os - an advantage, fluent english - written, - written and spoken, competencies able to solve, able to solve tech, tech problems for customers, problems for customers improvise, improvise cope with unexpected, cope with unexpected challenges, challenges able to drive, able to drive relevant, drive relevant internal functions, self-starter who can demonstrate, high analytical and technical, ability to design creative, creative solutions for complex, solutions for complex requirements, requirements ability to learn, learn and lead projects, independently and to work, work with minimal supervision, minimal supervision with customers, strong interpersonal relationship building, interpersonal relationship building ability, building ability to execute, ability to execute multiple, multiple projects and drive, projects and drive key, drive key business results, business results under tight, results under tight deadlines, title data scientist company, ability to learn independently, learn independently and strong, independently and strong communication, banking is a significant, wed love to hear, computer science data science, mathematics or a related, proficiency in programming languages, familiarity with data visualization, data visualization tools like, visualization tools like tableau, insights to non-technical stakeholders, clear and concise manner, data science computer science, science computer science statistics, computer science statistics mathematics, languages such as python, ability to work effectively, work effectively with different, effectively with different teams, different teams and multiple, teams and multiple tasks, excellent verbal and written, verbal and written communication, characteristic protected by law, gender identity or expression, status sexual orientation gender, - the current poster, poster can be found, ai tools for coding, effectively with cross-functional teams, marital status veteran status)"
data center,"List(required, microsoft)"
junior backend,"List(development, developing, using, working, environment)"
data analyst,"List(science, strong, annotation, work, full, time, familiarity, data, working, sql, visualization, tools, field, statistics, analysis, opportunities, user, health, growth, us, job, hybrid, remote, using, background, posting, data visualization, visualization tools, data visualization tools)"
data engineer,"List(big, systems, hands-on, must, team, excellent, results, values, every, aquant, employee, company, colleagues, act, keep, learning, new, take, ownership, projects, integrity, vacation, want, time, everyone, work, career, development, data, including, processing, distributed, engineer, paid, every aquant, aquant employee, take ownership, every aquant employee, act with integrity)"
data junior,List()
manager sem,List()
manager junior,"List(requirements, tasks, specification, help, business, data, product, create, ability, insights, english, working, advantage, good, work, information, presentation, smsedgecom, video, apply, via, email, jobssmsedgecom, project, management, strong, communication, send, cv, building, teams, end, students, final, analytical, high, excel, level, team, business data, product information, information presentation, smsedgecom or presentation video, smsedgecom or video apply, apply via, via email, product information presentation, smsedgecom or video apply via, apply via email, email at jobssmsedgecom, product information presentation -, information presentation - smsedgecom or presentation, presentation - smsedgecom or presentation video, - smsedgecom or presentation video -, smsedgecom or presentation video - smsedgecom or video, video - smsedgecom or video apply, - smsedgecom or video apply via, smsedgecom or video apply via email, via email at jobssmsedgecom, students in their final)"
manager project,List(brainpop)
manager partnerships,List()


In [0]:
# Create bag of words for all the data
skills_bag_lists = clustered_data.select("skills_bag").rdd.flatMap(lambda row: row.skills_bag).collect()

# Flatten the list of lists and convert to a set for unique words
unique_skills_set = set(skills_bag_lists)

print(len(unique_skills_set))

print(unique_skills_set)

1700
{'learning frameworks', 'problems for customers improvise', 'future generations', 'passionate', 'job or to perform', 'problem', 'receive', 'sick', 'quickly', 'page', 'data and draw insights', 'secure', 'statistical computer languages python', 'believe', 'gan', 'supervised and unsupervised', 'documents in an alternate', 'withstands adverse conditions continuously', 'relational databases', 'talent', 'competencies able', 'querying', 'lead projects independently', 'federal law requires employers', 'technical solutions', 'application', 'align', 'termination layoff recall transfer', 'python to manipulate data', 'processing', 'variety', 'computation', 'field', 'rubrik', 'dei strategy focuses', 'tools including mlflow', 'ml', 'information presentation - smsedgecom or presentation', 'text mining', 'desirable', 'li-hybrid', 'secures data across enterprise', 'ability to execute', 'insights', 'local laws governing nondiscrimination', 'areas', 'initiatives', 'clear and concise', 'sql and pytho

## NER

In [0]:
%pip install spacy
!python -m spacy download en_core_web_sm

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/12.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/12.8 MB[0m [31m7.5 MB/s[0m eta [36m0:00:02[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m6.9/12.8 MB[0m [31m96.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m12.8/12.8 MB[0m [31m180.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m12.8/12.8 MB[0m [31m180.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [0]:
import spacy

nlp = spacy.load("en_core_web_sm")

def extract_skills(text):
    doc = nlp(text)
    skills = [ent.text for ent in doc.ents if ent.label_ in ['PRODUCT', 'ORG', 'SKILL']]
    return skills

extract_skills_udf = udf(lambda text: extract_skills(text), ArrayType(StringType()))
ner_df = requirements_clusters_new.withColumn("extracted_skills", extract_skills_udf(col("Requirements_Text")))

ner_df.select("Requirements_Text", "extracted_skills").limit(10).display()

Requirements_Text,extracted_skills
"+3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)","List(Hadoop, MapReduce, AirFlow, SQL Highly, Bachelor, Hadoop, MapReduce, AirFlow)"
"into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker","List(Python/Java Experience, Spark, Hadoop, SKLearn, Tensorflow, RedHat)"
"A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.","List(SQL, MATLAB)"
"B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!","List(B.A, SQL, Strong Excel /, Google Sheets, Desired Marketing, KPI, - Health & Wellness)"
"B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.","List(Industrial Engineering, Data Analytics, Pandas, BI, Data Analyst, Athena, Data Science, Large Language Models, Natural Language Understanding, NLU, Automatic Speech Recognition, BI, Python, SQL)"
"Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.","List(Bachelor, Computer Science, Engineering, SQL, ETL, WhatsApp, Meta, 2D, Meta, Meta, Learn, Meta)"
"2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.","List(BA/B.Sc., Excel, Aquant, PTO)"
"Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.","List(Mathematics, Computer Science, AWS, Zero Trust Data Security, Instagram, Rubrik.com Diversity, DEI)"
"3 years or more of experience in data roles, including at least one year in data science (data roles: data analytics, data engineering, data science, BI development) Experience operating tabular mining tools (e.g. Privitar, Dataiku) - minimum one year of experience, minimum work on 30 tables Experience in mining in the healthcare field - mandatory. Experience in tabular data mining is mandatory. ⁠Free text mining - mandatory. Free text mining in Hebrew - Significant advantage. Experience working on Tama with researchers in academia - an advantage",List(BI)
"Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. Your Name Job Number (Required) Upload Resume Share the job and let your friends help",List()


In [0]:
from transformers import pipeline

# Load a pretrained pipeline for token classification
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

# Function to extract skills using the NER pipeline
def extract_skills_with_ner(text):
    ner_results = ner_pipeline(text)
    skills = [res['word'] for res in ner_results if res['entity'] in ['B-SKILL', 'I-SKILL']]
    return skills

# Apply the function to your data
extract_skills_udf = udf(lambda text: extract_skills_with_ner(text), ArrayType(StringType()))
ner_df = requirements_clusters_new.withColumn("extracted_skills", extract_skills_udf(col("Requirements_Text")))

ner_df.select("Requirements_Text", "extracted_skills").limit(10).display()

2025-02-04 11:10:11.771057: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-04 11:10:11.857913: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-04 11:10:12.219574: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Requirements_Text,extracted_skills
"+3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)",List()
"into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker",List()
"A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.",List()
"B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!",List()
"B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.",List()
"Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.",List()
"2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.",List()
"Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.",List()
"3 years or more of experience in data roles, including at least one year in data science (data roles: data analytics, data engineering, data science, BI development) Experience operating tabular mining tools (e.g. Privitar, Dataiku) - minimum one year of experience, minimum work on 30 tables Experience in mining in the healthcare field - mandatory. Experience in tabular data mining is mandatory. ⁠Free text mining - mandatory. Free text mining in Hebrew - Significant advantage. Experience working on Tama with researchers in academia - an advantage",List()
"Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. Your Name Job Number (Required) Upload Resume Share the job and let your friends help",List()


## Q\A

In [0]:
from transformers import pipeline

qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

q = "What are all the job requirements and qualifications?"

def extract_skills_from_text(text):
    answer = qa_pipeline(question=q, context=text)
    a = answer['answer']
    return a

def extract_skill_list(a):
    if a:
        a = a.split(", ")
        if a[-1].startswith("and"):
            a[-1] = a[-1][4:]
    return a

extract_skills_udf = udf(extract_skills_from_text, StringType())
extract_skill_list_udf = udf(extract_skill_list, ArrayType(StringType()))

cleaned_ngram_with_skills = cleaned_ngram.withColumn("extracted_skills_qa", extract_skills_udf(col("Requirements_Text")))
cleaned_ngram_with_skills = cleaned_ngram_with_skills.withColumn("skill_list", extract_skill_list_udf(col("extracted_skills_qa")))

cleaned_ngram_with_skills.select("Requirements_Text", "extracted_skills_qa", "skill_list").display()


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Requirements_Text,extracted_skills_qa,skill_list
"+3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)","distributed computation, storage, and workflow management","List(distributed computation, storage, workflow management)"
"into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker",actionable designs,List(actionable designs)
"A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.",Good written and verbal skills,List(Good written and verbal skills)
"B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!",Growth Opportunities: Learning and training programs,List(Growth Opportunities: Learning and training programs)
"B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.",Excellent communication skills,List(Excellent communication skills)
"Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.",Individual compensation,List(Individual compensation)
"2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.","learning stipends, tools, and training","List(learning stipends, tools, training)"
"Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.",all employees feel they can bring their authentic selves to work and be successful,List(all employees feel they can bring their authentic selves to work and be successful)
"3 years or more of experience in data roles, including at least one year in data science (data roles: data analytics, data engineering, data science, BI development) Experience operating tabular mining tools (e.g. Privitar, Dataiku) - minimum one year of experience, minimum work on 30 tables Experience in mining in the healthcare field - mandatory. Experience in tabular data mining is mandatory. ⁠Free text mining - mandatory. Free text mining in Hebrew - Significant advantage. Experience working on Tama with researchers in academia - an advantage",minimum work on 30 tables Experience in mining in the healthcare field - mandatory,List(minimum work on 30 tables Experience in mining in the healthcare field - mandatory)
"Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. Your Name Job Number (Required) Upload Resume Share the job and let your friends help","Strong communication skills, creative, results driven, quick learner","List(Strong communication skills, creative, results driven, quick learner)"


## Semantic Search

In [0]:
skills_bow = list(unique_skills_set)

In [0]:
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

skills_embeddings = model.encode(skills_bow, convert_to_tensor=True)

def extract_skills_from_text2(text):
    # Encode the job description (context)
    context_embedding = model.encode(text, convert_to_tensor=True)
    
    # Perform semantic search to find the most relevant skills
    results = util.semantic_search(context_embedding, skills_embeddings, top_k=5)
    
    # Extract the top skills based on the semantic search results
    top_skills = [skills_bow[result['corpus_id']] for result in results[0]]
    
    return ", ".join(top_skills)

def extract_skill_list2(a):
    if a:
        a = a.split(", ")
    return a

extract_skills_udf2 = udf(extract_skills_from_text2, StringType())
extract_skill_list_udf2 = udf(extract_skill_list2, ArrayType(StringType()))

cleaned_ngram_with_skills2 = cleaned_ngram.withColumn("extracted_skills_semantic", extract_skills_udf2(col("Requirements_Text")))
cleaned_ngram_with_skills2 = cleaned_ngram_with_skills2.withColumn("skill_list", extract_skill_list_udf2(col("extracted_skills_semantic")))

cleaned_ngram_with_skills2.select("Requirements_Text", "extracted_skills_semantic", "skill_list").display()


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Requirements_Text,extracted_skills_semantic,skill_list
"+3 years of experience Ability to lead data investigations and analysis projects with ambiguous requirements Advanced statistic and modeling knowledge Experience visualizing and presenting analyses in tools like Jupyter and Tableau Experience programming in Python in a professional setting Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.) Experience with relational databases and SQL Highly organized, creative, motivated, and passionate about achieving results Curious and have excellent analytical and problem solving skills Excellent written, verbal, and data communication skills Bachelor's or Master's degree in computer science. Preferred Qualifications Familiarity with distributed computation, storage, and workflow management (e.g. Splunk, Spark, Kubernetes, Kafka, Hadoop, MapReduce, AirFlow etc.)","data engineer, data scientist, data science computer science, proficiency in programming, computer science data science","List(data engineer, data scientist, data science computer science, proficiency in programming, computer science data science)"
"into actionable designs and tasks, development, UT, and production troubleshooting Requirements 1-2 years of experience of hands-on development experience Experience developing data-oriented products preferably using Python/Java Experience in data processing FW such as Spark, Pandas, Hadoop, Airflow Experience in Distributed Databases (Such as Elasticsearch, Mongo, Redis, etc) Experience with developing microservices-based architecture Experience working in container-based environments using tools such as K8s, helm Experience working as a software developer in an Agile environment Nice to have: Experience in machine learning FWs such as SKLearn, Tensorflow Experience with RedHat OpenShift Dev Environment knowledge: GIT, Jenkins, Docker","devops, devops practices, management tools including mlflow, software development, spark kubernetes kafka hadoop","List(devops, devops practices, management tools including mlflow, software development, spark kubernetes kafka hadoop)"
"A Bachelor of Science degree or equivalent (STEM fields an advantage). Strong attention to detail and a commitment to maintaining high-quality annotation standards. Good written and verbal skills, with the ability to work effectively with a diverse group of individuals. Availability to work full time when needed. Nice-to-haves: Familiarity with data annotation processes, especially non-structured data (images, video, audio). Experience working with seismic data, heat maps, or spectrograms Familiarity with SQL and data visualization tools. Experience with coding languages, preferably MATLAB and Python.","title data engineer, data scientist, data engineer, title data scientist, data science or another","List(title data engineer, data scientist, data engineer, title data scientist, data science or another)"
"B.A/BSc in a highly quantitative field such as mathematics industrial engineering economics statistics or equivalent academic knowledge At least 2 years of hands-on data analysis and story-telling experience Proven experience with measuring online success and identifying optimization opportunities Excellent understanding of online marketing metrics High proficiency in SQL with demonstrated experience in querying large complex datasets Strong Excel / Google Sheets skills Experience with data visualization tools such as Qlik Sense and Looker A solid grasp of statistical significance models and tools Desired Marketing metrics of user funnel, user acquisition KPI’s What we offer A Workplace Designed for Your Well-Being - Comfortable Environment: Ergonomic chairs and desks to support your health. Beautifully designed spaces that inspire creativity. - On-Site Amenities: Gym, yoga room, and music room for relaxation and rejuvenation. - Delicious Perks: 5-star breakfast to kickstart your day. Freshly made coffee, shakes, and afternoon salads by our in-house barista. - Health & Wellness: Full health insurance coverage for your peace of mind. - Growth Opportunities: Learning and training programs to enhance your personal and professional skills. - Vibrant Culture: Happy hours, team events, and plenty of opportunities to connect with colleagues. Join us and experience a workplace where your well-being, growth, and enjoyment come first!","key business results, data analyst company, businesses with their data, business data, business results","List(key business results, data analyst company, businesses with their data, business data, business results)"
"B.Sc in Industrial Engineering, Data Analytics, Statistics, or a related field. Proficiency in Python, with a strong focus on libraries like Pandas for data manipulation. Solid knowledge of SQL for querying and managing large datasets. Experience with BI tools like Tableau to create dashboards and reports. Excellent communication skills, with fluency in English (written and verbal). 0-1 years of experience in a Data Analyst role- advantage Nice To have: Familiarity with Snowflake or AWS Athena. Experience with Web/App product analytics tools such as Mixpanel. Knowledge of Data Science concepts and metrics, including Speech AI, Large Language Models (LLMs), Natural Language Understanding (NLU), and Automatic Speech Recognition (ASR). Responsibilities: Extract, clean, and analyze data to uncover trends, patterns, and actionable insights. Provide recommendations based on data analysis to support business decisions and drive product improvements. Develop and maintain dashboards and reports using BI tools such as Tableau or other platforms. Optimize data workflows and pipelines using Python and SQL. Collaborate with cross-functional teams, including Product, Data Science, and Engineering, to support data-driven decision-making.","data analyst, engineering data science bi, data analyst company, data engineer, data scientist","List(data analyst, engineering data science bi, data analyst company, data engineer, data scientist)"
"Bachelor's degree in Computer Science, Engineering, relevant technical field, or equivalent 4+ years of experience where the primary responsibility involves working with data. This could include roles such as data analyst, data scientist, data engineer, or similar positions 4+ years of experience (or a minimum of 2+ years with a Ph.D) with SQL, ETL, data modeling, and at least one programming language (e.g., Python, C++, C#, Scala, etc.) About Meta: Meta builds technologies that help people connect, find communities, and grow businesses. When Facebook launched in 2004, it changed the way people connect. Apps like Messenger, Instagram and WhatsApp further empowered billions around the world. Now, Meta is moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. People who choose to build their careers by building with us at Meta help shape a future that will take us beyond what digital connection makes possible today—beyond the constraints of screens, the limits of distance, and even the rules of physics. Individual compensation is determined by skills, qualifications, experience, and location. Compensation details listed in this posting reflect the base hourly rate, monthly rate, or annual salary only, and do not include bonus, equity or sales incentives, if applicable. In addition to base compensation, Meta offers benefits. Learn more about benefits at Meta.","careers, data scientist, tech, meta, career","List(careers, data scientist, tech, meta, career)"
"2-3 years experience with big datasets BA/B.Sc. in industrial/information systems engineering, statistics or equivalent Hands-on experience with Excel + SQL - must Hands-on experience with Python – must Experience with Azure services - advantage A team player, with excellent collaboration skills Excellent communicator with the ability to deliver high-quality results Fluent English It's not enough to have a cool product, join us because you share our values: These guiding values inspire every Aquant employee. They are how we operate and enable our company as a whole to have a greater impact and achieve our vision: Be Humble and Respectful: We are one team, where collective success is more important than individual ego. Treat your colleagues with respect and empathy, act with good intent, and value everyone's contributions. Be Curious: Keep learning, and asking questions to understand complex situations. Embrace new challenges. Keep an open mind, and be receptive to feedback. Take Ownership: Focus on achieving measurable and meaningful outcomes. Take decisive action, solve problems proactively, and take ownership of projects and results. Act with Integrity: Act with Integrity. Do great things while doing the right thing for your customers and colleagues. Some of our benefits: Vacation your way: We want to make sure you have time to meet your personal needs with generous PTO and flexible vacation planning. Everyone is an owner: We want everyone to feel ownership over their work and what we are building here, which is why we offer equity to full-time employees. Build your career: Every Aquant employee is in control of their career development with our learning stipends, tools, and training. Applicants must be authorized to work for any employer in the Israel. We are unable to sponsor or take over sponsorship of an employment Visa at this time. Aquant is committed to hiring a diverse and talented workforce.","success for all employees, azure, work and be successful, competencies able to solve, learn and lead projects","List(success for all employees, azure, work and be successful, competencies able to solve, learn and lead projects)"
"Master’s degree in Mathematics, Computer Science, or other related field Familiarity with cloud platforms such as AWS, Google Cloud, or Azure Experience working with LLMs Previous experience in the security industry Join Us in Securing the World's Data Rubrik (NYSE: RBRK) is on a mission to secure the world’s data. With Zero Trust Data Security™, we help organizations achieve business resilience against cyberattacks, malicious insiders, and operational disruptions. Rubrik Security Cloud, powered by machine learning, secures data across enterprise, cloud, and SaaS applications. We help organizations uphold data integrity, deliver data availability that withstands adverse conditions, continuously monitor data risks and threats, and restore businesses with their data when infrastructure is attacked. Linkedin | X (formerly Twitter) | Instagram | Rubrik.com Diversity, Equity & Inclusion @ Rubrik At Rubrik we are committed to building and sustaining a culture where people of all backgrounds are valued, know they belong, and believe they can succeed here. Rubrik's goal is to hire and promote the best person for the job, no matter their background. In doing so, Rubrik is committed to correcting systemic processes and cultural norms that have prevented equal representation. This means we review our current efforts with the intent to offer fair hiring, promotion, and compensation opportunities to people from historically underrepresented communities, and strive to create a company culture where all employees feel they can bring their authentic selves to work and be successful. Our DEI strategy focuses on three core areas of our business and culture: Our Company: Build a diverse company that provides equitable access to growth and success for all employees globally. Our Culture: Create an inclusive environment where authenticity thrives and people of all backgrounds feel like they belong. Our Communities: Expand our commitment to diversity, equity, & inclusion within and beyond our company walls to invest in future generations of underrepresented talent and bring innovation to our clients. Equal Opportunity Employer/Veterans/Disabled Rubrik is an Equal Opportunity Employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, sexual orientation, gender identity, national origin, or protected veteran status and will not be discriminated against on the basis of disability. Rubrik provides equal employment opportunities (EEO) to all employees and applicants for employment without regard to race, color, religion, sex, national origin, age, disability or genetics. In addition to federal law requirements, Rubrik complies with applicable state and local laws governing nondiscrimination in employment in every location in which the company has facilities. This policy applies to all terms and conditions of employment, including recruiting, hiring, placement, promotion, termination, layoff, recall, transfer, leaves of absence, compensation and training. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please contact us at hr@rubrik.com if you require a reasonable accommodation to apply for a job or to perform your job. Examples of reasonable accommodation include making a change to the application process or work procedures, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment.","rubrik security cloud, rubrik security cloud powered, security cloud, data security we help, securing the worlds data","List(rubrik security cloud, rubrik security cloud powered, security cloud, data security we help, securing the worlds data)"
"3 years or more of experience in data roles, including at least one year in data science (data roles: data analytics, data engineering, data science, BI development) Experience operating tabular mining tools (e.g. Privitar, Dataiku) - minimum one year of experience, minimum work on 30 tables Experience in mining in the healthcare field - mandatory. Experience in tabular data mining is mandatory. ⁠Free text mining - mandatory. Free text mining in Hebrew - Significant advantage. Experience working on Tama with researchers in academia - an advantage","data scientist, data science or another, tabular data mining, data science, using data science","List(data scientist, data science or another, tabular data mining, data science, using data science)"
"Requirements: A minimum first degree in computer science/ applied math/ statistics. Strong algorithm development skills. Proven experience with machine learning algorithms – both supervised and unsupervised. Proven experience with optimization algorithms. 5+ years of relevant experience with one of the following: Python, R, Matlab. Strong communication skills, creative, results driven, quick learner. Experience with big data, distributed processing, NoSQL databases – a significant advantage. Your Name Job Number (Required) Upload Resume Share the job and let your friends help","requirements ability to learn, data engineer, requirements, computer science applied, data scientist","List(requirements ability to learn, data engineer, requirements, computer science applied, data scientist)"


In [0]:
from pyspark.sql.functions import collect_list, struct, map_from_entries, count

exploded_skills = cleaned_ngram_with_skills2.withColumn("skill", explode(col("skill_list")))

# Count the occurrences of each skill in each cluster
skill_counts = exploded_skills.groupBy("Cluster_Title", "skill").agg(
    count("Requirements_Text").alias("skill_count")
)

# Get the total number of jobs per cluster
cluster_sizes = cleaned_ngram_with_skills2.groupBy("Cluster_Title").agg(
    count("Requirements_Text").alias("cluster_size")
)

skill_stats = skill_counts.join(cluster_sizes, "Cluster_Title")

# Calculate the percentage of jobs requiring each skill
skill_stats = skill_stats.withColumn(
    "percentage", (col("skill_count") / col("cluster_size")) * 100
)

# Create a dictionary for skills and their percentages
skill_dict_percent = skill_stats.groupBy("Cluster_Title").agg(
    map_from_entries(collect_list(struct(col("skill"), col("percentage")))).alias("skills_statistics")
)

skill_dict_percent.select("Cluster_Title", "skills_statistics").display()


Cluster_Title,skills_statistics
data scientist,"Map(data scientist -> 71.8232044198895, kubernetes kafka hadoop mapreduce -> 4.143646408839778, data analyst -> 22.65193370165746, devops -> 2.7624309392265194, hiring -> 3.314917127071823, data security we help -> 5.524861878453039, familiarity with cloud platforms -> 3.591160220994475, cloud platforms -> 0.8287292817679558, data pipelines -> 1.3812154696132597, inclusive environment -> 0.5524861878453038, computer science applied math -> 3.314917127071823, data engineer company -> 18.784530386740332, career -> 1.3812154696132597, data infrastructure -> 0.8287292817679558, recruiting hiring placement promotion -> 0.5524861878453038, full-time -> 1.1049723756906076, data roles -> 1.3812154696132597, qualified applicants -> 1.3812154696132597, tabular data mining -> 5.524861878453039, data tools -> 4.143646408839778, applicants for employment -> 1.6574585635359116, google cloud or azure -> 0.8287292817679558, core competencies able -> 0.8287292817679558, familiarity with cloud -> 1.3812154696132597, qualified individuals with disabilities -> 0.2762430939226519, thrives and people -> 0.5524861878453038, provides equal employment opportunities -> 2.209944751381215, securing the worlds data -> 2.209944751381215, computer science data science -> 2.209944751381215, requirements ability -> 0.8287292817679558, competencies able -> 0.8287292817679558, build a diverse company -> 0.8287292817679558, data science computer science -> 17.403314917127073, equal employment opportunities -> 1.6574585635359116, zero trust data security -> 3.314917127071823, compensation opportunities -> 1.6574585635359116, title data engineer -> 12.154696132596685, software development -> 3.591160220994475, data science toolkits -> 3.314917127071823, equal employment -> 1.6574585635359116, free text mining -> 3.314917127071823, engineering data science bi -> 11.049723756906078, equal opportunity employer -> 1.6574585635359116, data analyst company -> 11.049723756906078, title data scientist company -> 2.209944751381215, employment opportunities -> 2.7624309392265194, pyspark -> 2.7624309392265194, equal employment opportunities eeo -> 1.3812154696132597, spark kubernetes kafka hadoop -> 4.143646408839778, meta -> 3.0386740331491713, title data scientist -> 18.784530386740332, requirements ability to learn -> 11.878453038674033, cloud and saas applications -> 0.8287292817679558, hadoop -> 4.143646408839778, rubrik security cloud -> 5.524861878453039, spark -> 2.7624309392265194, competencies able to solve -> 7.734806629834254, build a diverse -> 0.2762430939226519, competencies -> 5.801104972375691, requirements -> 4.972375690607735, familiarity with cloud-based -> 3.0386740331491713, core competencies -> 0.8287292817679558, tech -> 3.0386740331491713, commitment to diversity -> 0.5524861878453038, analytics data engineering data -> 0.5524861878453038, data scientist company -> 2.209944751381215, data mining -> 3.314917127071823, requirements into technical -> 4.696132596685083, computer science -> 6.629834254143646, analytics -> 1.9337016574585635, diverse company -> 0.5524861878453038, proficiency in programming languages -> 3.591160220994475, computer science applied -> 5.248618784530387, data science or another -> 29.2817679558011, proficiency in python -> 1.3812154696132597, using data science -> 2.209944751381215, careers -> 9.116022099447514, company build a diverse -> 1.3812154696132597, diverse company that provides -> 0.5524861878453038, rubrik security cloud powered -> 5.524861878453039, proficiency in programming -> 15.745856353591158, data engineer -> 69.61325966850829, data science -> 2.209944751381215, commitment to diversity equity -> 0.8287292817679558, security cloud -> 5.524861878453039)"
etl developer,"Map(oracle -> 100.0, data across enterprise -> 100.0, data engineer company -> 100.0, knowledge of sql -> 100.0, data engineer -> 100.0)"
manager sem,"Map(qualified individuals -> 100.0, hire and promote -> 100.0, recruiting hiring placement promotion -> 100.0, qualified applicants -> 100.0, marketing -> 100.0)"
engineer software,"Map(location -> 100.0, work location -> 100.0, employment in every location -> 100.0, remote work -> 100.0, work environment -> 100.0)"
junior manager,"Map(growth and success -> 100.0, marketing -> 100.0, customer success -> 100.0, competencies -> 100.0, working with customers -> 100.0)"
software engineer,"Map(data tools -> 55.55555555555556, protected veteran status -> 44.44444444444444, marital status veteran status -> 44.44444444444444, accommodation to qualified individuals -> 44.44444444444444, accommodation to apply -> 44.44444444444444, engineering data science bi -> 55.55555555555556, data engineering -> 55.55555555555556, data engineering data science -> 55.55555555555556, accommodation to qualified -> 44.44444444444444, data engineer -> 55.55555555555556)"
information entry,"Map(data scientist -> 100.0, data analyst -> 100.0, healthcare data -> 100.0, data roles -> 100.0, data engineer -> 100.0)"
manager junior,"Map(msc -> 19.148936170212767, complex requirements ability -> 38.297872340425535, smsedgecom or video apply via email -> 21.27659574468085, qualified applicants -> 10.638297872340425, requirements ability -> 38.297872340425535, competencies able -> 29.78723404255319, smsedgecom -> 21.27659574468085, ability to learn -> 10.638297872340425, requirements ability to learn -> 10.638297872340425, competencies able to solve -> 38.297872340425535, ability to work -> 21.27659574468085, competencies -> 38.297872340425535, ability to communicate -> 10.638297872340425, talent and bring innovation -> 10.638297872340425, requirements -> 19.148936170212767, information presentation - smsedgecom or presentation -> 21.27659574468085, ability to work effectively -> 21.27659574468085, - smsedgecom or video apply via -> 21.27659574468085, smsedgecom or video apply -> 21.27659574468085, requirements into technical -> 19.148936170212767, careers -> 19.148936170212767, data engineer -> 38.297872340425535)"
manager team8,"Map(data scientist -> 100.0, data analyst -> 100.0, fluent english -> 100.0, careers -> 100.0, data engineer -> 100.0)"
manager project,"Map(devops -> 50.0, enterprise cloud and saas -> 50.0, devops practices -> 50.0, achieve business -> 50.0, hire and promote -> 50.0, software development -> 50.0, cloud and saas applications -> 50.0, marketing -> 50.0, talent and bring innovation -> 50.0, underrepresented talent and bring -> 50.0)"
