In [62]:
from elasticsearch import Elasticsearch
import pandas as pd
import plotly.express as px

In [61]:
pip install plotly

Collecting plotly
  Downloading plotly-5.18.0-py3-none-any.whl.metadata (7.0 kB)
Collecting tenacity>=6.2.0 (from plotly)
  Downloading tenacity-8.2.3-py3-none-any.whl.metadata (1.0 kB)
Downloading plotly-5.18.0-py3-none-any.whl (15.6 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 15.6/15.6 MB 29.5 MB/s eta 0:00:00
Downloading tenacity-8.2.3-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.18.0 tenacity-8.2.3
Note: you may need to restart the kernel to use updated packages.


In [52]:
# connection to the cluster: build the Elasticsearch client that the search
# cell uses. It targets http://localhost:9200 over plain HTTP and no
# credentials are passed here.
es = Elasticsearch(hosts="http://localhost:9200")

In [53]:
# Name of the Elasticsearch index the job offers are read from.
index_name = "jobs_stepstone"

In [54]:
# Search body: a match_all clause with no filter conditions.
query = {"query": {"match_all": {}}}

In [55]:
# perform search: send `query` to the index named by `index_name` and keep
# the raw response for the extraction cells that follow.
# NOTE(review): size=1000 presumably caps the number of returned hits, so an
# index holding more documents would be silently truncated — confirm, and
# paginate if the full data set is needed.

result = es.search(index=index_name, body=query, size=1000)

In [56]:
# Pull the list of matching documents out of the response. Either level of
# the nested 'hits' structure may be absent, in which case the result is an
# empty list.
hits_section = result.get('hits', {})
hits = hits_section.get('hits', [])

In [57]:
# Collect the '_source' payload of every hit; a hit without '_source'
# contributes an empty dict.
data = [hit.get('_source', {}) for hit in hits]

In [58]:
# Turn the list of source records into a DataFrame (one row per record).
df = pd.DataFrame(data=data)

In [63]:
# Preview the first five rows to check which columns came back.
df.head(n=5)

Unnamed: 0,job_title,company,date,location,contract_type,work_type,description_title,description,salary_range,benefits
0,Senior Performance Marketing Manager(m/w/d),neuefische GmbH,2023-12-01T14:19:00Z,Hamburg oder Berlin,Feste Anstellung,"Vollzeit, Home Office möglich","['Einleitung', 'Ihre Aufgaben', 'Ihr Profil', ...",['Die neue fische GmbH ist Deutschlands führen...,,
1,Performance Marketing Manager(m/w/d),neuefische GmbH,2023-12-01T14:15:47Z,Hamburg oder Berlin,Feste Anstellung,"Vollzeit, Home Office möglich","['Einleitung', 'Ihre Aufgaben', 'Ihr Profil', ...",['\xa0Die neue fische GmbH ist Deutschlands fü...,,
2,Recruiting Teamlead (m/f/d),neuefische GmbH,2023-12-01T14:10:26Z,"Home-Office, Hamburg, Köln, Frankfurt, München...",Feste Anstellung,"Vollzeit, Home Office möglich","['Einleitung', 'Your tasks', 'So you inspire u...",['neue fische GmbH is Germany\'s leading provi...,60.000 - 99.000 €,"['Betriebliche Altersvorsorge', 'Flexible Arbe..."
3,Werkstudent*in - Admission / Recruiting,neuefische GmbH,2023-12-01T14:10:14Z,"Hamburg, Köln","Studentenjobs, Werkstudent","Teilzeit, Home Office möglich","['Einleitung', 'Deine Aufgaben', 'Damit begeis...",['Die neue fische GmbH ist Deutschlands führen...,,"['Betriebliche Altersvorsorge', 'Flexible Arbe..."
4,(Senior) Paid Social Media Manager (w/m/d),emetriq GmbH,2023-12-01T14:03:19Z,Hamburg,Feste Anstellung,"Vollzeit, Home Office möglich","['Werde Teil eines starken Teams', 'Wie du uns...",['emetriq ist ein Anbieter für datengetriebene...,41.000 - 60.000 €,


In [72]:
# Parse the 'date' timestamps, then keep only the calendar date in a new
# 'day' column so offers can be grouped per day.
timestamps = pd.to_datetime(df['date'])
df['day'] = timestamps.dt.date

In [73]:
# Derive a single 'city' per offer: keep only the first comma-separated
# entry of 'location' (e.g. "Hamburg, Köln" -> "Hamburg") and trim any
# surrounding whitespace.
# The vectorised .str accessor propagates a missing location as NaN, whereas
# calling x.split(...) inside apply raises AttributeError on a non-string
# value.
df['city'] = df['location'].str.split(',').str[0].str.strip()

In [74]:
# Count the offers for every (day, city) pair, then flatten the grouped
# result into a plain frame with columns: day, city, count.
offers_per_day_city = df.groupby(['day', 'city']).size()
grouped_df = offers_per_day_city.reset_index(name='count')

In [75]:
# Bar chart of the per-day offer counts, with one colour per city.
# The `labels` mapping is keyed by the DataFrame column names that are
# actually plotted ('count' and 'day'); a key that matches no plotted column
# is never applied.
fig = px.bar(grouped_df, x='day', y='count', color='city',
             labels={'count': 'Number of Job Offers', 'day': 'Date'},
             title='Number of Job Offers Over Time for Each City')

# Set explicit axis and legend titles for readability
fig.update_layout(xaxis_title='Date',
                  yaxis_title='Number of Job Offers',
                  legend_title='City')

# Render the chart in the notebook output
fig.show()

In [76]:
# Keep only the (day, city) rows whose offer count is greater than 5.
filtered_df = grouped_df.loc[grouped_df['count'] > 5]

In [78]:
# Bar chart of the filtered counts: 'day' on the x axis, 'count' on the y
# axis, coloured by 'city'. The axis and legend titles are applied by
# chaining update_layout directly onto the freshly built figure.
fig = px.bar(
    filtered_df,
    x='day',
    y='count',
    color='city',
    title='Number of Job Offers Over Time for Each City (Cities with >5 offers)',
    labels={'count': 'Number of Job Offers', 'day': 'Date'},
).update_layout(
    xaxis_title='Date',
    yaxis_title='Number of Job Offers',
    legend_title='City',
)

# Render the chart in the notebook output
fig.show()