In [136]:
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

In [137]:
# Database configuration: load settings from a .env file (if one is found) into
# the process environment, then build the SQLAlchemy engine used by every query
# cell below.
load_dotenv()

user = os.getenv('DB_USER')
password = os.getenv('DB_PASSWORD')
host = os.getenv('DB_HOST')
port = os.getenv('DB_PORT')
db = os.getenv('DB_NAME')

# Fail early with a readable message: os.getenv returns None for unset variables,
# and interpolating None into a URL would yield the literal text "None".
missing = [
    name
    for name, value in (
        ('DB_USER', user),
        ('DB_PASSWORD', password),
        ('DB_HOST', host),
        ('DB_PORT', port),
        ('DB_NAME', db),
    )
    if value is None
]
if missing:
    raise RuntimeError(f"Missing database settings: {', '.join(missing)}")

# URL.create takes each component as a separate argument, so credentials that
# contain '@', ':', '/' or '%' are passed through intact instead of being
# misparsed as URL delimiters (which happens with plain string formatting).
engine = create_engine(
    URL.create(
        drivername="postgresql+psycopg2",
        username=user,
        password=password,
        host=host,
        # An empty DB_PORT means "no explicit port"; otherwise it must be numeric.
        port=int(port) if port else None,
        database=db,
    )
)

In [97]:
# Load the complete `offers` table into a DataFrame.
# NOTE(review): the next query cell reassigns `df` before this frame is read
# anywhere in the visible cells — confirm whether this load is still needed.
df = pd.read_sql_table(table_name='offers', con=engine)

# Job titles for junior specialist positions

In [138]:
# Fetch the title and position level of every offer whose position level is
# exactly the junior-specialist label used in the `offers` table.
query = """
SELECT job_title, position_level FROM offers WHERE position_level = 'Młodszy specjalista (Junior)'
"""

# Run the query through the shared engine; the bare `df` on the last line makes
# the notebook render the result as a table.
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,job_title,position_level
0,Junior System Integration Test Engineer,Młodszy specjalista (Junior)
1,"Software Engineer II - Infrastructure, EasyGCP",Młodszy specjalista (Junior)
2,Junior Data Consultant,Młodszy specjalista (Junior)
3,Junior Data Scientist (Credit Risk),Młodszy specjalista (Junior)
4,Python Junior Developer,Młodszy specjalista (Junior)
5,Software Engineer II - Data Lineage,Młodszy specjalista (Junior)
6,Software Engineer II - Chrome Browser Enterprise,Młodszy specjalista (Junior)
7,"Software Engineer - Early Career, 2025 Start",Młodszy specjalista (Junior)
8,Junior System Integration Test Engineer,Młodszy specjalista (Junior)
9,"Software Engineer II - Infrastructure, EasyGCP",Młodszy specjalista (Junior)


# Jobs in a specific city

In [139]:
# Fetch title and city for offers whose `city` value is exactly 'Warszawa'.
# NOTE(review): this is an exact match, so values that merely contain the city
# name are not returned, whereas the "most job offers" query uses LIKE patterns.
# Confirm which behaviour is intended.
query = """
SELECT job_title, city FROM offers WHERE city = 'Warszawa'
"""

# Execute against the shared engine and display the resulting frame.
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,job_title,city
0,Python Developer,Warszawa
1,Network Consulting Engineer,Warszawa
2,Analityk Danych,Warszawa
3,Analityk Biznesowy / obszar BI,Warszawa
4,Programista Big Data,Warszawa
5,Big Data Engineer,Warszawa
6,Python Developer,Warszawa
7,Network Consulting Engineer,Warszawa
8,Analityk Danych,Warszawa
9,Analityk Biznesowy / obszar BI,Warszawa


# The city with the most job offers

In [117]:
# Find the single city bucket with the most offers.
#
# The CASE expression maps any `city` value containing one of seven known city
# names to that canonical name and leaves every other value unchanged; offers
# are then counted per bucket, sorted by count descending, and only the top
# row is kept.
#
# The percent signs in the LIKE patterns are doubled — presumably so the
# database driver's %-style parameter handling yields a literal '%' wildcard;
# confirm against the pandas / driver versions in use.
# NOTE(review): the recorded output names the second column `simplified_city`,
# while this query aliases it `city_sum`; the output looks stale — re-run.
query = """
SELECT COUNT(*) AS number_of_jobs,
CASE
    WHEN city LIKE '%%Warszawa%%' THEN 'Warszawa'
    WHEN city LIKE '%%Kraków%%' THEN 'Kraków'
    WHEN city LIKE '%%Poznań%%' THEN 'Poznań'
    WHEN city LIKE '%%Wrocław%%' THEN 'Wrocław'
    WHEN city LIKE '%%Gdynia%%' THEN 'Gdynia'
    WHEN city LIKE '%%Gdańsk%%' THEN 'Gdańsk'
    WHEN city LIKE '%%Białystok%%' THEN 'Białystok'
    ELSE city
END AS city_sum
FROM offers
GROUP BY city_sum
ORDER BY number_of_jobs DESC
LIMIT 1
"""

# Execute against the shared engine and display the one-row result.
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,number_of_jobs,simplified_city
0,182,Warszawa


# TOP 20 TECHNOLOGIES

In [131]:
# Rank technologies by how many times they are listed across all offers and
# keep the 20 most frequent.
df = pd.read_sql_table('offers', engine)

# `technologies` is treated as a comma-separated string: split it, put one
# token on each row, and trim surrounding whitespace.
all_tech = (
    df['technologies']
    .str.split(',')
    .explode()
    .str.strip()
    # Drop blank tokens (empty cell, trailing comma, ", ,") so that '' can never
    # be ranked as a technology. Missing values pass through here and are
    # ignored by value_counts().
    .loc[lambda tokens: tokens != '']
)

# NOTE(review): every count in the recorded output is a multiple of 7, which
# suggests the `offers` table contains repeated rows — confirm and deduplicate
# before relying on the absolute numbers.
count = all_tech.value_counts().head(20)
count

technologies
Python        350
SQL           126
JavaScript     70
Java           70
C++            63
Bash           56
C              56
Docker         49
Kubernetes     42
Git            42
Hadoop         28
Grafana        28
Ansible        28
PyTorch        28
PowerShell     28
TensorFlow     21
Jira           21
R              21
Linux          21
Confluence     21
Name: count, dtype: int64


# Most common work model


In [134]:
# Count how many times each work model is listed across all offers.
df = pd.read_sql_table('offers', engine)

# `model_work` is treated as a comma-separated string, since one offer may list
# several models: split it, put one token on each row, and trim whitespace.
model_work = (
    df['model_work']
    .str.split(',')
    .explode()
    .str.strip()
    # Drop blank tokens (empty cell, trailing comma) so that '' is never counted
    # as a work model. Missing values pass through here and are ignored by
    # value_counts().
    .loc[lambda tokens: tokens != '']
)

# NOTE(review): all counts in the recorded output are multiples of 7, which
# suggests repeated rows in `offers` — confirm and deduplicate before relying
# on the absolute numbers.
count = model_work.value_counts()
count

model_work
Praca hybrydowa      259
Praca zdalna         112
Praca stacjonarna     56
Name: count, dtype: int64
