In [136]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [163]:
# Load connection settings from a local .env file into the process environment.
load_dotenv()

user = os.getenv('DB_USER')
password = os.getenv('DB_PASSWORD')
host = os.getenv('DB_HOST')
port = os.getenv('DB_PORT')
db = os.getenv('DB_NAME')

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the connection URL as the literal text "None".
missing_settings = [
    name
    for name, value in (
        ('DB_USER', user),
        ('DB_PASSWORD', password),
        ('DB_HOST', host),
        ('DB_PORT', port),
        ('DB_NAME', db),
    )
    if value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(missing_settings)
    )

# Percent-encode the credentials so characters such as "@", ":", "/" or "%"
# inside the user name or password are not misread as URL delimiters.
# safe='' makes quote() encode "/" as well.
engine = create_engine(
    f"postgresql+psycopg2://{quote(user, safe='')}:{quote(password, safe='')}"
    f"@{host}:{port}/{db}"
)

In [164]:
# Read the complete `offers` table into a DataFrame through the shared engine.
df = pd.read_sql_table(table_name='offers', con=engine)

# Job titles for junior specialist positions

In [165]:
# Select the title and level of every offer whose position_level equals
# 'Młodszy specjalista (Junior)'.
query = """
SELECT job_title, position_level FROM offers WHERE position_level = 'Młodszy specjalista (Junior)'
"""

# Run the statement on the shared engine; the bare `df` on the last line
# renders the result as the cell output.
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,job_title,position_level
0,"Software Engineer - Early Career, Cloud AI",Młodszy specjalista (Junior)
1,Software Engineer II - Ambient Platform,Młodszy specjalista (Junior)
2,Junior Data Engineer with SQL,Młodszy specjalista (Junior)
3,"Software Engineer - Early Career, Cloud AI",Młodszy specjalista (Junior)
4,Software Engineer II - Ambient Platform,Młodszy specjalista (Junior)
5,Junior Data Engineer with SQL,Młodszy specjalista (Junior)
6,"Software Engineer - Early Career, Cloud AI",Młodszy specjalista (Junior)
7,Software Engineer II - Ambient Platform,Młodszy specjalista (Junior)
8,Junior Data Engineer with SQL,Młodszy specjalista (Junior)
9,"Software Engineer - Early Career, Cloud AI",Młodszy specjalista (Junior)


# The city with the most job offers

In [None]:
# Count offers per city and keep only the city with the most offers.
# The CASE expression folds any `city` value that contains one of the listed
# city names into that single name; every other value is kept as-is.
# `%%` is the escaped form of the SQL `%` wildcard for the psycopg2 driver's
# %-style placeholders.
# The secondary sort key (city_sum) makes the LIMIT 1 result deterministic when
# two cities are tied for first place.
query = """
SELECT COUNT(*) AS number_of_jobs,
CASE
    WHEN city LIKE '%%Warszawa%%' THEN 'Warszawa'
    WHEN city LIKE '%%Kraków%%' THEN 'Kraków'
    WHEN city LIKE '%%Poznań%%' THEN 'Poznań'
    WHEN city LIKE '%%Wrocław%%' THEN 'Wrocław'
    WHEN city LIKE '%%Gdynia%%' THEN 'Gdynia'
    WHEN city LIKE '%%Gdańsk%%' THEN 'Gdańsk'
    WHEN city LIKE '%%Białystok%%' THEN 'Białystok'
    ELSE city
END AS city_sum
FROM offers
GROUP BY city_sum
ORDER BY number_of_jobs DESC, city_sum ASC
LIMIT 1
"""

df = pd.read_sql_query(query, engine)
df

Unnamed: 0,number_of_jobs,simplified_city
0,182,Warszawa


# Average number of offers per junior/mid/senior

# Top 20 technologies (excluding Python)

In [172]:
# Rank the 20 most frequently listed technologies, leaving out Python.
# The inner query splits each offer's comma-separated `technologies` text into
# one row per entry; the outer query trims whitespace and counts occurrences.
# Blank entries (for example from a trailing comma) are filtered out, since the
# previous `ILIKE '%%'` condition matched every string and removed nothing.
# `NOT ILIKE 'python'` drops entries equal to "python" in any letter case.
# The secondary sort key (technology) keeps the ranking and the LIMIT cut-off
# stable when several technologies have the same count.
query = """
SELECT
    TRIM(tech) AS technology,
    COUNT(*) AS occurrences
FROM (
    SELECT unnest(string_to_array(technologies, ',')) AS tech
    FROM offers
    WHERE technologies IS NOT NULL
) AS techs
WHERE TRIM(tech) <> '' AND TRIM(tech) NOT ILIKE 'python'
GROUP BY technology
ORDER BY occurrences DESC, technology ASC
LIMIT 20;
"""

df = pd.read_sql_query(query, engine)
df

Unnamed: 0,technology,occurrences
0,SQL,160
1,C++,120
2,Java,104
3,C,64
4,JavaScript,64
5,Git,56
6,R,48
7,Docker,48
8,AWS,40
9,Kubernetes,40
