In [None]:
# Importing libraries
import pandas as pd
import ast
import matplotlib.pyplot as plt
from datasets import load_dataset
import seaborn as sns

In [None]:
# Load the job-postings dataset via `datasets.load_dataset` and convert its
# "train" split into a pandas DataFrame for the rest of the notebook.
DATASET_ID = "lukebarousse/data_jobs"

data = load_dataset(DATASET_ID)
train_split = data["train"]
df = train_split.to_pandas()  # type: ignore
df

In [None]:
# Normalise column types:
#   * job_posted_date -> pandas datetime
#   * job_skills      -> Python object parsed from its string representation
def _parse_skills(raw):
    """Parse a string holding a Python literal; return non-string values unchanged.

    Missing values are never `str` instances, so the `isinstance` check alone
    is enough to leave them untouched. That also makes re-running this cell
    safe: values that were already parsed are passed through as-is.
    """
    if isinstance(raw, str):
        return ast.literal_eval(raw)
    return raw


df["job_posted_date"] = pd.to_datetime(df["job_posted_date"])  # type: ignore
df["job_skills"] = df["job_skills"].apply(_parse_skills)  # type: ignore
df

<h1>Filter for Germany Data Analyst Roles</h1>


In [None]:
# Keep only postings that are both located in Germany and titled "Data Analyst".
# `.copy()` detaches the subset from `df` so later edits do not touch the original.
is_germany = df["job_country"] == "Germany"
is_data_analyst = df["job_title_short"] == "Data Analyst"

df_DA_Ger = df[is_germany & is_data_analyst].copy()  # type: ignore
df_DA_Ger

<h2>Analyzing job postings by location in Germany</h2>


In [None]:
# Custom purple palette: a continuous colormap blending from the lighter
# shade to the darker one, used as the `palette` of the bar charts below.
# NOTE(review): `n_colors` probably has no effect when `as_cmap=True` —
# confirm against the seaborn docs before removing it.
color_list = ["#A26FD4", "#2F0B53"]  # light -> dark
cpal = sns.blend_palette(color_list, n_colors=8, as_cmap=True)

In [None]:
# Bar chart: the ten most frequent job locations among Germany Data Analyst postings.
location_counts = df_DA_Ger["job_location"].value_counts()
df_plot = location_counts.head(10).to_frame()

sns.set_theme(style="ticks")
# Bars are coloured by their own count through the continuous `cpal` colormap;
# "job_location" is the index name of `df_plot`, which seaborn resolves by name.
bar_ax = sns.barplot(
    data=df_plot,
    x="count",
    y="job_location",
    hue="count",
    palette=cpal,
    legend=False,
)
sns.despine()

bar_ax.set_title(
    "Data Analyst Job Opening by Location in Germany",
    fontsize=18,
    loc="center",
    pad=30,
)
bar_ax.set_xlabel("Number of Jobs")
bar_ax.set_ylabel("")
plt.show()

In [None]:
# Boolean posting flags to summarise, mapped to the title shown on each chart.
# `job_no_degree_mention` is True when a posting does NOT mention a degree, so
# its title is phrased to match the flag; "Degree Requirement" next to a
# "True" slice would read as the opposite of what the data says.
plot_list = {
    "job_work_from_home": "Work From Home",
    "job_no_degree_mention": "No Degree Mentioned",
    "job_health_insurance": "Health Insurance Availability",
}

# Count True/False per flag on the Germany Data Analyst subset (the same frame
# the pie charts use) rather than on the full, unfiltered dataset.
# A flag that only ever takes one value would otherwise leave a NaN in the
# table and turn the counts into floats, so missing counts become 0.
truthy_vals = (
    df_DA_Ger[list(plot_list)]
    .apply(pd.Series.value_counts)  # type: ignore
    .fillna(0)
    .astype(int)
)
truthy_vals

In [None]:
# One pie chart per boolean flag in `plot_list`, side by side.
# `squeeze=False` + `ravel()` keeps `ax` a 1-D array of Axes for any number
# of flags, including a single one.
fig, ax = plt.subplots(1, len(plot_list), squeeze=False)
ax = ax.ravel()
fig.set_size_inches((12, 5))

for i, (column, title) in enumerate(plot_list.items()):
    # value_counts() orders by frequency, not by value, so a fixed
    # ["False", "True"] label list mislabels any flag where True is the
    # majority (and fails when only one value is present). Sort by value and
    # derive the labels from the index so each wedge carries its own label
    # and False/True keep the same position and colour across all pies.
    counts = df_DA_Ger[column].value_counts().sort_index()
    ax[i].pie(
        counts,
        labels=[str(value) for value in counts.index],
        autopct="%1.1f%%",
        startangle=90,
    )
    ax[i].set_title(title)
plt.show()

In [None]:
# Bar chart: the ten companies with the most Germany Data Analyst postings.
company_counts = df_DA_Ger["company_name"].value_counts()
df_plot = company_counts.head(10).to_frame()

sns.set_theme(style="ticks")
# Bars are coloured by their own count through the continuous `cpal` colormap;
# "company_name" is the index name of `df_plot`, which seaborn resolves by name.
bar_ax = sns.barplot(
    data=df_plot,
    x="count",
    y="company_name",
    hue="count",
    palette=cpal,
    legend=False,
)
sns.despine()

bar_ax.set_title(
    "Data Analyst Job Opening by Companies in Germany",
    fontsize=18,
    loc="center",
    pad=30,
)
bar_ax.set_xlabel("Number of Jobs")
bar_ax.set_ylabel("")
plt.show()