In [1]:
import sys
import os

# Resolve the directory two levels above the current working directory and
# make it importable. The path is built from os.pardir components so it works
# with any path separator, not just the Windows backslash.
PROJECT_ROOT = os.path.abspath(os.path.join(os.pardir, os.pardir))
# Guard the append so re-running this cell does not stack duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
from src.engines.bigquery_engine import run_query, run_update_query, write_df_to_bq

In [3]:
# Columns selected from the 2020 table. Adjacent string literals are joined by
# the parser, so each query is still a single one-line SQL statement.
query_2020 = (
    "SELECT DatabaseWorkedWith, LanguageWorkedWith, MiscTechWorkedWith, "
    "NEWCollabToolsWorkedWith, PlatformWorkedWith "
    "FROM `data-analysis-project-481112.dataset_collection.SO_data_2020`"
)

# Columns selected from the 2025 table.
query_2025 = (
    "SELECT LanguageHaveWorkedWith, DatabaseHaveWorkedWith, PlatformHaveWorkedWith, "
    "DevEnvsHaveWorkedWith, AIModelsHaveWorkedWith "
    "FROM `data-analysis-project-481112.dataset_collection.SO_data_2025`"
)

In [4]:
# Run both queries in order (2020 first, then 2025) and bind each result.
df_20, df_25 = map(run_query, (query_2020, query_2025))



In [5]:
def top_skills(df, column_name):
    """Count how often each ';'-separated value occurs in one column.

    Missing entries in ``column_name`` are ignored. Every remaining cell is
    split on ';' and each resulting item is counted once per occurrence.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column_name``. It is not modified.
    column_name : str
        Name of a string column holding ';'-delimited values.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``skills`` (the item) and ``count`` (its number of
        occurrences), ordered by ``count`` descending.
    """
    skill_counts = (
        df[column_name]
        .dropna()
        .str.split(';')
        .explode()
        .value_counts()
    )
    # Name the index and the values explicitly instead of renaming whatever
    # labels reset_index() happens to produce: those defaults differ between
    # pandas versions and could yield two columns both called 'count'.
    return skill_counts.rename_axis('skills').reset_index(name='count')

In [6]:
# One count table per 2020 column; the order of the targets matches the
# order of the column names below.
(
    df_20_database,
    df_20_language,
    df_20_platform,
    df_20_collab,
    df_20_misc,
) = (
    top_skills(df_20, column)
    for column in (
        "DatabaseWorkedWith",
        "LanguageWorkedWith",
        "PlatformWorkedWith",
        "NEWCollabToolsWorkedWith",
        "MiscTechWorkedWith",
    )
)

In [7]:
# One count table per 2025 column; the order of the targets matches the
# order of the column names below.
(
    df_25_language,
    df_25_database,
    df_25_platform,
    df_25_devenv,
    df_25_ai,
) = (
    top_skills(df_25, column)
    for column in (
        "LanguageHaveWorkedWith",
        "DatabaseHaveWorkedWith",
        "PlatformHaveWorkedWith",
        "DevEnvsHaveWorkedWith",
        "AIModelsHaveWorkedWith",
    )
)

In [8]:
import pandas as pd

In [9]:
# Stack the five 2020 count tables into one frame with a fresh 0..n-1 index,
# then tag every row with the year.
df_20_all = pd.concat(
    [df_20_language, df_20_database, df_20_platform, df_20_collab, df_20_misc],
    ignore_index=True,
).assign(year=2020)

In [10]:
df_20_all = df_20_all.reset_index(drop=True)

# Positional rename: this requires the frame to have exactly three columns at
# this point, so the cell fails if re-run after 'percentage' has been added.
df_20_all.columns = ['skill', 'count', 'year']

# Each row's share of the summed counts over all stacked 2020 tables.
tot_count_20 = df_20_all['count'].sum()
df_20_all['percentage'] = df_20_all['count'].div(tot_count_20).mul(100)

In [11]:
# Stack the five 2025 count tables into one frame with a fresh 0..n-1 index,
# then tag every row with the year.
df_25_all = pd.concat(
    [df_25_language, df_25_database, df_25_platform, df_25_devenv, df_25_ai],
    ignore_index=True,
).assign(year=2025)

In [12]:
df_25_all = df_25_all.reset_index(drop=True)

# Positional rename: this requires the frame to have exactly three columns at
# this point, so the cell fails if re-run after 'percentage' has been added.
df_25_all.columns = ['skill', 'count', 'year']

# Each row's share of the summed counts over all stacked 2025 tables.
tot_count_25 = df_25_all['count'].sum()
df_25_all['percentage'] = df_25_all['count'].div(tot_count_25).mul(100)

In [13]:
# Long-format table: the 2020 rows followed by the 2025 rows, re-indexed.
df_all_skills = pd.concat([df_20_all, df_25_all], axis=0, ignore_index=True)

In [14]:
# Hand the combined per-year skill table to the project's BigQuery writer.
write_df_to_bq(
    df_all_skills,
    dataset="dataset_collection",
    table="d4_skills_totals",
)



In [15]:
# NOTE(review): df_20 and df_25 previously held the raw query results; they
# are rebound here to per-year summary frames, so the earlier top_skills cells
# cannot be re-run after this point without re-querying.
df_20 = (
    df_20_all
    .loc[:, ['skill', 'count', 'percentage']]
    .rename(columns={'count': 'count_2020', 'percentage': 'percentage_2020'})
)

df_25 = (
    df_25_all
    .loc[:, ['skill', 'count', 'percentage']]
    .rename(columns={'count': 'count_2025', 'percentage': 'percentage_2025'})
)

In [16]:
# Inner join on the skill name: only skills present in both years are kept.
df_diff = pd.merge(df_20, df_25, how='inner', on='skill')

In [17]:
# Absolute change from 2020 to 2025, in percentage points and in raw counts.
df_diff['difference_percent'] = df_diff['percentage_2025'].sub(df_diff['percentage_2020'])
df_diff['difference_count'] = df_diff['count_2025'].sub(df_diff['count_2020'])

In [18]:
# Change in share relative to the 2020 share, expressed in percent.
df_diff['pct_change_2020_to_2025'] = (
    df_diff['percentage_2025']
    .sub(df_diff['percentage_2020'])
    .div(df_diff['percentage_2020'])
    .mul(100)
)

In [19]:
# Largest percentage-point gain first.
df_diff = df_diff.sort_values(by='difference_percent', ascending=False)

In [20]:
# Hand the 2020-vs-2025 comparison table to the project's BigQuery writer.
write_df_to_bq(
    df_diff,
    dataset="dataset_collection",
    table="d4_skill_difference",
)



In [None]:
# Read the SQL text for the two remaining dashboard queries. The context
# managers close each file handle as soon as its contents have been read.
with open('../sql/dashboard_4/d4_sat_v_compensation.sql') as sql_file:
    q1 = sql_file.read()

# NOTE(review): this path has no '.sql' extension, unlike the one above;
# confirm the file on disk is really named this way.
with open('../sql/dashboard_4/d4_comp_v_exp') as sql_file:
    q2 = sql_file.read()

In [None]:
# Execute the two SQL texts in order (q1 first, then q2) and bind each result.
df_sat_v_compensation, df_comp_v_exp = map(run_query, (q1, q2))



In [None]:
# Hand both query results to the project's BigQuery writer, then report
# completion.
write_df_to_bq(
    df_comp_v_exp,
    dataset="dataset_collection",
    table="d4_comp_v_exp",
)
write_df_to_bq(
    df_sat_v_compensation,
    dataset="dataset_collection",
    table="d4_sat_v_compensation",
)

print("Dashboard 4 created successfully")



Dashboard 4 created successfully
