In [205]:
import pandas as pd
import numpy as np


In [206]:
df = pd.read_csv("stackoverflow_full.csv")
df["Country_raw"] = df["Country"]   # endast för stratifiering

counts = df["Country_raw"].value_counts()

rare = counts[counts < 2].index
df["Country_grouped"] = df["Country_raw"].replace(rare, "Other")

valid = counts[counts >= 2].index
df = df[df["Country_raw"].isin(valid)]

df.head()

Unnamed: 0.1,Unnamed: 0,Age,Accessibility,EdLevel,Employment,Gender,MentalHealth,MainBranch,YearsCode,YearsCodePro,Country,PreviousSalary,HaveWorkedWith,ComputerSkills,Employed,Country_raw,Country_grouped
0,0,<35,No,Master,1,Man,No,Dev,7,4,Sweden,51552.0,C++;Python;Git;PostgreSQL,4,0,Sweden,Sweden
1,1,<35,No,Undergraduate,1,Man,No,Dev,12,5,Spain,46482.0,Bash/Shell;HTML/CSS;JavaScript;Node.js;SQL;Typ...,12,1,Spain,Spain
2,2,<35,No,Master,1,Man,No,Dev,15,6,Germany,77290.0,C;C++;Java;Perl;Ruby;Git;Ruby on Rails,7,0,Germany,Germany
3,3,<35,No,Undergraduate,1,Man,No,Dev,9,6,Canada,46135.0,Bash/Shell;HTML/CSS;JavaScript;PHP;Ruby;SQL;Gi...,13,0,Canada,Canada
4,4,>35,No,PhD,0,Man,No,NotDev,40,30,Singapore,160932.0,C++;Python,2,0,Singapore,Singapore


In [207]:
cols_drop = ["Gender", "MentalHealth", "Accessibility", "Unnamed: 0"]
df = df.drop(columns=[c for c in cols_drop if c in df.columns])


In [208]:
df = df[df["YearsCodePro"] <= df["YearsCode"]]
df = df.drop(df[(df["Age"]=="<35") & (df["YearsCode"]>35)].index)

In [209]:
# PreviousSalary_norm är en standardiserad, relativ lön:

# 1.0 ≈ kandidatens lön = landets medianlön.

# >1.0 ≈ tjänar mer än medianen (t.ex. 2.4 = 240 % av median).

# <1.0 ≈ tjänar under medianen.

df['Country_original'] = df['Country'] 

median_salary_by_country = df.groupby("Country")["PreviousSalary"].median()
df["PreviousSalary_norm"] = df.apply(
    lambda r: r["PreviousSalary"] / median_salary_by_country[r["Country"]],
    axis=1
)

df = df.drop(columns="PreviousSalary")

In [210]:
haveworked_dummies = df["HaveWorkedWith"].str.get_dummies(sep=";")
df = pd.concat([df.drop(columns="HaveWorkedWith"), haveworked_dummies], axis=1)


In [211]:
df = pd.get_dummies(
    df,
    columns=["Age", "EdLevel", "MainBranch", "Country", "Employment"],
    drop_first=True
)

In [212]:
df.head()

Unnamed: 0,YearsCode,YearsCodePro,ComputerSkills,Employed,Country_raw,Country_grouped,Country_original,PreviousSalary_norm,APL,ASP.NET,ASP.NET Core,AWS,Angular,Angular.js,Ansible,Assembly,Bash/Shell,Blazor,C,C#,C++,COBOL,Cassandra,Chef,Clojure,Cloud Firestore,Colocation,CouchDB,Couchbase,Crystal,Dart,Delphi,Deno,DigitalOcean,Django,Docker,Drupal,DynamoDB,Elasticsearch,Elixir,Erlang,Express,F#,FastAPI,Fastify,Firebase,Firebase Realtime Database,Flask,Flow,Fortran,Gatsby,Git,Go,Google Cloud,Google Cloud Platform,Groovy,HTML/CSS,Haskell,Heroku,Homebrew,IBM Cloud or Watson,IBM DB2,Java,JavaScript,Julia,Kotlin,Kubernetes,LISP,Laravel,Linode,Lua,MATLAB,Managed Hosting,MariaDB,Matlab,Microsoft Azure,Microsoft SQL Server,MongoDB,MySQL,Neo4j,Next.js,Node.js,Nuxt.js,OCaml,OVH,Objective-C,OpenStack,Oracle,Oracle Cloud Infrastructure,PHP,Perl,Phoenix,Play Framework,PostgreSQL,PowerShell,Pulumi,Puppet,Python,R,React.js,Redis,Ruby,Ruby on Rails,Rust,SAS,SQL,SQLite,Scala,Solidity,Spring,Svelte,Swift,Symfony,Terraform,TypeScript,Unity 3D,Unreal Engine,VBA,VMware,Vue.js,Xamarin,Yarn,jQuery,npm,Age_>35,EdLevel_NoHigherEd,EdLevel_Other,EdLevel_PhD,EdLevel_Undergraduate,MainBranch_NotDev,Country_Albania,Country_Algeria,Country_Andorra,Country_Angola,Country_Argentina,Country_Armenia,Country_Australia,Country_Austria,Country_Azerbaijan,Country_Bahrain,Country_Bangladesh,Country_Barbados,Country_Belarus,Country_Belgium,Country_Belize,Country_Benin,Country_Bhutan,Country_Bolivia,Country_Bosnia and Herzegovina,Country_Botswana,Country_Brazil,Country_Bulgaria,Country_Cambodia,Country_Cameroon,Country_Canada,Country_Cape Verde,Country_Chile,Country_China,Country_Colombia,"Country_Congo, Republic of the...",Country_Costa Rica,Country_Croatia,Country_Cuba,Country_Cyprus,Country_Czech Republic,Country_Côte d'Ivoire,Country_Democratic Republic of the Congo,Country_Denmark,Country_Dominican Republic,Country_Ecuador,Country_Egypt,Country_El Salvador,Country_Estonia,Country_Ethiopia,Country_Fiji,Country_Finland,Country_France,Country_Georgia,Country_Germany,Country_Ghana,Country_Greece,Country_Guatemala,Country_Guinea,Country_Guyana,Country_Haiti,Country_Honduras,Country_Hong Kong (S.A.R.),Country_Hungary,Country_Iceland,Country_India,Country_Indonesia,"Country_Iran, Islamic Republic of...",Country_Iraq,Country_Ireland,Country_Isle of Man,Country_Israel,Country_Italy,Country_Jamaica,Country_Japan,Country_Jordan,Country_Kazakhstan,Country_Kenya,Country_Kosovo,Country_Kuwait,Country_Kyrgyzstan,Country_Lao People's Democratic Republic,Country_Latvia,Country_Lebanon,Country_Lesotho,Country_Libyan Arab Jamahiriya,Country_Lithuania,Country_Luxembourg,Country_Madagascar,Country_Malawi,Country_Malaysia,Country_Maldives,Country_Mali,Country_Malta,Country_Mauritius,Country_Mexico,Country_Mongolia,Country_Montenegro,Country_Morocco,Country_Mozambique,Country_Myanmar,Country_Namibia,Country_Nepal,Country_Netherlands,Country_New Zealand,Country_Nicaragua,Country_Niger,Country_Nigeria,Country_Nomadic,Country_Norway,Country_Oman,Country_Pakistan,Country_Palestine,Country_Panama,Country_Paraguay,Country_Peru,Country_Philippines,Country_Poland,Country_Portugal,Country_Qatar,Country_Republic of Korea,Country_Republic of Moldova,Country_Romania,Country_Russian Federation,Country_Rwanda,Country_Saint Lucia,Country_Saudi Arabia,Country_Senegal,Country_Serbia,Country_Singapore,Country_Slovakia,Country_Slovenia,Country_Somalia,Country_South Africa,Country_South Korea,Country_Spain,Country_Sri Lanka,Country_Sudan,Country_Suriname,Country_Swaziland,Country_Sweden,Country_Switzerland,Country_Syrian Arab Republic,Country_Taiwan,Country_Tajikistan,Country_Thailand,Country_The former Yugoslav Republic of Macedonia,Country_Timor-Leste,Country_Togo,Country_Trinidad and Tobago,Country_Tunisia,Country_Turkey,Country_Turkmenistan,Country_Uganda,Country_Ukraine,Country_United Arab Emirates,Country_United Kingdom of Great Britain and Northern Ireland,Country_United Republic of Tanzania,Country_United States of America,Country_Uruguay,Country_Uzbekistan,"Country_Venezuela, Bolivarian Republic of...",Country_Viet Nam,Country_Yemen,Country_Zambia,Country_Zimbabwe,Employment_1
0,7,4,4,0,Sweden,Sweden,Sweden,0.88105,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
1,12,5,12,1,Spain,Spain,Spain,1.089669,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
2,15,6,7,0,Germany,Germany,Germany,1.191662,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
3,9,6,13,0,Canada,Canada,Canada,0.590838,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
4,40,30,2,0,Singapore,Singapore,Singapore,2.458366,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,True,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [213]:
print("Antal rader efter rensning:", len(df))
print("Antal kolumner efter encoding:", len(df.columns))
print("Första kolumnerna:", df.columns[:20].tolist())
print(df.head())

Antal rader efter rensning: 72859
Antal kolumner efter encoding: 291
Första kolumnerna: ['YearsCode', 'YearsCodePro', 'ComputerSkills', 'Employed', 'Country_raw', 'Country_grouped', 'Country_original', 'PreviousSalary_norm', 'APL', 'ASP.NET', 'ASP.NET Core ', 'AWS', 'Angular', 'Angular.js', 'Ansible', 'Assembly', 'Bash/Shell', 'Blazor', 'C', 'C#']
   YearsCode  YearsCodePro  ComputerSkills  Employed Country_raw  \
0          7             4               4         0      Sweden   
1         12             5              12         1       Spain   
2         15             6               7         0     Germany   
3          9             6              13         0      Canada   
4         40            30               2         0   Singapore   

  Country_grouped Country_original  PreviousSalary_norm  APL  ASP.NET  \
0          Sweden           Sweden             0.881050    0        0   
1           Spain            Spain             1.089669    0        0   
2         Germany     

In [214]:
print(df.columns.tolist())

['YearsCode', 'YearsCodePro', 'ComputerSkills', 'Employed', 'Country_raw', 'Country_grouped', 'Country_original', 'PreviousSalary_norm', 'APL', 'ASP.NET', 'ASP.NET Core ', 'AWS', 'Angular', 'Angular.js', 'Ansible', 'Assembly', 'Bash/Shell', 'Blazor', 'C', 'C#', 'C++', 'COBOL', 'Cassandra', 'Chef', 'Clojure', 'Cloud Firestore', 'Colocation', 'CouchDB', 'Couchbase', 'Crystal', 'Dart', 'Delphi', 'Deno', 'DigitalOcean', 'Django', 'Docker', 'Drupal', 'DynamoDB', 'Elasticsearch', 'Elixir', 'Erlang', 'Express', 'F#', 'FastAPI', 'Fastify', 'Firebase', 'Firebase Realtime Database', 'Flask', 'Flow', 'Fortran', 'Gatsby', 'Git', 'Go', 'Google Cloud', 'Google Cloud Platform', 'Groovy', 'HTML/CSS', 'Haskell', 'Heroku', 'Homebrew', 'IBM Cloud or Watson', 'IBM DB2', 'Java', 'JavaScript', 'Julia', 'Kotlin', 'Kubernetes', 'LISP', 'Laravel', 'Linode', 'Lua', 'MATLAB', 'Managed Hosting', 'MariaDB', 'Matlab', 'Microsoft Azure', 'Microsoft SQL Server', 'MongoDB', 'MySQL', 'Neo4j', 'Next.js', 'Node.js', 'Nux

In [215]:

X = df.drop(columns=["Employed", "Country_raw"])
y = df["Employed"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=df["Country_raw"]
)

In [216]:
df.head()

Unnamed: 0,YearsCode,YearsCodePro,ComputerSkills,Employed,Country_raw,Country_grouped,Country_original,PreviousSalary_norm,APL,ASP.NET,ASP.NET Core,AWS,Angular,Angular.js,Ansible,Assembly,Bash/Shell,Blazor,C,C#,C++,COBOL,Cassandra,Chef,Clojure,Cloud Firestore,Colocation,CouchDB,Couchbase,Crystal,Dart,Delphi,Deno,DigitalOcean,Django,Docker,Drupal,DynamoDB,Elasticsearch,Elixir,Erlang,Express,F#,FastAPI,Fastify,Firebase,Firebase Realtime Database,Flask,Flow,Fortran,Gatsby,Git,Go,Google Cloud,Google Cloud Platform,Groovy,HTML/CSS,Haskell,Heroku,Homebrew,IBM Cloud or Watson,IBM DB2,Java,JavaScript,Julia,Kotlin,Kubernetes,LISP,Laravel,Linode,Lua,MATLAB,Managed Hosting,MariaDB,Matlab,Microsoft Azure,Microsoft SQL Server,MongoDB,MySQL,Neo4j,Next.js,Node.js,Nuxt.js,OCaml,OVH,Objective-C,OpenStack,Oracle,Oracle Cloud Infrastructure,PHP,Perl,Phoenix,Play Framework,PostgreSQL,PowerShell,Pulumi,Puppet,Python,R,React.js,Redis,Ruby,Ruby on Rails,Rust,SAS,SQL,SQLite,Scala,Solidity,Spring,Svelte,Swift,Symfony,Terraform,TypeScript,Unity 3D,Unreal Engine,VBA,VMware,Vue.js,Xamarin,Yarn,jQuery,npm,Age_>35,EdLevel_NoHigherEd,EdLevel_Other,EdLevel_PhD,EdLevel_Undergraduate,MainBranch_NotDev,Country_Albania,Country_Algeria,Country_Andorra,Country_Angola,Country_Argentina,Country_Armenia,Country_Australia,Country_Austria,Country_Azerbaijan,Country_Bahrain,Country_Bangladesh,Country_Barbados,Country_Belarus,Country_Belgium,Country_Belize,Country_Benin,Country_Bhutan,Country_Bolivia,Country_Bosnia and Herzegovina,Country_Botswana,Country_Brazil,Country_Bulgaria,Country_Cambodia,Country_Cameroon,Country_Canada,Country_Cape Verde,Country_Chile,Country_China,Country_Colombia,"Country_Congo, Republic of the...",Country_Costa Rica,Country_Croatia,Country_Cuba,Country_Cyprus,Country_Czech Republic,Country_Côte d'Ivoire,Country_Democratic Republic of the Congo,Country_Denmark,Country_Dominican Republic,Country_Ecuador,Country_Egypt,Country_El Salvador,Country_Estonia,Country_Ethiopia,Country_Fiji,Country_Finland,Country_France,Country_Georgia,Country_Germany,Country_Ghana,Country_Greece,Country_Guatemala,Country_Guinea,Country_Guyana,Country_Haiti,Country_Honduras,Country_Hong Kong (S.A.R.),Country_Hungary,Country_Iceland,Country_India,Country_Indonesia,"Country_Iran, Islamic Republic of...",Country_Iraq,Country_Ireland,Country_Isle of Man,Country_Israel,Country_Italy,Country_Jamaica,Country_Japan,Country_Jordan,Country_Kazakhstan,Country_Kenya,Country_Kosovo,Country_Kuwait,Country_Kyrgyzstan,Country_Lao People's Democratic Republic,Country_Latvia,Country_Lebanon,Country_Lesotho,Country_Libyan Arab Jamahiriya,Country_Lithuania,Country_Luxembourg,Country_Madagascar,Country_Malawi,Country_Malaysia,Country_Maldives,Country_Mali,Country_Malta,Country_Mauritius,Country_Mexico,Country_Mongolia,Country_Montenegro,Country_Morocco,Country_Mozambique,Country_Myanmar,Country_Namibia,Country_Nepal,Country_Netherlands,Country_New Zealand,Country_Nicaragua,Country_Niger,Country_Nigeria,Country_Nomadic,Country_Norway,Country_Oman,Country_Pakistan,Country_Palestine,Country_Panama,Country_Paraguay,Country_Peru,Country_Philippines,Country_Poland,Country_Portugal,Country_Qatar,Country_Republic of Korea,Country_Republic of Moldova,Country_Romania,Country_Russian Federation,Country_Rwanda,Country_Saint Lucia,Country_Saudi Arabia,Country_Senegal,Country_Serbia,Country_Singapore,Country_Slovakia,Country_Slovenia,Country_Somalia,Country_South Africa,Country_South Korea,Country_Spain,Country_Sri Lanka,Country_Sudan,Country_Suriname,Country_Swaziland,Country_Sweden,Country_Switzerland,Country_Syrian Arab Republic,Country_Taiwan,Country_Tajikistan,Country_Thailand,Country_The former Yugoslav Republic of Macedonia,Country_Timor-Leste,Country_Togo,Country_Trinidad and Tobago,Country_Tunisia,Country_Turkey,Country_Turkmenistan,Country_Uganda,Country_Ukraine,Country_United Arab Emirates,Country_United Kingdom of Great Britain and Northern Ireland,Country_United Republic of Tanzania,Country_United States of America,Country_Uruguay,Country_Uzbekistan,"Country_Venezuela, Bolivarian Republic of...",Country_Viet Nam,Country_Yemen,Country_Zambia,Country_Zimbabwe,Employment_1
0,7,4,4,0,Sweden,Sweden,Sweden,0.88105,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
1,12,5,12,1,Spain,Spain,Spain,1.089669,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
2,15,6,7,0,Germany,Germany,Germany,1.191662,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
3,9,6,13,0,Canada,Canada,Canada,0.590838,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
4,40,30,2,0,Singapore,Singapore,Singapore,2.458366,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,True,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [217]:
import pandas as pd

# Gruppindelning av antal färdigheter för överskådlighet
bins = [0, 5, 10, 15, 20, 30, 50, df["ComputerSkills"].max()]
labels = ["0–5", "6–10", "11–15", "16–20", "21–30", "31–50", "51+"]

skill_groups = pd.cut(df["ComputerSkills"], bins=bins, labels=labels, right=True)

# Beräkna anställningsgrad per grupp
hire_rate_by_skills = df.groupby(skill_groups)["Employed"].mean().round(3)

print(hire_rate_by_skills)

ComputerSkills
0–5      0.017
6–10     0.239
11–15    0.597
16–20    0.834
21–30    0.960
31–50    0.999
51+      1.000
Name: Employed, dtype: float64


  hire_rate_by_skills = df.groupby(skill_groups)["Employed"].mean().round(3)


In [219]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Definiera modellerna
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000, solver="lbfgs"),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1),
    "GradientBoosting": GradientBoostingClassifier(random_state=42)
}

# Träna och utvärdera
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"\n=== {name} ===")
    print("Accuracy:", round(accuracy_score(y_test, preds), 3))
    print(classification_report(y_test, preds))



=== LogisticRegression ===
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6658
           1       1.00      1.00      1.00      7914

    accuracy                           1.00     14572
   macro avg       1.00      1.00      1.00     14572
weighted avg       1.00      1.00      1.00     14572


=== RandomForest ===
Accuracy: 0.966
              precision    recall  f1-score   support

           0       0.97      0.95      0.96      6658
           1       0.96      0.98      0.97      7914

    accuracy                           0.97     14572
   macro avg       0.97      0.96      0.97     14572
weighted avg       0.97      0.97      0.97     14572


=== GradientBoosting ===
Accuracy: 0.993
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      6658
           1       1.00      0.99      0.99      7914

    accuracy                           0.99     14572
   mac