# 📊 Stack Overflow Developer Survey 2025
## An In-Depth Exploratory Data Analysis of Developer Demographics, Skills, Compensation, and AI Adoption


# Data Checking and Cleaning

In [2]:
import pandas as pd 
import numpy as np

In [3]:
# Load the public survey results exactly as published (no cleaning yet)
df_raw = pd.read_csv(filepath_or_buffer="dataset/survey_results_public.csv")

In [4]:
# Show every column name as a plain Python list
list(df_raw.columns)

['ResponseId',
 'MainBranch',
 'Age',
 'EdLevel',
 'Employment',
 'EmploymentAddl',
 'WorkExp',
 'LearnCodeChoose',
 'LearnCode',
 'LearnCodeAI',
 'AILearnHow',
 'YearsCode',
 'DevType',
 'OrgSize',
 'ICorPM',
 'RemoteWork',
 'PurchaseInfluence',
 'TechEndorseIntro',
 'TechEndorse_1',
 'TechEndorse_2',
 'TechEndorse_3',
 'TechEndorse_4',
 'TechEndorse_5',
 'TechEndorse_6',
 'TechEndorse_7',
 'TechEndorse_8',
 'TechEndorse_9',
 'TechEndorse_13',
 'TechEndorse_13_TEXT',
 'TechOppose_1',
 'TechOppose_2',
 'TechOppose_3',
 'TechOppose_5',
 'TechOppose_7',
 'TechOppose_9',
 'TechOppose_11',
 'TechOppose_13',
 'TechOppose_16',
 'TechOppose_15',
 'TechOppose_15_TEXT',
 'Industry',
 'JobSatPoints_1',
 'JobSatPoints_4',
 'JobSatPoints_5',
 'JobSatPoints_6',
 'JobSatPoints_7',
 'JobSatPoints_8',
 'JobSatPoints_9',
 'JobSatPoints_10',
 'JobSatPoints_11',
 'JobSatPoints_13',
 'JobSatPoints_14',
 'JobSatPoints_15',
 'JobSatPoints_16',
 'JobSatPoints_15_TEXT',
 'AIThreat',
 'NewRole',
 'ToolCountWor

In [5]:
# Preview five random rows; the fixed seed keeps the preview identical
# across "Restart Kernel -> Run All" executions.
df_raw.sample(5, random_state=42)

Unnamed: 0,ResponseId,MainBranch,Age,EdLevel,Employment,EmploymentAddl,WorkExp,LearnCodeChoose,LearnCode,LearnCodeAI,...,AIAgentOrchestration,AIAgentOrchWrite,AIAgentObserveSecure,AIAgentObsWrite,AIAgentExternal,AIAgentExtWrite,AIHuman,AIOpen,ConvertedCompYearly,JobSat
7535,7536,I am a developer by profession,45-54 years old,"Masterâ€™s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed,"Caring for dependents (children, elderly, etc....",26.0,"Yes, I am not new to coding but am learning ne...",Online Courses or Certification (includes all ...,"Yes, I learned how to use AI-enabled tools req...",...,,,,,,,,,,8.0
48055,48056,"I am not primarily a developer, but I write co...",18-24 years old,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)",Student,Attending school (full-time),2.0,"Yes, I am not new to coding but am learning ne...",Books / Physical media;Stack Overflow or Stack...,"Yes, I learned how to use AI-enabled tools for...",...,,,,,,,,,,
8084,8085,I am a developer by profession,18-24 years old,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)",Employed,Attending school (part-time),4.0,,,,...,,,,,,,,,,
31464,31465,I am a developer by profession,35-44 years old,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)",Employed,None of the above,12.0,"No, I am not new to coding and did not learn n...",,"No, I learned something that was not related t...",...,,,,,,,When I donâ€™t trust AIâ€™s answers;When I want to...,,103253.0,10.0
24842,24843,I am learning to code,45-54 years old,"Masterâ€™s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed,None of the above,19.0,"Yes, I am not new to coding but am learning ne...",,,...,,,,,,,,,,


In [6]:
# Summarise the raw frame: row count, column count, dtypes and memory usage
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49123 entries, 0 to 49122
Columns: 170 entries, ResponseId to JobSat
dtypes: float64(50), int64(1), object(119)
memory usage: 63.7+ MB


## 1. Data Cleaning

In [7]:
# Survey columns kept for the analysis, organised by theme.
# Dict insertion order defines the final column order of `df`.
column_groups = {
    "id": ["ResponseId"],
    "demographics": [
        "Age", "Country", "EdLevel", "Employment",
        "WorkExp", "YearsCode", "OrgSize", "Industry",
    ],
    "job_and_work_style": ["MainBranch", "DevType", "ICorPM", "RemoteWork"],
    "learning_and_ai": [
        "LearnCode", "LearnCodeChoose", "LearnCodeAI",
        "AIThreat", "AISelect", "AISent", "AIOpen",
    ],
    "tech_stack": [
        "LanguageHaveWorkedWith", "LanguageWantToWorkWith",
        "DatabaseHaveWorkedWith", "PlatformHaveWorkedWith",
        "WebframeHaveWorkedWith", "DevEnvsHaveWorkedWith",
    ],
    "targets": ["ConvertedCompYearly", "JobSat"],
}

# Flatten the themed groups into one ordered list of column names
important_cols = [name for group in column_groups.values() for name in group]

# Keep all rows, restrict to the selected columns
df = df_raw.loc[:, important_cols]

print("Shape after column selection:", df.shape)
df.head(5)


Shape after column selection: (49123, 28)


Unnamed: 0,ResponseId,Age,Country,EdLevel,Employment,WorkExp,YearsCode,OrgSize,Industry,MainBranch,...,AISent,AIOpen,LanguageHaveWorkedWith,LanguageWantToWorkWith,DatabaseHaveWorkedWith,PlatformHaveWorkedWith,WebframeHaveWorkedWith,DevEnvsHaveWorkedWith,ConvertedCompYearly,JobSat
0,1,25-34 years old,Ukraine,"Masterâ€™s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed,8.0,14.0,20 to 99 employees,Fintech,I am a developer by profession,...,Indifferent,"Troubleshooting, profiling, debugging",Bash/Shell (all shells);Dart;SQL,Dart,Cloud Firestore;PostgreSQL,Amazon Web Services (AWS);Cloudflare;Firebase;...,,Android Studio;Notepad++;Visual Studio;Visual ...,61256.0,10.0
1,2,25-34 years old,Netherlands,"Associate degree (A.A., A.S., etc.)",Employed,2.0,10.0,500 to 999 employees,Retail and Consumer Services,I am a developer by profession,...,Indifferent,All skills. AI is a flop.,Java,Java;Python;Swift,Dynamodb;MongoDB,Amazon Web Services (AWS);Datadog;Docker;Homeb...,Spring Boot,IntelliJ IDEA;PyCharm;Visual Studio Code;Xcode,104413.0,9.0
2,3,35-44 years old,Ukraine,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em...",10.0,12.0,,Software Development,I am a developer by profession,...,Favorable,"Understand how things actually work, problem s...",Dart;HTML/CSS;JavaScript;TypeScript,Dart;HTML/CSS;JavaScript;TypeScript,MongoDB;MySQL;PostgreSQL,Datadog;Firebase;npm;pnpm,Next.js;Node.js;React,Visual Studio Code,53061.0,8.0
3,4,35-44 years old,Ukraine,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)",Employed,4.0,5.0,"10,000 or more employees",Retail and Consumer Services,I am a developer by profession,...,Favorable,,Java;Kotlin;SQL,Java;Kotlin,,Amazon Web Services (AWS);Google Cloud,Spring Boot,,36197.0,6.0
4,5,35-44 years old,Ukraine,"Masterâ€™s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",21.0,22.0,,Software Development,I am a developer by profession,...,Favorable,"critical thinking, the skill to define the tas...",C;C#;C++;Delphi;HTML/CSS;Java;JavaScript;Lua;P...,C#;Java;JavaScript;Python;SQL;TypeScript,Elasticsearch;Microsoft SQL Server;MySQL;Oracl...,Amazon Web Services (AWS);APT;Docker;Make;Mave...,Angular;ASP.NET;ASP.NET Core;Flask;jQuery,Eclipse;IntelliJ IDEA;Jupyter Notebook/Jupyter...,60000.0,7.0


In [8]:
# Inspect dtypes and non-null counts of the selected columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49123 entries, 0 to 49122
Data columns (total 28 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ResponseId              49123 non-null  int64  
 1   Age                     49123 non-null  object 
 2   Country                 35402 non-null  object 
 3   EdLevel                 48087 non-null  object 
 4   Employment              48277 non-null  object 
 5   WorkExp                 42844 non-null  float64
 6   YearsCode               43000 non-null  float64
 7   OrgSize                 34144 non-null  object 
 8   Industry                33607 non-null  object 
 9   MainBranch              49123 non-null  object 
 10  DevType                 43636 non-null  object 
 11  ICorPM                  33211 non-null  object 
 12  RemoteWork              33747 non-null  object 
 13  LearnCode               33516 non-null  object 
 14  LearnCodeChoose         46801 non-null

In [9]:
# Work on an independent copy so `df` keeps the original (uncleaned) values
df_clean = df.copy(deep=True)


### Clean Age Column (Object → Category)

In [10]:
# Label any missing age bracket as "Unknown", then store the column as a categorical
df_clean["Age"] = (
    df_clean["Age"]
    .fillna("Unknown")
    .astype("category")
)


In [11]:
# Confirm that Age is now stored with the category dtype
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49123 entries, 0 to 49122
Data columns (total 28 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   ResponseId              49123 non-null  int64   
 1   Age                     49123 non-null  category
 2   Country                 35402 non-null  object  
 3   EdLevel                 48087 non-null  object  
 4   Employment              48277 non-null  object  
 5   WorkExp                 42844 non-null  float64 
 6   YearsCode               43000 non-null  float64 
 7   OrgSize                 34144 non-null  object  
 8   Industry                33607 non-null  object  
 9   MainBranch              49123 non-null  object  
 10  DevType                 43636 non-null  object  
 11  ICorPM                  33211 non-null  object  
 12  RemoteWork              33747 non-null  object  
 13  LearnCode               33516 non-null  object  
 14  LearnCodeChoose       

### Handle Numerical Columns

Columns:

- `WorkExp`
- `YearsCode`
- `ConvertedCompYearly`
- `JobSat`


In [12]:
num_cols = ["WorkExp", "YearsCode", "ConvertedCompYearly", "JobSat"]

# Replace missing values in each numeric column with that column's own median.
# `fillna` with a Series matches the medians to columns by name.
column_medians = df_clean[num_cols].median()
df_clean[num_cols] = df_clean[num_cols].fillna(column_medians)


### Handle all categorical columns
We will replace the `NaN` values with `Unknown` in all categorical columns.

In [14]:
# Every column still stored as object dtype gets an explicit "Unknown" label
cat_cols = df_clean.select_dtypes(include=["object"]).columns

unknown_filled = df_clean[cat_cols].fillna(value="Unknown")
df_clean[cat_cols] = unknown_filled


In [15]:
# Count the remaining missing values per column
df_clean.isnull().sum()


ResponseId                0
Age                       0
Country                   0
EdLevel                   0
Employment                0
WorkExp                   0
YearsCode                 0
OrgSize                   0
Industry                  0
MainBranch                0
DevType                   0
ICorPM                    0
RemoteWork                0
LearnCode                 0
LearnCodeChoose           0
LearnCodeAI               0
AIThreat                  0
AISelect                  0
AISent                    0
AIOpen                    0
LanguageHaveWorkedWith    0
LanguageWantToWorkWith    0
DatabaseHaveWorkedWith    0
PlatformHaveWorkedWith    0
WebframeHaveWorkedWith    0
DevEnvsHaveWorkedWith     0
ConvertedCompYearly       0
JobSat                    0
dtype: int64

In [16]:
# Review the dtype of every column after cleaning
df_clean.dtypes


ResponseId                   int64
Age                       category
Country                     object
EdLevel                     object
Employment                  object
WorkExp                    float64
YearsCode                  float64
OrgSize                     object
Industry                    object
MainBranch                  object
DevType                     object
ICorPM                      object
RemoteWork                  object
LearnCode                   object
LearnCodeChoose             object
LearnCodeAI                 object
AIThreat                    object
AISelect                    object
AISent                      object
AIOpen                      object
LanguageHaveWorkedWith      object
LanguageWantToWorkWith      object
DatabaseHaveWorkedWith      object
PlatformHaveWorkedWith      object
WebframeHaveWorkedWith      object
DevEnvsHaveWorkedWith       object
ConvertedCompYearly        float64
JobSat                     float64
dtype: object

In [18]:
# Summary statistics for salary after the median imputation
df_clean.loc[:, "ConvertedCompYearly"].describe()

count    4.912300e+04
mean     8.824699e+04
std      3.226638e+05
min      1.000000e+00
25%      7.538350e+04
50%      7.538350e+04
75%      7.538350e+04
max      5.000000e+07
Name: ConvertedCompYearly, dtype: float64

### Note: Salary has many missing values. Because this is real-world data, imputing them would distort the distribution, so we retain the missing values.

In [21]:
# Put back the un-imputed salary values from `df` so gaps stay NaN for EDA
raw_salary = df["ConvertedCompYearly"]
df_clean["ConvertedCompYearly"] = raw_salary


In [22]:
# Summary statistics for salary with missing values retained
df_clean.loc[:, "ConvertedCompYearly"].describe()


count    2.392800e+04
mean     1.017916e+05
std      4.619345e+05
min      1.000000e+00
25%      3.817100e+04
50%      7.538350e+04
75%      1.206302e+05
max      5.000000e+07
Name: ConvertedCompYearly, dtype: float64

In [19]:
# Summary statistics for WorkExp
df_clean.loc[:, "WorkExp"].describe()

count    49123.000000
mean        12.939947
std         10.150537
min          1.000000
25%          6.000000
50%         10.000000
75%         18.000000
max        100.000000
Name: WorkExp, dtype: float64

In [20]:
# Summary statistics for YearsCode
df_clean.loc[:, "YearsCode"].describe()

count    49123.000000
mean        16.254260
std         11.062818
min          1.000000
25%          8.000000
50%         14.000000
75%         21.000000
max        100.000000
Name: YearsCode, dtype: float64

## Checking our cleaned data columns

In [34]:
# Columns 0-4 of the cleaned frame
df_clean.iloc[:, 0:5]

Unnamed: 0,ResponseId,Age,Country,EdLevel,Employment
0,1,25-34 years old,Ukraine,"Masterâ€™s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed
1,2,25-34 years old,Netherlands,"Associate degree (A.A., A.S., etc.)",Employed
2,3,35-44 years old,Ukraine,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em..."
3,4,35-44 years old,Ukraine,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)",Employed
4,5,35-44 years old,Ukraine,"Masterâ€™s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em..."
...,...,...,...,...,...
49118,49119,25-34 years old,Unknown,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)",Employed
49119,49120,35-44 years old,Unknown,"Bachelorâ€™s degree (B.A., B.S., B.Eng., etc.)",Employed
49120,49121,25-34 years old,Unknown,"Secondary school (e.g. American high school, G...",Employed
49121,49122,25-34 years old,France,"Associate degree (A.A., A.S., etc.)",Employed


In [39]:
# Columns 5-9 of the cleaned frame
df_clean.iloc[:, slice(5, 10)]

Unnamed: 0,WorkExp,YearsCode,OrgSize,Industry,MainBranch
0,8.0,14.0,20 to 99 employees,Fintech,I am a developer by profession
1,2.0,10.0,500 to 999 employees,Retail and Consumer Services,I am a developer by profession
2,10.0,12.0,Unknown,Software Development,I am a developer by profession
3,4.0,5.0,"10,000 or more employees",Retail and Consumer Services,I am a developer by profession
4,21.0,22.0,Unknown,Software Development,I am a developer by profession
...,...,...,...,...,...
49118,9.0,13.0,500 to 999 employees,Software Development,I am a developer by profession
49119,13.0,15.0,"1,000 to 4,999 employees",Unknown,I am a developer by profession
49120,2.0,14.0,Unknown,Unknown,I am a developer by profession
49121,10.0,14.0,20 to 99 employees,Manufacturing,I am a developer by profession


In [38]:
# Columns 10-14 of the cleaned frame
df_clean.iloc[:, slice(10, 15)]

Unnamed: 0,DevType,ICorPM,RemoteWork,LearnCode,LearnCodeChoose
0,"Developer, mobile",People manager,Remote,Online Courses or Certification (includes all ...,"Yes, I am not new to coding but am learning ne..."
1,"Developer, back-end",Individual contributor,"Hybrid (some in-person, leans heavy to flexibi...",Online Courses or Certification (includes all ...,"Yes, I am not new to coding but am learning ne..."
2,"Developer, front-end",Unknown,Unknown,Online Courses or Certification (includes all ...,"Yes, I am not new to coding but am learning ne..."
3,"Developer, back-end",Individual contributor,Remote,"Other online resources (e.g. standard search, ...","Yes, I am not new to coding but am learning ne..."
4,Engineering manager,Unknown,Unknown,Unknown,"No, I am not new to coding and did not learn n..."
...,...,...,...,...,...
49118,"Developer, full-stack",Individual contributor,"Hybrid (some remote, leans heavy to in-person)",Online Courses or Certification (includes all ...,"Yes, I am not new to coding but am learning ne..."
49119,"Developer, full-stack",Individual contributor,"Hybrid (some remote, leans heavy to in-person)",Unknown,"No, I am not new to coding and did not learn n..."
49120,Unknown,Unknown,Unknown,"Other online resources (e.g. standard search, ...","Yes, I am not new to coding but am learning ne..."
49121,"Developer, full-stack",Individual contributor,"Hybrid (some remote, leans heavy to in-person)",Unknown,"No, I am not new to coding and did not learn n..."


In [40]:
# Columns 15-19 of the cleaned frame
df_clean.iloc[:, slice(15, 20)]

Unnamed: 0,LearnCodeAI,AIThreat,AISelect,AISent,AIOpen
0,"Yes, I learned how to use AI-enabled tools for...",I'm not sure,"Yes, I use AI tools monthly or infrequently",Indifferent,"Troubleshooting, profiling, debugging"
1,"Yes, I learned how to use AI-enabled tools for...",I'm not sure,"Yes, I use AI tools weekly",Indifferent,All skills. AI is a flop.
2,"Yes, I learned how to use AI-enabled tools for...",No,"Yes, I use AI tools daily",Favorable,"Understand how things actually work, problem s..."
3,"Yes, I learned how to use AI-enabled tools for...",No,"Yes, I use AI tools weekly",Favorable,Unknown
4,"Yes, I learned how to use AI-enabled tools for...",No,"Yes, I use AI tools weekly",Favorable,"critical thinking, the skill to define the tas..."
...,...,...,...,...,...
49118,"Yes, I learned how to use AI-enabled tools req...",I'm not sure,Unknown,Unknown,Unknown
49119,"Yes, I learned how to use AI-enabled tools req...",Unknown,Unknown,Unknown,Unknown
49120,"No, I didn't spend time learning in the past year",Unknown,Unknown,Unknown,Unknown
49121,"Yes, I learned how to use AI-enabled tools for...",No,"Yes, I use AI tools daily",Favorable,Unknown


In [41]:
# Columns 20-24 of the cleaned frame
df_clean.iloc[:, slice(20, 25)]

Unnamed: 0,LanguageHaveWorkedWith,LanguageWantToWorkWith,DatabaseHaveWorkedWith,PlatformHaveWorkedWith,WebframeHaveWorkedWith
0,Bash/Shell (all shells);Dart;SQL,Dart,Cloud Firestore;PostgreSQL,Amazon Web Services (AWS);Cloudflare;Firebase;...,Unknown
1,Java,Java;Python;Swift,Dynamodb;MongoDB,Amazon Web Services (AWS);Datadog;Docker;Homeb...,Spring Boot
2,Dart;HTML/CSS;JavaScript;TypeScript,Dart;HTML/CSS;JavaScript;TypeScript,MongoDB;MySQL;PostgreSQL,Datadog;Firebase;npm;pnpm,Next.js;Node.js;React
3,Java;Kotlin;SQL,Java;Kotlin,Unknown,Amazon Web Services (AWS);Google Cloud,Spring Boot
4,C;C#;C++;Delphi;HTML/CSS;Java;JavaScript;Lua;P...,C#;Java;JavaScript;Python;SQL;TypeScript,Elasticsearch;Microsoft SQL Server;MySQL;Oracl...,Amazon Web Services (AWS);APT;Docker;Make;Mave...,Angular;ASP.NET;ASP.NET Core;Flask;jQuery
...,...,...,...,...,...
49118,Unknown,Unknown,Unknown,Unknown,Unknown
49119,Unknown,Unknown,Unknown,Unknown,Unknown
49120,Unknown,Unknown,Unknown,Unknown,Unknown
49121,C#;SQL,C#;HTML/CSS;Java;JavaScript;Kotlin;PHP;Python;...,Microsoft SQL Server;MySQL,Unknown,Unknown


In [42]:
# Columns from position 25 through the last column
df_clean[df_clean.columns[25:]]

Unnamed: 0,DevEnvsHaveWorkedWith,ConvertedCompYearly,JobSat
0,Android Studio;Notepad++;Visual Studio;Visual ...,61256.0,10.0
1,IntelliJ IDEA;PyCharm;Visual Studio Code;Xcode,104413.0,9.0
2,Visual Studio Code,53061.0,8.0
3,Unknown,36197.0,6.0
4,Eclipse;IntelliJ IDEA;Jupyter Notebook/Jupyter...,60000.0,7.0
...,...,...,...
49118,Unknown,,8.0
49119,Unknown,,8.0
49120,Unknown,,8.0
49121,Visual Studio,,7.0


### Saving the dataset

In [44]:
# Write the cleaned frame to disk without the positional index column
cleaned_csv_path = "dataset/stack_overflow_2025_cleaned_eda.csv"
df_clean.to_csv(cleaned_csv_path, index=False)

In [46]:
# Final check of dtypes and non-null counts for the cleaned frame
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49123 entries, 0 to 49122
Data columns (total 28 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   ResponseId              49123 non-null  int64   
 1   Age                     49123 non-null  category
 2   Country                 49123 non-null  object  
 3   EdLevel                 49123 non-null  object  
 4   Employment              49123 non-null  object  
 5   WorkExp                 49123 non-null  float64 
 6   YearsCode               49123 non-null  float64 
 7   OrgSize                 49123 non-null  object  
 8   Industry                49123 non-null  object  
 9   MainBranch              49123 non-null  object  
 10  DevType                 49123 non-null  object  
 11  ICorPM                  49123 non-null  object  
 12  RemoteWork              49123 non-null  object  
 13  LearnCode               49123 non-null  object  
 14  LearnCodeChoose       