In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Turn off pandas' chained-assignment warning for this session (default is 'warn').
pd.options.mode.chained_assignment = None

# Survey responses, re-indexed by the 'ResponseId' column.
df = pd.read_csv('data/survey_results_public.csv').set_index('ResponseId')
# Survey schema, indexed by its 'qname' column.
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='qname')

# Display limits are global pandas options, so they apply to every DataFrame
# rendered in this notebook (schema and results alike).
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

# Salary table that the following cells filter and aggregate.
df_s_test = pd.read_csv('df_salary_test2.csv')


In [7]:
# (rows, columns) of the salary table as loaded, before any filtering is applied.
df_s_test.shape


(64084, 80)

In [8]:
# Clean the salary data, then summarise the mean of 'YearlyTotConvertCOMP'
# per country and pay frequency.

# Drop rows whose 'YearlyTotConvertCOMP' is exactly zero. Rows holding NaN pass
# this test (NaN != 0 evaluates to True) and are removed in the next step.
df_s_test = df_s_test[df_s_test['YearlyTotConvertCOMP'] != 0]
display(df_s_test.shape)

# Exclude rows with NaN in 'YearlyTotConvertCOMP'.
# Comment out the next statement to keep the NaN rows for inspection.
df_s_test = df_s_test.dropna(subset=['YearlyTotConvertCOMP'])
display(df_s_test.shape)

# Mean of 'YearlyTotConvertCOMP' for every (Country, CompFreq) pair.
mean_salary_by_frequency = df_s_test.groupby(['Country', 'CompFreq'])['YearlyTotConvertCOMP'].mean()
display(mean_salary_by_frequency)

# Pivot CompFreq into columns once (one row per country) and reuse the result
# rather than calling unstack() again for each column.
mean_salary_wide = mean_salary_by_frequency.unstack()

# Per-country table of the mean for each pay frequency, indexed by Country.
# A country with no responses for a given frequency holds NaN in that column.
Sal_eval_df = pd.DataFrame({
    'MeanSalaryMonthly': mean_salary_wide['Monthly'],
    'MeanSalaryWeekly': mean_salary_wide['Weekly'],
    'MeanSalaryYearly': mean_salary_wide['Yearly'],
})

# To look at the per-country means by pay frequency, display Sal_eval_df here.

# Spot check: responses from Portugal whose 'YearlyTotConvertCOMP' exceeds 600000.
# NOTE(review): 600000 is an ad-hoc threshold; confirm the intended cut-off and units.
big_ones = df_s_test[(df_s_test['Country'] == 'Portugal') & (df_s_test['YearlyTotConvertCOMP'] > 600000)]

big_ones.shape



(64014, 80)

(34811, 80)

Country    CompFreq
Argentina  Monthly     3.457929e+04
           Weekly      1.040000e+05
           Yearly      5.762488e+04
Australia  Monthly     6.412769e+05
           Weekly      1.396315e+06
                           ...     
Ukraine    Weekly      5.420889e+04
           Yearly      5.856377e+04
Viet Nam   Monthly     3.545256e+04
           Weekly      2.437236e+04
           Yearly      2.868280e+04
Name: YearlyTotConvertCOMP, Length: 133, dtype: float64

(7, 80)

In [10]:
# Attach the per-country mean columns of Sal_eval_df (keyed by its index) to
# each response row, matching on the row's 'Country' value (default inner join).
merged_salary_data = pd.merge(
    df_s_test,
    Sal_eval_df,
    left_on='Country',
    right_index=True,
)
display(merged_salary_data.shape)

(34811, 83)

In [11]:
# Rebuild the merged frame: every response row plus its country's mean columns.
merged_salary_data = pd.merge(
    df_s_test,
    Sal_eval_df,
    left_on='Country',
    right_index=True,
)

# First filter: keep rows strictly below twice their country's 'MeanSalaryYearly'.
# Rows where 'MeanSalaryYearly' is NaN compare False and are therefore dropped.
merged_salary_data_filtered = merged_salary_data.loc[
    lambda frame: frame['YearlyTotConvertCOMP'] < 2 * frame['MeanSalaryYearly']
]
display(merged_salary_data_filtered.shape)

# Second filter: of those rows, keep the ones strictly above 'MeanSalaryYearly'.
merged_salary_data_filtered2 = merged_salary_data_filtered.loc[
    lambda frame: frame['YearlyTotConvertCOMP'] > frame['MeanSalaryYearly']
]
display(merged_salary_data_filtered2.shape)


(31953, 83)

(6577, 83)

In [34]:
# Mean of 'YearlyTotConvertCOMP' per (Country, CompFreq), computed on the
# filtered rows only.
mean_salary_by_frequency_filtered = (
    merged_salary_data_filtered
    .groupby(['Country', 'CompFreq'])['YearlyTotConvertCOMP']
    .mean()
)
# Show the Portugal slice: one mean per pay frequency.
mean_salary_by_frequency_filtered.loc['Portugal']



CompFreq
Monthly    76786.937456
Weekly     54478.853047
Yearly     60110.688724
Name: YearlyTotConvertCOMP, dtype: float64

In [35]:
# Check whether the filtered data still holds Portugal rows whose
# 'YearlyTotConvertCOMP' is larger than 120,000 (USD per the original note).
merged_salary_data_filtered.loc[
    lambda frame: (frame['Country'] == 'Portugal') & (frame['YearlyTotConvertCOMP'] > 120000)
]


Unnamed: 0,MainBranch,Employment,RemoteWork,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,LearnCodeCoursesCert,YearsCode,YearsCodePro,DevType,OrgSize,PurchaseInfluence,BuyNewTool,Country,Currency,CompTotal,CompFreq,LanguageHaveWorkedWith,LanguageWantToWorkWith,DatabaseHaveWorkedWith,DatabaseWantToWorkWith,PlatformHaveWorkedWith,PlatformWantToWorkWith,WebframeHaveWorkedWith,WebframeWantToWorkWith,MiscTechHaveWorkedWith,MiscTechWantToWorkWith,ToolsTechHaveWorkedWith,ToolsTechWantToWorkWith,NEWCollabToolsHaveWorkedWith,NEWCollabToolsWantToWorkWith,OpSysProfessional use,OpSysPersonal use,VersionControlSystem,VCInteraction,VCHostingPersonal use,VCHostingProfessional use,OfficeStackAsyncHaveWorkedWith,OfficeStackAsyncWantToWorkWith,OfficeStackSyncHaveWorkedWith,OfficeStackSyncWantToWorkWith,Blockchain,NEWSOSites,SOVisitFreq,SOAccount,SOPartFreq,SOComm,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,TBranch,ICorPM,WorkExp,Knowledge_1,Knowledge_2,Knowledge_3,Knowledge_4,Knowledge_5,Knowledge_6,Knowledge_7,Frequency_1,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,Onboarding,ProfessionalTech,TrueFalse_1,TrueFalse_2,TrueFalse_3,SurveyLength,SurveyEase,ConvertedCompYearly,CurrencyConvert,YearlyTotConvertCOMP,MeanSalaryMonthly,MeanSalaryWeekly,MeanSalaryYearly
1009,I am a developer by profession,"Employed, full-time","Hybrid (some remote, some in-person)",Hobby;Bootstrapping a business,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Books / Physical media;School (i.e., Universit...",,,9.0,4,"Developer, embedded applications or devices","1,000 to 4,999 employees",I have little or no influence,Start a free trial;Ask developers I know/work ...,Portugal,EUR,35000.0,Monthly,Bash/Shell;C++;Python,C++;Kotlin;Python,SQLite,,,,,,,,Docker,Docker,Visual Studio Code,Android Studio;Visual Studio Code,Windows;Windows Subsystem for Linux (WSL),BSD;Linux-based,Git,Code editor;Command-line;Version control hosti...,,,Confluence,Confluence,Microsoft Teams,Microsoft Teams,Indifferent,Stack Overflow,A few times per month or weekly,No,,"No, not really",29.5,Man,No,Straight / Heterosexual,White;European,None of the above,None of the above,Yes,Independent contributor,4.0,Agree,Strongly disagree,Strongly agree,Agree,Agree,Neither agree nor disagree,Disagree,1-2 times a week,1-2 times a week,Never,15-30 minutes a day,15-30 minutes a day,Somewhat long,DevOps function;Microservices;Developer portal...,Yes,Yes,Yes,Appropriate in length,Neither easy nor difficult,447900.0,0.93,451612.903226,89442.184173,54478.853047,298741.519124
5999,I am a developer by profession,"Employed, full-time",Fully remote,Hobby;Contribute to open-source projects,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Other online resources ...,Technical documentation;Blogs;Written Tutorial...,Pluralsight;Other,6.0,2,"Developer, full-stack",10 to 19 employees,I have some influence,Other (please specify):;Ask developers I know/...,Portugal,EUR,21000.0,Monthly,Bash/Shell;C#;HTML/CSS;JavaScript;TypeScript,Dart;Go;JavaScript;Rust;TypeScript,,,Microsoft Azure,AWS;Firebase;Google Cloud,Angular.js;ASP.NET Core ;jQuery;React.js,Deno;Express;Next.js;Node.js;Nuxt.js;React.js;...,Capacitor;Ionic;React Native,Flutter;Ionic;React Native,npm,Docker,Neovim;Visual Studio Code,Neovim;Visual Studio Code,macOS,Linux-based;Windows,Git,Code editor;Command-line,,,Jira Work Management,,Microsoft Teams,Slack,Indifferent,Stack Overflow;Stack Exchange,Daily or almost daily,Yes,A few times per month or weekly,"No, not really",21.0,Man,,Straight / Heterosexual,European,None of the above,None of the above,No,,,,,,,,,,,,,,,,,,,,Appropriate in length,Easy,268740.0,0.93,270967.741935,89442.184173,54478.853047,298741.519124
7345,I am a developer by profession,"Employed, full-time",Fully remote,I don’t code outside of work,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Other online resources (e.g., videos, blogs, f...",Technical documentation;Stack Overflow;Online ...,,22.0,9,"Developer, full-stack",500 to 999 employees,I have little or no influence,Start a free trial;Visit developer communities...,Portugal,USD,140000.0,Yearly,Elixir,Elixir,PostgreSQL;Redis,PostgreSQL,AWS,,Phoenix,Phoenix,,,Docker;Homebrew;npm,Docker;Homebrew,Visual Studio Code,Visual Studio Code,Linux-based;macOS,macOS,Git,Command-line,,,Confluence;Jira Work Management,,Slack;Zoom,Slack;Zoom,Favorable,Stack Overflow;Stack Exchange,A few times per month or weekly,No,,"No, not really",39.5,Man,No,Straight / Heterosexual,White;European,None of the above,None of the above,No,,,,,,,,,,,,,,,,,,,,Appropriate in length,Neither easy nor difficult,140000.0,1.0,140000.0,89442.184173,54478.853047,298741.519124
10056,I am a developer by profession,"Employed, full-time",Fully remote,I don’t code outside of work,Some college/university study without earning ...,Books / Physical media;Other online resources ...,Technical documentation;Blogs;Stack Overflow;O...,,6.0,6,"Developer, full-stack;Cloud infrastructure eng...","1,000 to 4,999 employees",I have a great deal of influence,Start a free trial;Visit developer communities...,Portugal,USD,162000.0,Yearly,Bash/Shell;Go;HTML/CSS;JavaScript;Python;TypeS...,C#;Dart;F#;Perl;Python;Scala;TypeScript,Elasticsearch;PostgreSQL;Redis;SQLite,Cassandra;Elasticsearch;PostgreSQL;Redis;SQLite,AWS;DigitalOcean;Linode,Linode,Django;Express;Flask;Gatsby;Node.js;Nuxt.js;Re...,ASP.NET Core ;Deno;Django;Express;FastAPI;Node...,React Native,,Homebrew;npm;Terraform;Yarn,Ansible;Chef;Docker;Kubernetes;Pulumi;Puppet;T...,Vim;Visual Studio Code,Emacs;Vim;Visual Studio Code,Linux-based,Linux-based,Git,Command-line,,,,,Slack;Zoom,,Very unfavorable,Stack Overflow;Stack Exchange,Daily or almost daily,Yes,Less than once per month or monthly,"No, not at all",29.5,"Or, in your own words:;Woman;Non-binary, gende...",Yes,Bisexual;Gay or Lesbian;Queer,White;Hispanic or Latino/a,None of the above,"I have a mood or emotional disorder (e.g., dep...",Yes,Independent contributor,12.0,Strongly agree,Neither agree nor disagree,Strongly agree,Strongly agree,Strongly agree,Neither agree nor disagree,Agree,Never,3-5 times a week,Never,15-30 minutes a day,Less than 15 minutes a day,Very long,Developer portal or other central places to fi...,Yes,Yes,Yes,Appropriate in length,Neither easy nor difficult,162000.0,1.0,162000.0,89442.184173,54478.853047,298741.519124
13166,"I am not primarily a developer, but I write co...","Independent contractor, freelancer, or self-em...",Fully remote,Hobby;Contribute to open-source projects,"Secondary school (e.g. American high school, G...","School (i.e., University, College, etc);Online...",,Udemy,23.0,13,Other (please specify):,500 to 999 employees,I have a great deal of influence,Start a free trial;Ask developers I know/work ...,Portugal,EUR,135000.0,Yearly,Dart;Ruby,Dart;Ruby,SQLite,Cloud Firestore;SQLite,Linode,Linode,,,Flutter,Flutter,Docker,Docker,Android Studio;Visual Studio Code,Android Studio,Linux-based;macOS;Windows,Linux-based;macOS;Windows,Git,Code editor;Command-line,,,Confluence;Jira Work Management;Trello,Confluence;Jira Work Management;Trello,Microsoft Teams;Slack,Slack,Unfavorable,Stack Overflow for Teams (private knowledge sh...,Multiple times per day,Yes,A few times per week,"Yes, definitely",39.5,Man,No,Straight / Heterosexual,European,None of the above,I have an anxiety disorder,No,,,,,,,,,,,,,,,,,,,,Appropriate in length,Easy,143969.0,0.93,145161.290323,89442.184173,54478.853047,298741.519124
22729,I am a developer by profession,"Employed, full-time",Fully remote,Hobby,"Secondary school (e.g. American high school, G...",Books / Physical media,,,35.0,24,"Engineer, site reliability;Developer, full-sta...","1,000 to 4,999 employees",I have little or no influence,Start a free trial;Visit developer communities...,Portugal,EUR,130000.0,Yearly,Bash/Shell;Go;JavaScript,Dart;Go;JavaScript;Rust,MySQL,MongoDB;MySQL,AWS;DigitalOcean,AWS;DigitalOcean,,,,,Docker;Kubernetes;Terraform,Docker;Kubernetes;Terraform,Visual Studio Code,Android Studio;Visual Studio Code,Linux-based,Linux-based,Git,Code editor;Command-line,,,Confluence;Jira Work Management;Smartsheet,,Microsoft Teams;Slack;Zoom,Google Chat;Slack,Indifferent,Stack Overflow;Stack Exchange,A few times per week,Yes,Less than once per month or monthly,Neutral,39.5,Man,No,Straight / Heterosexual,Biracial,None of the above,None of the above,Yes,Independent contributor,24.0,Disagree,Strongly agree,Agree,Disagree,Neither agree nor disagree,Strongly agree,Strongly agree,Never,Never,Never,15-30 minutes a day,Less than 15 minutes a day,Somewhat long,DevOps function;Microservices;Continuous integ...,No,No,Yes,Appropriate in length,Easy,138637.0,0.93,139784.946237,89442.184173,54478.853047,298741.519124
23761,I am a developer by profession,"Employed, full-time",Fully remote,Hobby,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Books / Physical media;School (i.e., Universit...",,,10.0,5,"Engineer, data;Developer, back-end",20 to 99 employees,I have some influence,Start a free trial;Ask developers I know/work ...,Portugal,EUR,54000.0,Monthly,HTML/CSS;Python;SQL,Go;Python;SQL,MySQL,MySQL,,,Django;FastAPI;Flask,Django;FastAPI;Flask,Keras;NumPy;Pandas;TensorFlow;Torch/PyTorch;Hu...,Keras;NumPy;TensorFlow;Torch/PyTorch;Hugging F...,Ansible;Docker,Docker,PyCharm;Sublime Text,PyCharm;Sublime Text,Linux-based,Linux-based;Windows,Git,Command-line,,,Confluence,,Slack;Zoom,Slack,Favorable,Stack Overflow;Stack Exchange,Daily or almost daily,Yes,A few times per week,"Yes, somewhat",29.5,Man,No,Straight / Heterosexual,European,None of the above,None of the above,Yes,Independent contributor,5.0,Agree,Agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Neither agree nor disagree,1-2 times a week,1-2 times a week,1-2 times a week,30-60 minutes a day,Less than 15 minutes a day,Just right,DevOps function;Microservices;Continuous integ...,No,Yes,Yes,Appropriate in length,Easy,691056.0,0.93,696774.193548,89442.184173,54478.853047,298741.519124
24412,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Fully remote,Freelance/contract work,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Books / Physical media;School (i.e., Universit...",,,30.0,22,Engineering manager,"10,000 or more employees",I have little or no influence,Start a free trial;Visit developer communities...,Portugal,EUR,17000.0,Monthly,C#;HTML/CSS;TypeScript,C#;Dart;Go;Rust;Solidity;TypeScript,CouchDB;Cloud Firestore;Elasticsearch;PostgreSQL,Elasticsearch;PostgreSQL,DigitalOcean;Firebase,DigitalOcean;Microsoft Azure,Angular;React.js,,.NET,.NET;Flutter,Docker;Yarn,,Visual Studio;Visual Studio Code,Visual Studio Code,Windows,Windows,Git,Code editor;Command-line,,,,,Microsoft Teams;Zoom,Microsoft Teams,Very favorable,Stack Overflow;Stack Exchange,Less than once per month or monthly,Yes,Less than once per month or monthly,"Yes, definitely",49.5,Man,No,Straight / Heterosexual,White;European,None of the above,None of the above,Yes,People manager,22.0,Agree,Neither agree nor disagree,Neither agree nor disagree,Neither agree nor disagree,Disagree,Neither agree nor disagree,Neither agree nor disagree,1-2 times a week,1-2 times a week,1-2 times a week,Less than 15 minutes a day,Less than 15 minutes a day,Somewhat long,Innersource initiative;DevOps function;Microse...,Yes,No,No,Too long,Neither easy nor difficult,217548.0,0.93,219354.83871,89442.184173,54478.853047,298741.519124
25572,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Fully remote,Bootstrapping a business,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Other online resources (e.g., videos, blogs, f...",Technical documentation;Stack Overflow;Other (...,,20.0,20,"Developer, full-stack;Database administrator;D...","10,000 or more employees",I have some influence,Start a free trial;Visit developer communities...,Portugal,EUR,115000.0,Yearly,Bash/Shell;C#;Groovy;HTML/CSS;Java;JavaScript;...,,MariaDB;Microsoft SQL Server;MySQL;Oracle;Redis,,AWS;Google Cloud;Microsoft Azure,,ASP.NET;ASP.NET Core ;jQuery;Node.js;React.js,,.NET,,Docker;npm,,Emacs;Notepad++;Visual Studio;Visual Studio Code,,Linux-based;Windows,Linux-based;Windows;Windows Subsystem for Linu...,Git,Code editor;Dedicated version control GUI appl...,,,,,Google Chat;Microsoft Teams,,Unsure,Stack Overflow;Stack Exchange,A few times per week,Yes,A few times per month or weekly,Neutral,29.5,,,,,,,No,,,,,,,,,,,,,,,,,,,,Appropriate in length,Easy,122640.0,0.93,123655.913978,89442.184173,54478.853047,298741.519124
25900,I am a developer by profession,"Employed, full-time",Fully remote,Hobby,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","School (i.e., University, College, etc)",,,2.0,Less than 1 year,"Developer, QA or test",500 to 999 employees,I have little or no influence,Start a free trial;Ask developers I know/work ...,Portugal,EUR,28000.0,Monthly,Bash/Shell;Groovy;JavaScript;Python,Groovy;Java;JavaScript;Python,PostgreSQL,Cassandra;DynamoDB;MariaDB;MongoDB;PostgreSQL,AWS,AWS;Google Cloud,,,,,Ansible;Docker;npm,Ansible;Docker;npm,IntelliJ;PyCharm,IntelliJ;PyCharm,Linux-based,macOS,Git,Command-line,,,Confluence;Jira Work Management,,Google Chat;Zoom,,Indifferent,Stack Overflow,Daily or almost daily,No,,Neutral,29.5,Man,No,Straight / Heterosexual,European,None of the above,None of the above,No,,,,,,,,,,,,,,,,,,,,Appropriate in length,Neither easy nor difficult,358320.0,0.93,361290.322581,89442.184173,54478.853047,298741.519124
