## Software Engineer Salary 2024 USA

This is a dataset with salary information for Software Engineers in 2024, broken down across different states or regions in the United States.
Source (Kaggle): https://www.kaggle.com/datasets/emreksz/software-engineer-jobs-and-salaries-2024/data

In [1]:
import json
import os
import re

import pandas as pd

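Load *Software Engineer Salaries.csv*, keep only the first 50 rows, and split the raw `Salary` column into a salary range (`Salary`) and the origin of the estimate (`Source`).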

In [2]:
# Read the raw CSV and keep only its first 50 rows.
df = pd.read_csv('Software Engineer Salaries.csv').head(50)
def extract_salary_and_source(salary_column):
    """Split one raw salary cell into its salary range and its estimate source.

    Parameters
    ----------
    salary_column : str or any
        A single cell value such as ``"$68K - $94K (Glassdoor est.)"``.

    Returns
    -------
    tuple
        ``(salary, source)`` when the value starts with a ``$<n>K - $<n>K``
        range followed by a parenthesised source. Otherwise
        ``(salary_column, None)``, with the input passed through unchanged.
    """
    # re.match raises TypeError on anything that is not a string (for example
    # the float NaN pandas uses for empty CSV cells), which would abort the
    # whole .apply(); pass such values through untouched instead.
    if not isinstance(salary_column, str):
        return salary_column, None

    # Group 1: the "$<n>K - $<n>K" range. Group 2: the text inside the parentheses.
    salary_match = re.match(r'(\$\d+K\s*-\s*\$\d+K)\s*\((.*?)\)', salary_column)
    if salary_match is None:
        return salary_column, None
    return salary_match.group(1), salary_match.group(2)

# Run the parser over every Salary cell, then transpose the resulting
# (salary, source) pairs into two parallel sequences, one per column.
salary_values, source_values = zip(*df['Salary'].apply(extract_salary_and_source))
df['Salary'] = salary_values
df['Source'] = source_values

df.head(10)

Unnamed: 0,Company,Company Score,Job Title,Location,Date,Salary,Source
0,ViewSoft,4.8,Software Engineer,"Manassas, VA",8d,$68K - $94K,Glassdoor est.
1,Workiva,4.3,Software Support Engineer,Remote,2d,$61K - $104K,Employer est.
2,"Garmin International, Inc.",3.9,C# Software Engineer,"Cary, NC",2d,$95K - $118K,Glassdoor est.
3,Snapchat,3.5,"Software Engineer, Fullstack, 1+ Years of Expe...","Los Angeles, CA",2d,$97K - $145K,Employer est.
4,Vitesco Technologies Group AG,3.1,Software Engineer,"Seguin, TX",2d,$85K - $108K,Glassdoor est.
5,Spotify,3.9,Backend Engineer II,"New York, NY",1d,$123K - $175K,Employer est.
6,Infor,4.0,Associate Software Engineer,"Alpharetta, GA",7d,$77K - $94K,Glassdoor est.
7,Amerisoft Corporation,5.0,Software Developers,"Farmington Hills, MI",30d+,$71K - $100K,Glassdoor est.
8,WHOOP,3.3,"Software Engineer II (Backend, Health)","Boston, MA",10d,$94K - $148K,Glassdoor est.
9,PFF,4.2,Sr. Software Engineer,Remote,1d,$147K - $189K,Employer est.


In [3]:
# Save the frame, with Salary and Source as separate columns, to a new CSV file.
df.to_csv('Software Engineer Salaries (Updated).csv', index=False)

# Serialise the rows as a JSON array of records. Parsing that text back with
# json.loads yields plain Python objects that can be nested under a "docs" key
# (presumably the payload shape for CouchDB's _bulk_docs endpoint - confirm).
json_data = df.to_json(orient='records', indent=4)
bulk_docs_format = {"docs": json.loads(json_data)}

with open('Software Engineer Salaries.json', 'w') as json_file:
    json_file.write(json.dumps(bulk_docs_format, indent=4))

print(json_data)

[
    {
        "Company":"ViewSoft",
        "Company Score":4.8,
        "Job Title":"Software Engineer",
        "Location":"Manassas, VA",
        "Date":"8d",
        "Salary":"$68K - $94K",
        "Source":"Glassdoor est."
    },
    {
        "Company":"Workiva",
        "Company Score":4.3,
        "Job Title":"Software Support Engineer",
        "Location":"Remote",
        "Date":"2d",
        "Salary":"$61K - $104K",
        "Source":"Employer est."
    },
    {
        "Company":"Garmin International, Inc.",
        "Company Score":3.9,
        "Job Title":"C# Software Engineer",
        "Location":"Cary, NC",
        "Date":"2d",
        "Salary":"$95K - $118K",
        "Source":"Glassdoor est."
    },
    {
        "Company":"Snapchat",
        "Company Score":3.5,
        "Job Title":"Software Engineer, Fullstack, 1+ Years of Experience",
        "Location":"Los Angeles, CA",
        "Date":"2d",
        "Salary":"$97K - $145K",
        "Source":"Employer est."
    },
 

### Mango Queries

1. Find the documents whose `Company` is exactly `"PFF"`:

   ```json
   {
      "selector": {
         "Company": {
            "$eq": "PFF"
         }
      },
      "fields": [
         "_id",
         "_rev",
         "Company",
         "Company Score",
         "Job Title",
         "Location",
         "Date",
         "Salary",
         "Source"
      ],
      "sort": [],
      "limit": 10,
      "skip": 0,
      "execution_stats": true
   }
   ```

2. Find the documents whose `Company Score` is strictly greater than 4 and strictly less than 4.3:

   ```json
   {
      "selector": {
         "Company Score": {
            "$gt": 4,
            "$lt": 4.3
         }
      },
      "fields": [
         "_id",
         "_rev",
         "Company",
         "Company Score",
         "Job Title",
         "Location",
         "Date",
         "Salary",
         "Source"
      ],
      "sort": [],
      "limit": 10,
      "skip": 0,
      "execution_stats": true
   }
   ```

## CouchDB Client Setup (pycouchdb)

In [4]:
import pycouchdb

# Connect to the server with basic authentication.
# The server URL, credentials included, is read from the COUCHDB_URL
# environment variable so that no username or password is stored in the
# notebook, e.g. COUCHDB_URL="http://<user>:<password>@localhost:5984/".
couchdb_url = os.environ.get("COUCHDB_URL")
if not couchdb_url:
    raise RuntimeError(
        "COUCHDB_URL is not set; expected something like "
        "http://<user>:<password>@localhost:5984/"
    )
server = pycouchdb.Server(couchdb_url)
db = server.database("software-engineer-salary-data")

# Get a document by id. The id below belongs to this particular database
# instance; it is expected to be the "Boston Children's Hospital" record.
doc = db.get("4582d66be87fbe3f3bc94a162d00ccc2")
# Pretty-print the fetched document as indented JSON.
json_doc = json.dumps(doc, indent=4)
print(json_doc)

{
    "_id": "4582d66be87fbe3f3bc94a162d00ccc2",
    "_rev": "1-348f93da26e592362d03f9009e13ee53",
    "Company": "Boston Children's Hospital",
    "Company Score": 4.1,
    "Job Title": "Software Engineer - HA (Hybrid)",
    "Location": "Boston, MA",
    "Date": "30d+",
    "Salary": "$102K - $173K",
    "Source": "Employer est."
}
