### Function 1

In [1]:
import pandas as pd

def calculate_salary_impact(occupation_title, data):
    """
    Rank areas by their employment-weighted contribution to an occupation's median salary.

    Rows are selected by an exact match on 'OCC_TITLE'. Rows whose 'TOT_EMP' or
    'A_MEDIAN' cannot be parsed as a number are dropped. The input frame is
    never modified: all numeric conversion happens on local Series.

    Args:
    occupation_title (str): Value compared for equality against 'OCC_TITLE'.
    data (DataFrame): Table providing the 'AREA_TITLE', 'OCC_TITLE', 'TOT_EMP'
        and 'A_MEDIAN' columns.

    Returns:
    DataFrame: Columns 'AREA_TITLE', 'OCC_TITLE', 'TOT_EMP', 'EMP_PERCENTAGE',
        'A_MEDIAN' and 'WEIGHTED_MEDIAN_SALARY', sorted by
        'WEIGHTED_MEDIAN_SALARY' in descending order. Empty when no row matches.
    """
    # Coerce the full columns (unparseable entries become NaN) without writing
    # back into `data`; converting the whole column keeps the resulting dtypes
    # independent of which occupation is selected.
    tot_emp = pd.to_numeric(data['TOT_EMP'], errors='coerce')
    a_median = pd.to_numeric(data['A_MEDIAN'], errors='coerce')

    # Keep only rows for the requested occupation with usable numbers
    keep = (data['OCC_TITLE'] == occupation_title) & tot_emp.notna() & a_median.notna()

    # Explicit copy: the columns added below must not touch the caller's frame
    result = data.loc[keep, ['AREA_TITLE', 'OCC_TITLE']].copy()
    result['TOT_EMP'] = tot_emp[keep]
    result['A_MEDIAN'] = a_median[keep]

    # Total employment for this occupation across all remaining rows
    total_us_employment = result['TOT_EMP'].sum()

    # Share of the occupation's employment held by each row, in percent
    result['EMP_PERCENTAGE'] = (result['TOT_EMP'] / total_us_employment) * 100

    # Median salary scaled by that employment share
    result['WEIGHTED_MEDIAN_SALARY'] = result['EMP_PERCENTAGE'] * result['A_MEDIAN'] / 100

    output_columns = ['AREA_TITLE', 'OCC_TITLE', 'TOT_EMP', 'EMP_PERCENTAGE', 'A_MEDIAN', 'WEIGHTED_MEDIAN_SALARY']
    return result[output_columns].sort_values(by='WEIGHTED_MEDIAN_SALARY', ascending=False)

# Load your CSV file
file_path = 'filtered_state_M2022_dl.csv'  # Replace with the path to your CSV file
data = pd.read_csv(file_path)

# Specify the occupation title (matched exactly against the 'OCC_TITLE' column)
occupation_title = "Computer and Mathematical Occupations"  # Replace with the desired occupation title

# Compute the result once; calculate_salary_impact already returns the rows
# sorted by 'WEIGHTED_MEDIAN_SALARY' in descending order.
weighted_salary_result = calculate_salary_impact(occupation_title, data)
# Second name kept as an independent copy instead of repeating the identical call
sorted_weighted_salary_result = weighted_salary_result.copy()
print(weighted_salary_result.head())  # This will display the first few rows of the result


      AREA_TITLE                              OCC_TITLE   TOT_EMP  \
95    California  Computer and Mathematical Occupations  695480.0   
992        Texas  Computer and Mathematical Occupations  444670.0   
739     New York  Computer and Mathematical Occupations  284680.0   
1061    Virginia  Computer and Mathematical Occupations  241590.0   
1084  Washington  Computer and Mathematical Occupations  204330.0   

      EMP_PERCENTAGE  A_MEDIAN  WEIGHTED_MEDIAN_SALARY  
95         13.862495    128690            17839.645126  
992         8.863283     94560             8381.119994  
739         5.674319    106360             6035.205332  
1061        4.815437    109910             5292.646966  
1084        4.072761    128220             5222.093845  


### Function 3

In [2]:
import pandas as pd
import re

# Load the tax table CSV into a DataFrame. The example at the end of this cell
# passes it to find_tax_rate, which reads its 'State', 'Bracket' and 'Rates' columns.
new_file_path = 'filtered_income_tax_2022_manual.csv'  # Replace with your file path
new_tax_data = pd.read_csv(new_file_path)

# Function to find the tax rate
def find_tax_rate(state, income, data):
    """
    Find the tax rate based on state and income.

    The state is matched case-insensitively against the 'State' column. An
    exact name match (ignoring surrounding whitespace) takes precedence, so
    'Virginia' does not also pick up the 'West Virginia' rows. Only when no
    row matches exactly does the lookup fall back to a literal (non-regex)
    substring match.

    Args:
    state (str): The state name.
    income (float): The income amount.
    data (DataFrame): The DataFrame with tax information; must provide the
        'State', 'Bracket' and 'Rates' columns.

    Returns:
    float: The 'Rates' value of the highest bracket whose 'Bracket' value is
        strictly below income, or None when no row matches the state or
        income does not exceed any 'Bracket' value.
    """
    # Prefer rows whose name equals the requested state exactly
    state_names = data['State'].str.strip().str.lower()
    exact_match = (state_names == state.strip().lower()).fillna(False)

    if exact_match.any():
        state_data = data[exact_match]
    else:
        # Fallback: literal substring match, for names carrying extra text
        state_data = data[data['State'].str.contains(state, case=False, na=False, regex=False)]

    # Sort by Bracket to ensure correct order
    state_data = state_data.sort_values(by='Bracket')

    # Initialize default rate
    rate = None

    # Walk the brackets in ascending order, remembering the rate of the last
    # one the income exceeds; stop at the first bracket it does not exceed.
    for threshold, bracket_rate in zip(state_data['Bracket'], state_data['Rates']):
        if income > threshold:
            rate = bracket_rate
        else:
            break

    return rate

# Example usage: look up the rate for the same income in each jurisdiction
test_income = 100000
for test_state in ('California', 'Federal'):
    tax_rate = find_tax_rate(test_state, test_income, new_tax_data)
    if tax_rate is None:
        # find_tax_rate returns None when nothing matches; None * 100 would raise TypeError
        print(f"No tax rate found for {test_income} in {test_state}")
    else:
        print(f"The tax rate for {test_income} in {test_state} is: {tax_rate * 100}%")

The tax rate for 100000 in California is: 9.3%
The tax rate for 100000 in Federal is: 24.0%
