In [6]:
import pandas as pd

# Load the dataset
file_path = 'Korea Income and Welfare.csv'
data = pd.read_csv(file_path)

# Map region codes to region names
region_map = {
    1: "Seoul",
    2: "Kyeong-gi",
    3: "Kyoung-nam",
    4: "Kyoung-buk",
    5: "Chung-nam",
    6: "Gang-won & Chung-buk",
    7: "Jeolla & Jeju"
}

# Map gender codes to gender names
gender_map = {
    1: "Male",
    2: "Female"
}

# Map education level codes to education level names
education_level_map = {
    1: "No education (under 7 yrs-old)",
    2: "No education (7 & over 7 yrs-old)",
    3: "Elementary",
    4: "Middle school",
    5: "High school",
    6: "College",
    7: "University degree",
    8: "MA",
    9: "Doctoral degree"
}

# Map religion codes to religion names
religion_map = {
    1: "Have religion",
    2: "Do not have"
}

# Map reason_none_worker codes to reason names
reason_none_worker_map = {
    1: "No capable",
    2: "In military service",
    3: "Studying in school",
    4: "Prepare for school",
    5: "Prepare to apply job",
    6: "House worker",
    7: "Caring kids at home",
    8: "Nursing",
    9: "Giving-up economic activities",
    10: "No intention to work",
    11: "Others"
}

# Apply the mappings
data['region'] = data['region'].map(region_map)
data['gender'] = data['gender'].map(gender_map)
data['education_level'] = data['education_level'].map(education_level_map)
data['religion'] = data['religion'].map(religion_map)
data['reason_none_worker'] = data['reason_none_worker'].map(reason_none_worker_map)


# Create a column for age
data['age'] = data['year'] - data['year_born']

# Function to calculate migration for each person
def calculate_migration(df):
    df = df.sort_values('year')
    df['next_region'] = df['region'].shift(-1)
    df['moved'] = df['region'] != df['next_region']
    return df

# Apply the migration calculation
data = data.groupby('id').apply(calculate_migration)

# Filter out the rows where no movement occurred
migration_data = data[data['moved'] == True]

# Calculate increase/decrease rate for each column
def calculate_change_rate(df, column):
    count_before = df[df['year'] == df['year'].min()][column].value_counts()
    count_after = df[df['year'] == df['year'].max()][column].value_counts()
    change_rate = ((count_after - count_before) / count_before) * 100
    return change_rate

# List of columns to analyze
columns_to_analyze = ['gender', 'income', 'education_level', 'religion', 'occupation', 'company_size', 'reason_none_worker']

# Calculate and display change rates
for column in columns_to_analyze:
    change_rate = calculate_change_rate(migration_data, column)
    print(f"Change rate for {column}:\n{change_rate}\n")

# Display the migration data
migration_data.head()


Change rate for gender:
gender
Male       876.525822
Female    1709.166667
Name: count, dtype: float64

Change rate for income:
income
-46897.0    NaN
-10035.0    NaN
-7600.0     NaN
-7000.0     NaN
-1727.0     NaN
             ..
 34153.0    NaN
 45461.0    NaN
 66100.0    NaN
 66531.0    NaN
 170204.0   NaN
Name: count, Length: 4619, dtype: float64

Change rate for education_level:
education_level
College                              1144.736842
Doctoral degree                              NaN
Elementary                           2466.071429
High school                           684.090909
MA                                    659.090909
Middle school                        1387.931034
No education (7 & over 7 yrs-old)    3676.470588
University degree                     637.777778
Name: count, dtype: float64

Change rate for religion:
religion
Do not have      1108.791209
Have religion    1010.256410
Name: count, dtype: float64

Change rate for occupation:
occupation
        1820.47

Unnamed: 0_level_0,Unnamed: 1_level_0,id,year,wave,region,income,family_member,gender,year_born,education_level,marriage,religion,occupation,company_size,reason_none_worker,age,next_region,moved
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
10101,7,10101,2017,13,Seoul,851.0,1,Female,1936,No education (7 & over 7 yrs-old),2,Do not have,,,,81,,True
20101,21,20101,2018,14,Seoul,2330.0,1,Female,1945,Middle school,2,Have religion,,,,73,,True
30101,35,30101,2018,14,Seoul,815.0,1,Male,1948,Elementary,2,Do not have,,,,70,,True
40101,49,40101,2018,14,Seoul,2116.0,1,Male,1942,University degree,3,Have religion,762.0,2.0,,76,,True
50101,50,50101,2005,1,Seoul,4552.0,4,Male,1955,High school,1,Have religion,415.0,1.0,,50,,True
