In [2]:
!pip install pyreadstat

Collecting pyreadstat
  Downloading pyreadstat-1.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.0 kB)
Downloading pyreadstat-1.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: pyreadstat
Successfully installed pyreadstat-1.2.8

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [3]:
from pathlib import Path

import pandas as pd
import pyreadstat

# Step 1: Create dummy data
# One key per survey variable; each list holds one value per dummy respondent
# (five rows in total).
data = {
    "UJNO": ["U12345", "U67890", "U11223", "U44556", "U78901"],
    "PERSONNO": [1, 2, 3, 1, 2],
    "Q12NIGHTS": [1, 0, 1, 1, 0],
    "Q13GENDER": [1, 2, 1, 2, 1],
    "Q15POPULATION": [1, 1, 2, 3, 4],
    "Q16MARITALSTATUS": [1, 0, 1, 1, 0],
    "Q17EDUCATION": [3, 4, 5, 6, 2],
    "Q18FIELD": [11, 12, 13, 14, 15],
    "Q19ATTE": [1, 1, 0, 0, 1],
    "Q110EDUI": [1, 2, 1, 3, 1],
    "Q20SELFRESPOND": [1, 1, 1, 0, 0],
    "Q24APDWRK": [1, 0, 1, 1, 1],
    "Q24BOWNBUSNS": [0, 1, 0, 1, 0],
    "Q24CUNPDWRK": [1, 1, 0, 1, 1],
    "Q25APDWRK": [0, 1, 1, 0, 1],
    "Q25BOWNBUSNS": [1, 0, 1, 0, 1],
    "Q25CUNPDWRK": [2, 1, 1, 2, 1],
    "Q27RSABSENT": [1, 2, 3, 4, 1],
    "Q27ATIME": [1, 1, 0, 0, 1],
    "Q27BRECPAY": [1, 1, 1, 0, 1],
    "Q31ALOOKWRK": [0, 1, 1, 0, 1],
    "Q31BSTARTBUSNS": [1, 0, 1, 1, 1],
    "Q31CTVPRWRK": [1, 1, 0, 1, 0],
    "Q3201REGISTER": [1, 0, 1, 1, 0],
}

# Step 2: Add variable labels and value labels
# variable_labels: column name -> human-readable description of the variable.
variable_labels = {
    "UJNO": "Unique number",
    "PERSONNO": "Person number",
    "Q12NIGHTS": "Stayed at least four nights",
    "Q13GENDER": "Gender",
    "Q15POPULATION": "Population group",
    "Q16MARITALSTATUS": "Marital status",
    "Q17EDUCATION": "Highest education level",
    "Q18FIELD": "Study field",
    "Q19ATTE": "Currently attending educational institution",
    "Q110EDUI": "Education Institution",
    "Q20SELFRESPOND": "Person himself/herself responding",
    "Q24APDWRK": "Paid work",
    "Q24BOWNBUSNS": "Own business",
    "Q24CUNPDWRK": "Unpaid work",
    "Q25APDWRK": "Have paid work to return to",
    "Q25BOWNBUSNS": "Have own business to return to",
    "Q25CUNPDWRK": "Have unpaid work to return to",
    "Q27RSABSENT": "Main reason absent from work",
    "Q27ATIME": "Time absent from work",
    "Q27BRECPAY": "Receive pay",
    "Q31ALOOKWRK": "Look for work",
    "Q31BSTARTBUSNS": "Start business",
    "Q31CTVPRWRK": "Working/business arrangement",
    "Q3201REGISTER": "Registered",
}

# value_labels: column name -> {numeric code: text shown for that code}.
# Only a subset of the columns is coded here; the rest carry no value labels.
value_labels = {
    "Q12NIGHTS": {1: "Yes", 0: "No"},
    "Q13GENDER": {1: "Male", 2: "Female"},
    "Q15POPULATION": {
        1: "African/Black",
        2: "Coloured",
        3: "Indian/Asian",
        4: "White",
    },
    "Q16MARITALSTATUS": {1: "Married", 0: "Single"},
    "Q19ATTE": {1: "Yes", 0: "No"},
    "Q110EDUI": {
        1: "Pre-school",
        2: "Primary school",
        3: "High school",
        4: "University",
    },
    "Q27RSABSENT": {
        1: "Health reasons",
        2: "Family commitments",
        3: "Education",
        4: "Other",
    },
    "Q27ATIME": {1: "Within 3 months", 0: "More than 3 months"},
    "Q27BRECPAY": {1: "Yes", 0: "No"},
    "Q31ALOOKWRK": {1: "Yes", 0: "No"},
    "Q31BSTARTBUSNS": {1: "Yes", 0: "No"},
    "Q31CTVPRWRK": {1: "Full-time", 0: "Part-time"},
    "Q3201REGISTER": {1: "Yes", 0: "No"},
}

# Step 3: Convert data to DataFrame
df = pd.DataFrame(data)

# Fail fast, with a clear message, on label keys that do not match a column
# (e.g. a typo in a variable name).
for label_kind, labels in (("variable", variable_labels), ("value", value_labels)):
    unknown = sorted(set(labels) - set(df.columns))
    assert not unknown, f"{label_kind} labels reference unknown columns: {unknown}"

# Step 4: Save as .sav file with metadata
sav_file_path = "../data/LabourForceSurvey_dummy.sav"  # Save in the 'data/' folder

# Create the output folder up front so the cell does not depend on it
# already existing (e.g. on a fresh checkout).
Path(sav_file_path).parent.mkdir(parents=True, exist_ok=True)

pyreadstat.write_sav(
    df,
    sav_file_path,
    variable_value_labels=value_labels,
    column_labels=variable_labels
)

print(f"File saved as {sav_file_path}")


File saved as ../data/LabourForceSurvey_dummy.sav
