<h3>Analysing Food Security in Kenya</h3>

In [41]:
# import libraries
import pandas as pd
import pyreadstat as pr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import  stats
import warnings
import statsmodels.api as sm
import logging

In [3]:
# Ignore warnings
warnings.filterwarnings("ignore")

In [6]:
# Check the data directory
import os
# List all files and directories in the data folder
for root, dirs, files in os.walk("data"):
    for file in files:
        file_path = os.path.join(root, file)
        print(f"File: {file_path}")
      



File: data/SPSS Dataset Baseline Current.sav


In [8]:
# Set file path
SAV_FILE_PATH = "data/SPSS Dataset Baseline Current.sav"

# Create a folder for plots if it doesn't exist inorder to:
if not os.path.exists("plots"):
    os.makedirs("plots")

# Load the SPSS FILE
try:
    df, meta = pr.read_sav(SAV_FILE_PATH)
    print("✅ SAV file loaded successfully!")
    print(f"📊 Dataset shape: {df.shape}")
except Exception as e:
    print(f"❌ Error loading SAV file: {e}")
    exit()


# Basic information about the DataFrame
print("\n--- DataFrame Info ---")
df.info()


# First few rows to get a glimpse of the data
print("\n--- First 5 Rows ---")
print(df.head())


✅ SAV file loaded successfully!
📊 Dataset shape: (944, 547)

--- DataFrame Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 944 entries, 0 to 943
Columns: 547 entries, Country to HOUSEHOLD_SIZE
dtypes: float64(527), object(20)
memory usage: 3.9+ MB

--- First 5 Rows ---
   Country  Studysite  HH_code  B_HH_type  B_HH_count  B_m1_gender  \
0      1.0       12.0  12009.0        1.0         7.0          1.0   
1      1.0       12.0  12008.0        3.0         5.0          2.0   
2      2.0       22.0  22177.0        1.0        14.0          1.0   
3      2.0       22.0  22265.0        1.0         7.0          1.0   
4      2.0       21.0  21226.0        1.0         8.0          1.0   

   B_m1_resp_age  B_m1_relation_head  B_m1_school_years  B_m1_home_occup  ...  \
0           62.0                 1.0                0.0              1.0  ...   
1           60.0                 1.0                0.0              1.0  ...   
2           39.0                 1.0                0.0

In [9]:

# Get descriptive statistics for all columns
print("\n--- Descriptive Statistics ---")
print(df.describe(include="all"))


--- Descriptive Statistics ---
           Country   Studysite       HH_code   B_HH_type  B_HH_count  \
count   944.000000  944.000000    944.000000  943.000000  944.000000   
unique         NaN         NaN           NaN         NaN         NaN   
top            NaN         NaN           NaN         NaN         NaN   
freq           NaN         NaN           NaN         NaN         NaN   
mean      1.549788   17.029661  17139.824153    1.193001    5.829449   
std       0.497779    5.041837   5055.312377    0.590866    2.048599   
min       1.000000   11.000000  11001.000000    1.000000    1.000000   
25%       1.000000   12.000000  12014.750000    1.000000    4.000000   
50%       2.000000   21.000000  21047.500000    1.000000    6.000000   
75%       2.000000   22.000000  22053.250000    1.000000    7.000000   
max       2.000000   22.000000  22289.000000    3.000000   15.000000   

        B_m1_gender  B_m1_resp_age  B_m1_relation_head  B_m1_school_years  \
count    944.000000     94

<h4>Accesing variable labels</h4>


In [23]:
# Print each column name alongside its descriptive label from the SPSS metadata
print("\n--- Variable Labels with Column Names ---")
for col_name, label in zip(meta.column_names, meta.column_labels):
    print(f"{col_name}: {label}")


--- Variable Labels with Column Names ---
Country: country of study site
Studysite: study site
HH_code: HH code
B_HH_type: HH type
B_HH_count: HH size
B_m1_gender: HH member1 gender
B_m1_resp_age: HH member1 age
B_m1_relation_head: HH member1 relation to HH head
B_m1_school_years: HH member1 school years
B_m1_home_occup: HH member1 at home
B_HH_m1_act_crop_farm: HH member1 crop farm
B_HH_m1_act_liv_keep: HH member1 keep livestock
B_HH_m1_act_poultry_keep: HH member1 keep poultry
B_HH_m1_act_salaried: HH member1 salary job
B_HH_m1_act_unemployed: HH member1 unemployed
B_m1_marital_status: HH member1 marital status
B_HH_m2_code: HH member2 code
B_m2_gender: HH member2 gender
B_m2_resp_age: HH member2 age
B_m2_relation_head: HH member2 relation to HH head
B_m2_school_years: HH member2 school years
B_m2_home_occup: HH member2 at home
B_m2_marital_status: HH member2 marital status
B_presence_child_6_59: index child aged 6-59m in HH
B_presence_mother_6_59: mother of child aged 6-59m availab

In [26]:
# Accesing individual labels
print("\n--- Variables with Value Labels ---")
print(list(meta.value_labels.keys()))



--- Variables with Value Labels ---
['labels0', 'labels1', 'labels2', 'labels3', 'labels4', 'labels5', 'labels6', 'labels7', 'labels8', 'labels9', 'labels10', 'labels11', 'labels12', 'labels13', 'labels14', 'labels15', 'labels16', 'labels17']


In [28]:
#  Print the contents of each label group
for key, value_map in meta.value_labels.items():
    print(f"{key}: {value_map}")

labels0: {1.0: 'Kenya', 2.0: 'Uganda'}
labels1: {11.0: 'West Pokot', 12.0: 'Turkana', 21.0: 'Napak', 22.0: 'Moroto'}
labels2: {1.0: 'male headed', 3.0: 'female headed', 6.0: 'child headed', 888.0: 'n.a.', 999.0: 'missing'}
labels3: {1.0: 'male', 2.0: 'female', 888.0: 'n.a.', 999.0: 'missing'}
labels4: {1.0: 'head', 2.0: 'spouse', 3.0: 'son', 4.0: 'daughter', 5.0: 'sister', 6.0: 'brother', 7.0: 'grandchild', 8.0: 'father', 9.0: 'mother', 66.0: 'other', 888.0: 'n.a.', 999.0: 'missing'}
labels5: {1.0: 'permanent home (10-12month/yr)', 2.0: 'freq.away (3-9m/yr)', 3.0: 'sometimes away(<3m/yr)=cat.1!'}
labels6: {0.0: 'no', 1.0: 'yes', 88.0: "don't know", 888.0: 'n.a.', 999.0: 'missing'}
labels7: {1.0: 'single', 2.0: 'monogamous', 3.0: 'polygamous', 4.0: 'divorced/separated', 5.0: 'widowed', 6.0: 'cohabiting (mono/polyg.)'}
labels8: {1.0: 'livestock', 2.0: 'crop', 3.0: 'livestock+crop', 4.0: 'other'}
labels9: {1.0: 'increase', 2.0: 'no change', 3.0: 'decrease'}
labels10: {1.0: 'male resp./hus

In [29]:
# Dictionary of all value labels
label_dictionary = {
    'labels0': {
        1.0: 'Kenya', 2.0: 'Uganda'
    },
    'labels1': {
        11.0: 'West Pokot', 12.0: 'Turkana', 21.0: 'Napak', 22.0: 'Moroto'
    },
    'labels2': {
        1.0: 'male headed', 3.0: 'female headed', 6.0: 'child headed',
        888.0: 'n.a.', 999.0: 'missing'
    },
    'labels3': {
        1.0: 'male', 2.0: 'female',
        888.0: 'n.a.', 999.0: 'missing'
    },
    'labels4': {
        1.0: 'head', 2.0: 'spouse', 3.0: 'son', 4.0: 'daughter', 5.0: 'sister',
        6.0: 'brother', 7.0: 'grandchild', 8.0: 'father', 9.0: 'mother',
        66.0: 'other', 888.0: 'n.a.', 999.0: 'missing'
    },
    'labels5': {
        1.0: 'permanent home (10-12month/yr)',
        2.0: 'freq.away (3-9m/yr)',
        3.0: 'sometimes away(<3m/yr)=cat.1!'
    },
    'labels6': {
        0.0: 'no', 1.0: 'yes',
        88.0: "don't know", 888.0: 'n.a.', 999.0: 'missing'
    },
    'labels7': {
        1.0: 'single', 2.0: 'monogamous', 3.0: 'polygamous',
        4.0: 'divorced/separated', 5.0: 'widowed',
        6.0: 'cohabiting (mono/polyg.)'
    },
    'labels8': {
        1.0: 'livestock', 2.0: 'crop',
        3.0: 'livestock+crop', 4.0: 'other'
    },
    'labels9': {
        1.0: 'increase', 2.0: 'no change', 3.0: 'decrease'
    },
    'labels10': {
        1.0: 'male resp./husband', 2.0: 'wife',
        3.0: 'both husband+wife', 66.0: 'other'
    },
    'labels11': {
        1.0: 'extremely', 2.0: 'much', 3.0: 'moderately',
        4.0: 'slightly', 5.0: 'not at all',
        888.0: 'n.a.', 999.0: '.'
    },
    'labels12': {
        1.0: 'Rarely (1-2 times)', 2.0: 'Sometimes (3-10 times)',
        3.0: 'Often (>10 times)'
    },
    'labels13': {
        1.0: 'Away', 2.0: 'Ill', 3.0: 'Dead'
    },
    'labels14': {
        1.0: 'severe', 2.0: 'moderate', 3.0: 'at risk',
        4.0: 'healthy', 888.0: 'n.a.'
    },
    'labels15': {
        1.0: 'severe-moderate, acute', 2.0: 'at risk', 3.0: 'healthy'
    },
    'labels16': {
        1.0: 'severe-moderate, acute', 2.0: 'at risk', 3.0: 'healthy'
    },
    'labels17': {
        1.0: 'underweight', 2.0: 'healthy',
        3.0: 'overweight', 4.0: 'obese'
    }
}


In [31]:
label = label_dictionary.get('labels3', {}).get(1.0, 'Unknown')
print(label)  # Output: male


male


In [36]:
# Loop through variables that have value labels and apply them directly
for column_name, label_mapping in meta.value_labels.items():
    if column_name in df.columns:
        df[f"{column_name}_label"] = df[column_name].map(label_mapping)
        print(f"Labeled column created: {column_name}_label")


In [42]:
# Save DataFrame with labeled columns to CSV
df.to_csv("labeled_data.csv", index=False)
print("Labeled data saved to labeled_data.csv")


Labeled data saved to labeled_data.csv


In [47]:

# --- Setup logging ---
logging.basicConfig(
    filename="data_analysis.log",
    level=logging.INFO,
    format='%(levelname)s: %(message)s'
)

# --- Load CSV with graceful error handling ---
"""
try:
    labelled_data = pd.read_csv("labeled_data.csv")
    logging.info("CSV file loaded successfully.")
    
except FileNotFoundError:
    logging.error("CSV file not found. Please check the file path.")
except pd.errors.ParserError:
    logging.error("Parsing error: File may be corrupt or not properly formatted.")
    
except Exception as e:
    logging.error(f"An unexpected error occurred: {e}")
"""

'\ntry:\n    labelled_data = pd.read_csv("labeled_data.csv")\n    logging.info("CSV file loaded successfully.")\n\nexcept FileNotFoundError:\n    logging.error("CSV file not found. Please check the file path.")\nexcept pd.errors.ParserError:\n    logging.error("Parsing error: File may be corrupt or not properly formatted.")\n\nexcept Exception as e:\n    logging.error(f"An unexpected error occurred: {e}")\n'

In [48]:
labelled_data = pd.read_csv("labeled_data.csv")
labelled_data.head()


Unnamed: 0,Country,Studysite,HH_code,B_HH_type,B_HH_count,B_m1_gender,B_m1_resp_age,B_m1_relation_head,B_m1_school_years,B_m1_home_occup,...,Q7F,Q8,Q8F,Q9,Q9F,HFIAS,MAHFP_total,EXPENDITURE_EXCEEDING_INCOME,HOUSEHOLD_HEAD_AGE,HOUSEHOLD_SIZE
0,1.0,12.0,12009.0,1.0,7.0,1.0,62.0,1.0,0.0,1.0,...,2.0,1.0,2.0,1.0,2.0,4.0,2.0,1.0,4.0,2.0
1,1.0,12.0,12008.0,3.0,5.0,2.0,60.0,1.0,0.0,1.0,...,2.0,1.0,3.0,1.0,2.0,4.0,2.0,0.0,3.0,1.0
2,2.0,22.0,22177.0,1.0,14.0,1.0,39.0,1.0,0.0,1.0,...,1.0,1.0,1.0,0.0,,4.0,3.0,1.0,3.0,2.0
3,2.0,22.0,22265.0,1.0,7.0,1.0,58.0,1.0,0.0,1.0,...,1.0,1.0,1.0,1.0,1.0,4.0,2.0,0.0,3.0,2.0
4,2.0,21.0,21226.0,1.0,8.0,1.0,36.0,1.0,0.0,1.0,...,1.0,1.0,1.0,1.0,1.0,4.0,1.0,0.0,3.0,2.0
