# Statistical Analysis

## Configuration

In [None]:
# Check free memory available
# `%system` is an IPython magic that runs the rest of the line as a shell
# command, so this cell only works inside an IPython/Jupyter kernel on a
# machine that provides the `free` utility (`-m` reports sizes in mebibytes).
%system free -m

In [None]:
# Import the necessary libraries
import os

# Statistical libraries
from statsmodels.formula.api import logit
from sklearn.preprocessing import LabelEncoder

# Google cloud libraries
from google.cloud import bigquery
from google.cloud import storage

# Pandas and BigQuery
import pandas_gbq as pdg
import pandas as pd

In [None]:
# Current working directory
# The relative "../data/..." paths defined in the next cell are resolved
# against this directory, so it is displayed here as a sanity check.
os.getcwd()

In [None]:
# Data directories (relative to the notebook's working directory),
# one per processing stage: raw -> interim -> processed.
raw_data, interim_data, processed_data = (
    "../data/raw/",
    "../data/interim/",
    "../data/processed/",
)

# Locations for generated figures and for configuration files.
figures, config = "../reports/figures/", "../config/"

## Data Loading

### neet_chd

In [None]:
# Name of the CSV file to read from the processed-data directory
csv_filename = "neet_chd.csv"

# Full path: <processed_data>/<csv_filename>
csv_filepath = os.path.join(processed_data, csv_filename)

# Read the file into a DataFrame using pandas' default parsing options
neet_chd_df = pd.read_csv(filepath_or_buffer=csv_filepath)

# Show the DataFrame as the cell output
neet_chd_df

## Statistical Analysis

### Logistic Regression

In [None]:
# Create the label encoder used to encode the columns of interest.
# Nothing is encoded yet: this only instantiates the encoder, and the same
# instance is reused for every column via fit_transform in the next cell.
label_encoder = LabelEncoder()

In [None]:
# Encode the three status columns as integer codes, in this order:
# ever_neet_status, persistent_neet_status, chd_status.
# fit_transform refits the shared encoder on each call, so afterwards
# label_encoder only holds the classes of the last column (chd_status).
(
    ever_neet_status_values,
    persistent_neet_status_values,
    chd_status_values,
) = (
    label_encoder.fit_transform(neet_chd_df[column])
    for column in ("ever_neet_status", "persistent_neet_status", "chd_status")
)

In [None]:
# Wrap each array of encoded values in its own single-column DataFrame,
# named after the column it was derived from.
ever_neet_status, persistent_neet_status, chd_status = (
    pd.DataFrame(values, columns=[name])
    for name, values in (
        ("ever_neet_status", ever_neet_status_values),
        ("persistent_neet_status", persistent_neet_status_values),
        ("chd_status", chd_status_values),
    )
)

In [None]:
# Overwrite each original status column with its encoded counterpart.
# The encoded values are single-column DataFrames, so pandas aligns them
# to neet_chd_df by row index on assignment.
for _column, _encoded in (
    ("ever_neet_status", ever_neet_status),
    ("persistent_neet_status", persistent_neet_status),
    ("chd_status", chd_status),
):
    neet_chd_df[_column] = _encoded

# Show the updated DataFrame as the cell output
neet_chd_df

In [None]:
# Logistic regression with ever_neet_status as the response and chd_status
# as the single predictor: build the model from the formula, then fit it.
ever_neet_model = logit(formula="ever_neet_status ~ chd_status", data=neet_chd_df)
mdl_ever_neet_vs_chd = ever_neet_model.fit()

In [None]:
# Fitted coefficients of the ever_neet_status ~ chd_status model
# (on the log-odds scale, since this is a logit model)
mdl_ever_neet_vs_chd.params

In [None]:
# Prediction table of the ever_neet_status model, computed by statsmodels'
# pred_table() with its default arguments, then displayed as the cell output
conf_matrix = mdl_ever_neet_vs_chd.pred_table()
conf_matrix

In [None]:
# Full statsmodels summary of the fitted ever_neet_status model
mdl_ever_neet_vs_chd.summary()

In [None]:
# Logistic regression with persistent_neet_status as the response and
# chd_status as the single predictor: build the model, then fit it.
persistent_neet_model = logit(
    formula="persistent_neet_status ~ chd_status",
    data=neet_chd_df,
)
mdl_persistent_neet_vs_chd = persistent_neet_model.fit()

In [None]:
# Fitted coefficients of the persistent_neet_status ~ chd_status model
# (on the log-odds scale, since this is a logit model)
mdl_persistent_neet_vs_chd.params

In [None]:
# Prediction table of the persistent_neet_status model, computed by
# statsmodels' pred_table() with its default arguments.
# NOTE: this rebinds conf_matrix, replacing the table computed earlier for
# the ever_neet_status model.
conf_matrix = mdl_persistent_neet_vs_chd.pred_table()
conf_matrix

In [None]:
# Full statsmodels summary of the fitted persistent_neet_status model
mdl_persistent_neet_vs_chd.summary()