# Stepwise Regression using Logistic Regression 1

## import packages

In [240]:
import statsmodels.api as sm
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

## Import data

In [241]:
Encounters = pd.read_excel('/Users/suzejones/Downloads/Finalsbo-encounters-fy20-fy23-apr .xlsx')

In [242]:
Encounters.head()

Unnamed: 0,Fiscal Year,Month Grouping,Month (abbv),Component,Demographic,Citizenship Grouping,Title of Authority,Encounter Type,Encounter Count
0,2020,FYTD,OCT,Office of Field Operations,Accompanied Minors,El Salvador,Title 8,Inadmissibles,2
1,2020,FYTD,OCT,Office of Field Operations,Accompanied Minors,Guatemala,Title 8,Inadmissibles,1
2,2020,FYTD,OCT,Office of Field Operations,Accompanied Minors,Mexico,Title 8,Inadmissibles,91
3,2020,FYTD,OCT,Office of Field Operations,Accompanied Minors,Other,Title 8,Inadmissibles,8
4,2020,FYTD,OCT,Office of Field Operations,FMUA,El Salvador,Title 8,Inadmissibles,38


## Make sure all data types are Categorical except 'count'

In [243]:
Encounters.dtypes

Fiscal Year             object
Month Grouping          object
Month (abbv)            object
Component               object
Demographic             object
Citizenship Grouping    object
Title of Authority      object
Encounter Type          object
Encounter Count          int64
dtype: object

In [3]:
Encounters['Demographic'] =.astype(int)(Encounters['Demographic'])

NameError: name 'Encounters' is not defined

In [222]:
Encounters['Fiscal Year'] = pd.Categorical(Encounters['Fiscal Year'])

In [223]:
Encounters['Month (abbv)'] = pd.Categorical(Encounters['Month (abbv)'])

In [224]:
Encounters['Component'] = pd.Categorical(Encounters['Component'])

In [225]:
Encounters['Citizenship Grouping'] = pd.Categorical(Encounters['Citizenship Grouping'])

In [226]:
Encounters['Title of Authority'] = pd.Categorical(Encounters['Title of Authority'])

In [227]:
Encounters['Encounter Type'] = pd.Categorical(Encounters['Encounter Type'])

In [228]:
Encounters.dtypes

Fiscal Year             category
Month Grouping            object
Month (abbv)            category
Component               category
Demographic             category
Citizenship Grouping    category
Title of Authority      category
Encounter Type          category
Encounter Count            int64
dtype: object

## Drop columns not needed

In [229]:
Encounters1 = Encounters.drop(['Month Grouping'], axis=1)

In [230]:
Encounters1.head()

Unnamed: 0,Fiscal Year,Month (abbv),Component,Demographic,Citizenship Grouping,Title of Authority,Encounter Type,Encounter Count
0,2020,OCT,Office of Field Operations,Accompanied Minors,El Salvador,Title 8,Inadmissibles,2
1,2020,OCT,Office of Field Operations,Accompanied Minors,Guatemala,Title 8,Inadmissibles,1
2,2020,OCT,Office of Field Operations,Accompanied Minors,Mexico,Title 8,Inadmissibles,91
3,2020,OCT,Office of Field Operations,Accompanied Minors,Other,Title 8,Inadmissibles,8
4,2020,OCT,Office of Field Operations,FMUA,El Salvador,Title 8,Inadmissibles,38


## Map columns to numeric

#### Demographic Column

In [248]:
demographic_category_mapping = {
    'Accompanied Minors': 0,
    'FMUA': 1,
    'Single Adults': 2,
    'UC / Single Minors': 3
}

In [249]:
Encounters1['Demographic_encoded'] = Encounters1['Demographic'].map(demographic_category_mapping)

In [250]:
print(Encounters1[['Demographic', 'Demographic_encoded']].head())

          Demographic Demographic_encoded
0  Accompanied Minors                   0
1  Accompanied Minors                   0
2  Accompanied Minors                   0
3  Accompanied Minors                   0
4                FMUA                   1


#### Fiscal Year Column

In [234]:
unique_categories = Encounters1['Fiscal Year'].unique()

In [235]:
for category in unique_categories:
    print(category)

2020
2021
2022
2023 (FYTD)


In [236]:

Encounters1['Fiscal Year'] = Encounters1['Fiscal Year'].astype(int)



ValueError: Cannot cast object dtype to int64

In [216]:
Encounters1.dtypes

Fiscal Year             category
Month (abbv)            category
Component               category
Demographic             category
Citizenship Grouping    category
Title of Authority      category
Encounter Type          category
Encounter Count            int64
Demographic_encoded     category
FiscalYear_encoded       float64
dtype: object

In [211]:
Encounters1['FiscalYear_encoded'] = Encounters1['Fiscal Year'].map(fiscalyear_category_mapping)

In [212]:
print(Encounters1[['Fiscal Year', 'FiscalYear_encoded']].head())

  Fiscal Year  FiscalYear_encoded
0        2020                 NaN
1        2020                 NaN
2        2020                 NaN
3        2020                 NaN
4        2020                 NaN


In [213]:
Encounters1['FiscalYear_encoded'].fillna(0, inplace=True)

In [214]:
print(Encounters1)

      Fiscal Year Month (abbv)                   Component  \
0            2020          OCT  Office of Field Operations   
1            2020          OCT  Office of Field Operations   
2            2020          OCT  Office of Field Operations   
3            2020          OCT  Office of Field Operations   
4            2020          OCT  Office of Field Operations   
...           ...          ...                         ...   
2217  2023 (FYTD)          OCT          U.S. Border Patrol   
2218  2023 (FYTD)          OCT          U.S. Border Patrol   
2219  2023 (FYTD)          OCT          U.S. Border Patrol   
2220  2023 (FYTD)          OCT          U.S. Border Patrol   
2221  2023 (FYTD)          OCT          U.S. Border Patrol   

             Demographic Citizenship Grouping Title of Authority  \
0     Accompanied Minors          El Salvador            Title 8   
1     Accompanied Minors            Guatemala            Title 8   
2     Accompanied Minors               Mexico      

In [237]:
Encounters1.dtypes

Fiscal Year             category
Month (abbv)            category
Component               category
Demographic             category
Citizenship Grouping    category
Title of Authority      category
Encounter Type          category
Encounter Count            int64
Demographic_encoded     category
dtype: object

## Create a subset of predictor variables


In [165]:
predictors = Encounters1[['Fiscal Year', 'Month (abbv)', 'Demographic', 'Citizenship Grouping', 'Component', 
                         'Encounter Type', 'Title of Authority']]

In [166]:
print(predictors.dtypes)

Fiscal Year             category
Month (abbv)            category
Demographic             category
Citizenship Grouping    category
Component               category
Encounter Type          category
Title of Authority      category
dtype: object


## Add constant column if needed


In [167]:
predictors = sm.add_constant(predictors)


## Fit logistic regression model using stepwise selection

In [168]:
# Fit the logistic regression model with the encoded predictors
model = sm.Logit(Encounters1['Demographic'], predictors_encoded)
result = model.fit(method='step', direction='both')



ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data). The types seen wereNone and Fiscal Year                       category
Month (abbv)                      category
Demographic                         object
Citizenship Grouping              category
Component                         category
Encounter Type                    category
Title of Authority                category
Demographic_Accompanied Minors       uint8
Demographic_FMUA                     uint8
Demographic_Single Adults            uint8
Demographic_UC / Single Minors       uint8
dtype: object. The data was
0       Accompanied Minors
1       Accompanied Minors
2       Accompanied Minors
3       Accompanied Minors
4                     FMUA
               ...        
2217    UC / Single Minors
2218    UC / Single Minors
2219    UC / Single Minors
2220    UC / Single Minors
2221    UC / Single Minors
Name: Demographic, Length: 2222, dtype: category
Categories (4, object): ['Accompanied Minors', 'FMUA', 'Single Adults', 'UC / Single Minors']
and
       Fiscal Year Month (abbv)         Demographic Citizenship Grouping  \
0            2020          OCT  Accompanied Minors          El Salvador   
1            2020          OCT  Accompanied Minors            Guatemala   
2            2020          OCT  Accompanied Minors               Mexico   
3            2020          OCT  Accompanied Minors                Other   
4            2020          OCT                FMUA          El Salvador   
...           ...          ...                 ...                  ...   
2217  2023 (FYTD)          OCT  UC / Single Minors          El Salvador   
2218  2023 (FYTD)          OCT  UC / Single Minors            Guatemala   
2219  2023 (FYTD)          OCT  UC / Single Minors             Honduras   
2220  2023 (FYTD)          OCT  UC / Single Minors               Mexico   
2221  2023 (FYTD)          OCT  UC / Single Minors                Other   

                       Component Encounter Type Title of Authority  \
0     Office of Field Operations  Inadmissibles            Title 8   
1     Office of Field Operations  Inadmissibles            Title 8   
2     Office of Field Operations  Inadmissibles            Title 8   
3     Office of Field Operations  Inadmissibles            Title 8   
4     Office of Field Operations  Inadmissibles            Title 8   
...                          ...            ...                ...   
2217          U.S. Border Patrol  Apprehensions            Title 8   
2218          U.S. Border Patrol  Apprehensions            Title 8   
2219          U.S. Border Patrol  Apprehensions            Title 8   
2220          U.S. Border Patrol  Apprehensions            Title 8   
2221          U.S. Border Patrol  Apprehensions            Title 8   

      Demographic_Accompanied Minors  Demographic_FMUA  \
0                                  1                 0   
1                                  1                 0   
2                                  1                 0   
3                                  1                 0   
4                                  0                 1   
...                              ...               ...   
2217                               0                 0   
2218                               0                 0   
2219                               0                 0   
2220                               0                 0   
2221                               0                 0   

      Demographic_Single Adults  Demographic_UC / Single Minors  
0                             0                               0  
1                             0                               0  
2                             0                               0  
3                             0                               0  
4                             0                               0  
...                         ...                             ...  
2217                          0                               1  
2218                          0                               1  
2219                          0                               1  
2220                          0                               1  
2221                          0                               1  

[2222 rows x 11 columns]
before. After,
['Accompanied Minors' 'Accompanied Minors' 'Accompanied Minors' ...
 'UC / Single Minors' 'UC / Single Minors' 'UC / Single Minors']
[[2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 [2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 [2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 ...
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]].

## Encode Demographic column and add it in a new dataframe....

In [5]:
from sklearn.preprocessing import LabelEncoder

In [6]:
label_encoder = LabelEncoder()
Encounters1['Demographic_encoded'] = label_encoder.fit_transform(Encounters['Demographic'])

NameError: name 'Encounters' is not defined

In [7]:
Encounters1_encoded = Encounters1.copy()
Encounters1_encoded['Demographic_encoded'] = Encounters1['Demographic_encoded'].astype(float)

NameError: name 'Encounters1' is not defined

In [4]:
Encounters1_encoded.head()

NameError: name 'Encounters1_encoded' is not defined

## Create a dictionary mapping categories to numerical values

In [199]:

demographic_category_mapping = {
    'Accompanied Minors': 0,
    'FMUA': 1,
    'Single Adults': 2,
    'UC / Single Minors': 3
}

## Recode Predictor Variables to Numeric values

In [191]:
Predictors_label_encoder = LabelEncoder()

In [192]:
for column in predictors_encoded.columns:
    if predictors_encoded[column].dtype == 'object':
        predictors_encoded[column] = label_encoder.fit_transform(predictors_encoded[column])

## Add recode to dataframe

In [193]:
Encounters1['Demographic_encoded'] = Encounters1['Demographic'].map(category_mapping)

## Make sure it worked...

In [194]:
print(Encounters[['Demographic', 'Demographic_encoded']].head())

          Demographic  Demographic_encoded
0  Accompanied Minors                    0
1  Accompanied Minors                    0
2  Accompanied Minors                    0
3  Accompanied Minors                    0
4                FMUA                    1


## Recode Predictor variables to numeric

In [195]:
Predictors_label_encoder = LabelEncoder()

In [196]:
for column in predictors_encoded.columns:
    if predictors_encoded[column].dtype == 'object':
        predictors_encoded[column] = label_encoder.fit_transform(predictors_encoded[column])

### Make sure it recoded

In [197]:
print(predictors_encoded.dtypes)

Fiscal Year                       category
Month (abbv)                      category
Demographic                          int64
Citizenship Grouping              category
Component                         category
Encounter Type                    category
Title of Authority                category
Demographic_Accompanied Minors       uint8
Demographic_FMUA                     uint8
Demographic_Single Adults            uint8
Demographic_UC / Single Minors       uint8
dtype: object


In [189]:
model = sm.Logit(Encounters1_encoded['Demographic_encoded'], predictors_encoded.values)
result = model.fit(method='step', direction='both')

ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data). The types seen wereNone and None. The data was
0       0.0
1       0.0
2       0.0
3       0.0
4       1.0
       ... 
2217    3.0
2218    3.0
2219    3.0
2220    3.0
2221    3.0
Name: Demographic_encoded, Length: 2222, dtype: float64
and
 [[2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 [2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 [2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 ...
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]]
before. After,
[0. 0. 0. ... 3. 3. 3.]
[[2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 [2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 [2020 'OCT' 'Accompanied Minors' ... 0 0 0]
 ...
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]
 ['2023 (FYTD)' 'OCT' 'UC / Single Minors' ... 0 0 1]].

In [172]:
model = sm.Logit(Encounters1['Demographic_encoded'], predictors_encoded.values)
result = model.fit(method='step', direction='both')

KeyError: 'Demographic_encoded'

In [173]:
Encounters1['Demographic_encoded'] = Encounters1['Demographic_encoded'].astype(float)
model = sm.Logit(Encounters1['Demographic_encoded'], predictors_encoded.values)
result = model.fit(method='step', direction='both')

KeyError: 'Demographic_encoded'

In [174]:
Encounters1_encoded = Encounters1.copy()
Encounters1_encoded['Demographic_encoded'] = Encounters1['Demographic_encoded'].astype(float)

KeyError: 'Demographic_encoded'