In [1]:
# data handling
import numpy as np
import pandas as pd

# Get multiple outputs from one cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Ignore those ugly warning messages
import warnings
warnings.filterwarnings('ignore')

# Birth Weight Prediction

## Strategy

## Data exploration
- Upload the data ✅
- Descriptive statistics ✅
- Rename columns ✅
- Resort columns 
- Cross-features correlations

In [2]:
df = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

In [7]:
dfs = [df, df_test]
column_mapping = {
    'ATTEND': 'Attendant at Birth',
    'BFACIL': 'Birth Place',
    'BMI': 'Body Mass Index',
    'CIG_0': 'Cigarettes Before Pregnancy',
    'DLMP_MM': 'Last Normal Menses Month',
    'DMAR': 'Marital Status',
    'DOB_MM': 'Birth Month',
    'DOB_TT': 'Time of Birth',
    'DOB_WK': 'Birth Day of Week',
    'FAGECOMB': 'Father’s Combined Age',
    'FEDUC': 'Father’s Education',
    'ILLB_R': 'Interval Since Last Live Birth Recode',
    'ILOP_R': 'Interval Since Last Other Pregnancy Recode',
    'ILP_R': 'Interval Since Last Pregnancy Recode',
    'LD_INDL': 'Induction of Labor',
    'MAGER': 'Mother’s Single Years of Age',
    'MBSTATE_REC': 'Mother’s Nativity',
    'MEDUC': 'Mother’s Education',
    'M_Ht_In': 'Mother’s Height in Total Inches',
    'NO_INFEC': 'No Infections Reported',
    'NO_MMORB': 'No Maternal Morbidity Reported',
    'NO_RISKS': 'No Risk Factors Reported',
    'PAY': 'Payment Source for Delivery',
    'PAY_REC': 'Payment Recode',
    'PRECARE': 'Month Prenatal Care Began',
    'PREVIS': 'Number of Prenatal Visits',
    'PRIORDEAD': 'Prior Births Now Dead',
    'PRIORLIVE': 'Prior Births Now Living',
    'PRIORTERM': 'Prior Other Terminations',
    'PWgt_R': 'Pre-pregnancy Weight Recode',
    'RDMETH_REC': 'Delivery Method Recode',
    'RESTATUS': 'Residence Status',
    'RF_CESAR': 'Previous Cesarean',
    'RF_CESARN': 'Number of Previous Cesareans',
    'SEX': 'Sex of Infant',
    'WTGAIN': 'Weight Gain'
}

for df in dfs:
    df.rename(columns=column_mapping, inplace=True)

In [5]:
# Describing the dataset

print('--------------')
print('Rows, Columns:')
print('--------------')
df.shape
print('---------------')
print("Columns' names:")
print('---------------')
df.columns
print('-----------------')
print('Column / Datatype:')
print('-----------------')
df.dtypes
print('-----------------')
df.head(5)
print('---------------------')
print('Descriptive statistic:')
print('---------------------')
round(df.describe())
print('-----------------------')
print('The sum of null values:')
print('-----------------------')
print(df.isnull().sum())
print('-----------------------')
print('The sum of NaN values:')
print('-----------------------')
df.isna().sum()

--------------
Rows, Columns:
--------------


(5689, 37)

---------------
Columns' names:
---------------


Index(['id', 'Attendant at Birth', 'Birth Place', 'Body Mass Index',
       'Cigarettes Before Pregnancy', 'Last Normal Menses Month',
       'Marital Status', 'Birth Month', 'Time of Birth', 'Birth Day of Week',
       'Father’s Combined Age', 'Father’s Education',
       'Interval Since Last Live Birth Recode',
       'Interval Since Last Other Pregnancy Recode',
       'Interval Since Last Pregnancy Recode', 'Induction of Labor',
       'Mother’s Single Years of Age', 'Mother’s Nativity',
       'Mother’s Education', 'Mother’s Height in Total Inches',
       'No Infections Reported', 'No Maternal Morbidity Reported',
       'No Risk Factors Reported', 'Payment Source for Delivery',
       'Payment Recode', 'Month Prenatal Care Began',
       'Number of Prenatal Visits', 'Prior Births Now Dead',
       'Prior Births Now Living', 'Prior Other Terminations',
       'Pre-pregnancy Weight Recode', 'Delivery Method Recode',
       'Residence Status', 'Previous Cesarean', 'Number of Previo

-----------------
Column / Datatype:
-----------------


id                                              int64
Attendant at Birth                              int64
Birth Place                                     int64
Body Mass Index                               float64
Cigarettes Before Pregnancy                     int64
Last Normal Menses Month                        int64
Marital Status                                 object
Birth Month                                     int64
Time of Birth                                   int64
Birth Day of Week                               int64
Father’s Combined Age                           int64
Father’s Education                              int64
Interval Since Last Live Birth Recode           int64
Interval Since Last Other Pregnancy Recode      int64
Interval Since Last Pregnancy Recode            int64
Induction of Labor                             object
Mother’s Single Years of Age                    int64
Mother’s Nativity                               int64
Mother’s Education          

-----------------


Unnamed: 0,id,Attendant at Birth,Birth Place,Body Mass Index,Cigarettes Before Pregnancy,Last Normal Menses Month,Marital Status,Birth Month,Time of Birth,Birth Day of Week,...,Prior Births Now Dead,Prior Births Now Living,Prior Other Terminations,Pre-pregnancy Weight Recode,Delivery Method Recode,Residence Status,Previous Cesarean,Number of Previous Cesareans,Sex of Infant,Weight Gain
0,108082,1,1,23.6,0,11,,8,1231,4,...,0,0,0,125,3,1,N,0,M,53
1,108083,1,1,20.1,0,7,1.0,4,851,5,...,0,2,2,103,4,2,Y,2,F,24
2,108084,1,1,34.8,30,3,2.0,12,36,6,...,0,2,1,203,1,1,N,0,F,0
3,108085,1,1,23.4,0,10,2.0,7,1452,3,...,0,1,0,128,1,1,N,0,F,29
4,108086,1,1,25.4,0,8,,5,803,4,...,0,3,0,130,4,1,Y,1,F,18


---------------------
Descriptive statistic:
---------------------


Unnamed: 0,id,Attendant at Birth,Birth Place,Body Mass Index,Cigarettes Before Pregnancy,Last Normal Menses Month,Birth Month,Time of Birth,Birth Day of Week,Father’s Combined Age,...,Month Prenatal Care Began,Number of Prenatal Visits,Prior Births Now Dead,Prior Births Now Living,Prior Other Terminations,Pre-pregnancy Weight Recode,Delivery Method Recode,Residence Status,Number of Previous Cesareans,Weight Gain
count,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,...,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0,5689.0
mean,110926.0,1.0,1.0,29.0,2.0,11.0,7.0,1233.0,4.0,40.0,...,5.0,14.0,0.0,1.0,1.0,174.0,2.0,1.0,0.0,32.0
std,1642.0,1.0,0.0,12.0,9.0,20.0,3.0,639.0,2.0,22.0,...,15.0,15.0,6.0,5.0,6.0,118.0,1.0,1.0,1.0,19.0
min,108082.0,1.0,1.0,14.0,0.0,1.0,1.0,0.0,1.0,15.0,...,0.0,0.0,0.0,0.0,0.0,75.0,1.0,1.0,0.0,0.0
25%,109504.0,1.0,1.0,22.0,0.0,4.0,4.0,800.0,3.0,28.0,...,2.0,9.0,0.0,0.0,0.0,130.0,1.0,1.0,0.0,20.0
50%,110926.0,1.0,1.0,26.0,0.0,7.0,7.0,1244.0,4.0,33.0,...,3.0,12.0,0.0,1.0,0.0,150.0,1.0,1.0,0.0,30.0
75%,112348.0,1.0,1.0,31.0,0.0,10.0,10.0,1728.0,6.0,38.0,...,3.0,14.0,0.0,2.0,1.0,183.0,3.0,2.0,0.0,40.0
max,113770.0,9.0,7.0,100.0,99.0,99.0,12.0,9999.0,7.0,99.0,...,99.0,99.0,99.0,99.0,99.0,999.0,4.0,4.0,99.0,99.0


-----------------------
The sum of null values:
-----------------------
id                                            0
Attendant at Birth                            0
Birth Place                                   0
Body Mass Index                               0
Cigarettes Before Pregnancy                   0
Last Normal Menses Month                      0
Marital Status                                0
Birth Month                                   0
Time of Birth                                 0
Birth Day of Week                             0
Father’s Combined Age                         0
Father’s Education                            0
Interval Since Last Live Birth Recode         0
Interval Since Last Other Pregnancy Recode    0
Interval Since Last Pregnancy Recode          0
Induction of Labor                            0
Mother’s Single Years of Age                  0
Mother’s Nativity                             0
Mother’s Education                            0
Mother’s Height 

id                                            0
Attendant at Birth                            0
Birth Place                                   0
Body Mass Index                               0
Cigarettes Before Pregnancy                   0
Last Normal Menses Month                      0
Marital Status                                0
Birth Month                                   0
Time of Birth                                 0
Birth Day of Week                             0
Father’s Combined Age                         0
Father’s Education                            0
Interval Since Last Live Birth Recode         0
Interval Since Last Other Pregnancy Recode    0
Interval Since Last Pregnancy Recode          0
Induction of Labor                            0
Mother’s Single Years of Age                  0
Mother’s Nativity                             0
Mother’s Education                            0
Mother’s Height in Total Inches               0
No Infections Reported                  

## Data Preparation
- Fill the null and NaN values
- Reduce skewness

## Feature Engineering
- Creating new values
- One-hot encoding
- Dropping the unnecessary
- Scaling

## Splitting the dataset

## Modeling
- Naive prediction
- Linear regression
- Random Forest
- CatBoost
- Gaussian Process Regression (GPR)
- Conformalized Quantile Regression

## Evaluating Models
- Winkler Score

## Uploading results