In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

In [30]:
def drop_null_cols(df_, columns_to_check=[], threshold=None):
    '''
    Drop columns with null values above a threshold if specified.
    Checks entire dataframe if no columns are passed.
    
    Parameters:
        df_ : DataFrame or 2-dimensional array
        columns_to_check : list or array-like
            List specifying column(s) to check for null values.
            Default is the entire DataFrame.
        threshold_num : int or float
            Number/percentage of null values above which that column will be dropped.
            Default is any null values.
    Returns:
        df_ : DataFrame
    '''
    if not isinstance(df_, pd.DataFrame):
        df_ = pd.DataFrame(df_)
    if not isinstance(columns_to_check, list):
        columns_to_check = list(columns_to_check)
    if not columns_to_check:
        columns_to_check = df_.columns
    nonnull_df = df_[columns_to_check]
    # Drops columns that have any null values
    if threshold is None:
        nonnull_df.dropna(axis=1, inplace=True)
    else: # Drops columns with null values above those specified
        if isinstance(threshold, int) and threshold >= 1:
            nonnull_df.dropna(axis=1, thresh=df_.shape[0]-threshold, inplace=True)
        elif isinstance(threshold, float) and threshold <= 1.0 and threshold >= 0.0:
            nonnull_df.dropna(axis=1, thresh=df_.shape[0]-int(df_.shape[0]*threshold), inplace=True)
        else:
            raise TypeError(f'{threshold} of wrong type or out of range')
    df_.drop(columns=columns_to_check, inplace=True)
    df_ = df_.join(nonnull_df) 
    return df_

In [33]:
total_df = pd.read_csv('../train.csv')

#total_df.isnull()["Embarked"].describe()


In [34]:
drop_null_cols(total_df, ["Age", "Embarked", "Sex"], threshold=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,PassengerId,Survived,Pclass,Name,SibSp,Parch,Ticket,Fare,Cabin,Sex
0,1,0,3,"Braund, Mr. Owen Harris",1,0,A/5 21171,7.2500,,male
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,0,PC 17599,71.2833,C85,female
2,3,1,3,"Heikkinen, Miss. Laina",0,0,STON/O2. 3101282,7.9250,,female
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,0,113803,53.1000,C123,female
4,5,0,3,"Allen, Mr. William Henry",0,0,373450,8.0500,,male
...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",0,0,211536,13.0000,,male
887,888,1,1,"Graham, Miss. Margaret Edith",0,0,112053,30.0000,B42,female
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",1,2,W./C. 6607,23.4500,,female
889,890,1,1,"Behr, Mr. Karl Howell",0,0,111369,30.0000,C148,male
