In [85]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from joblib import load
import joblib

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score


# import sys
# sys.path.append('..')
# from  data_processing.process_data import entire_data_processing, process_label, categorize_age,label_encoder, onehot_encoder,label_encoding_attribute

In [86]:
import os 
import logging

import pandas as pd
import numpy as np

from sklearn import preprocessing


logging.basicConfig(
    filename='./log',
    level=logging.INFO,
    filemode='a',
    format='%(name)s - %(levelname)s - %(asctime)s - %(message)s',
    datefmt='%d-%b-%y %H:%M:%S')


def get_age_group(age):
    if age < 18:
        return 'underage'
    elif (age >= 18) & (age < 60):
        return 'adult'
    else:
        return 'elder'


def get_salary_group(salary):
    if salary == '<=50K':
        return 1
    return 0


def process_label(df):
    df['salary'] = df['salary'].apply(get_salary_group)


def categorize_age(df):
    df['age'] = df['age'].apply(get_age_group)


def save_encoder_classes(path, attribute, le):
    if not os.path.exists(path):
        np.save(path, le.classes_)
        logging.info('init and save encoder classes_')


def label_encoder(df, attribute, training):
    le = preprocessing.LabelEncoder()
    
    if training:
        le.fit(list(set(df[attribute])))        
        save_encoder_classes(f'../data/label_encoders/{attribute}.npy', attribute, le)
    else:
        le.classes_ = np.load(f'../data/label_encoders/{attribute}.npy')
    df[attribute] = le.transform(df[attribute])


def label_encoding_attribute(df, training):
    label_encoder(df,'sex', training)
    label_encoder(df,'race', training)
    label_encoder(df,'occupation', training)
    label_encoder(df,'workclass', training)
    label_encoder(df,'marital-status', training)
    

def onehot_encoder(df, attribute, training):
    lb = preprocessing.LabelBinarizer()
    
    if training:
        lb.fit(list(set(df[attribute])))
        save_encoder_classes(f'../data/onehot_encoders/{attribute}.npy', attribute, lb)    
    else:
        lb.classes_ = np.load(f'../data/onehot_encoders/{attribute}.npy')
    return lb.transform(df[attribute])


def entire_data_processing(df, training):
    process_label(df)
    categorize_age(df)
    label_encoding_attribute(df, training)
    age_onehot_data = onehot_encoder(df, 'age', training)
    df.drop('age', axis=1, inplace=True)
    return np.concatenate([age_onehot_data, df.values], axis=1)



In [87]:
df

Unnamed: 0,age,workclass,marital-status,occupation,race,sex,hours-per-week,salary
0,adult,State-gov,Never-married,Adm-clerical,White,Male,40,1
1,adult,Self-emp-not-inc,Married-civ-spouse,Exec-managerial,White,Male,13,1
2,adult,Private,Divorced,Handlers-cleaners,White,Male,40,1
3,adult,Private,Married-civ-spouse,Handlers-cleaners,Black,Male,40,1
4,adult,Private,Married-civ-spouse,Prof-specialty,Black,Female,40,1
...,...,...,...,...,...,...,...,...
20307,adult,Private,Never-married,Tech-support,Asian-Pac-Islander,Male,11,1
20308,adult,Private,Never-married,Protective-serv,White,Male,40,1
20309,adult,Private,Married-civ-spouse,Tech-support,White,Female,38,1
20310,adult,Private,Widowed,Adm-clerical,White,Female,40,1


In [88]:
cat_features = [
    'age',
    'sex',
    'race',
    'occupation',
    'workclass',
    'marital-status',
    'salary'
]

In [89]:
path = '../data/census_cleaned.csv'

In [90]:
df = pd.read_csv(path)
process_label(df)
categorize_age(df)

In [91]:
cat_features.remove('salary')

In [92]:
cat_features

['age', 'sex', 'race', 'occupation', 'workclass', 'marital-status']

In [93]:
_, test = train_test_split(df, test_size = 0.2, shuffle=True)

In [94]:
test

Unnamed: 0,age,workclass,marital-status,occupation,race,sex,hours-per-week,salary
20259,adult,Private,Married-civ-spouse,Craft-repair,White,Male,48,1
8256,adult,Private,Separated,Transport-moving,White,Male,60,1
9530,adult,Local-gov,Divorced,Prof-specialty,White,Female,57,1
3987,adult,Private,Never-married,Sales,White,Male,65,1
17335,adult,Private,Divorced,Transport-moving,White,Male,56,0
...,...,...,...,...,...,...,...,...
410,adult,Private,Married-civ-spouse,Prof-specialty,White,Male,40,0
15898,adult,Private,Married-spouse-absent,Exec-managerial,White,Female,35,1
2716,elder,Private,Never-married,Other-service,White,Female,60,1
9049,adult,Federal-gov,Married-civ-spouse,Adm-clerical,White,Male,30,1


In [95]:
def encoder_attributes(df, training=False):
    label_encoding_attribute(df, training=False)
    age_onehot_data = onehot_encoder(df, 'age', training=False)
    
    df.drop('age', axis=1, inplace=True)
    return np.concatenate([age_onehot_data, df.values], axis=1)

In [96]:
all_scores_df = pd.DataFrame(
        columns=[
            "attribute",
            "category",
            "num_samples",
            "accuracy",
            "roc",
        ]
    )

In [75]:
cat_features1 = ['sex']

In [97]:
lgbm_model = joblib.load('../model/best_clf.pkl')

for attribute in cat_features:
    print(attribute)
    for category in test[attribute].unique():
        print(category)
        filtered_df = test[test[attribute] == category]
        n_samples = len(filtered_df)
        
        np_data = encoder_attributes(filtered_df)

        X = np_data[:, :-1]
        y = np_data[:, -1]
        
        if len(set(y)) == 1:
            accuracy = list(set(y))[0]
            roc_ = -1
        else:    
            y_pred = lgbm_model.predict(X)

            accuracy = int(accuracy_score(y, y_pred) * 10000)/10000
            roc_ = int(roc_auc_score(y,y_pred) * 10000)/10000
        
        scores_list = [
                attribute,
                category,
                n_samples,
                accuracy,
                roc_,
            ]
        
        scores_series = pd.Series(scores_list, index=all_scores_df.columns)

        # Add scores to DataFrame
        all_scores_df = all_scores_df.append(
            scores_series, ignore_index=True
        )

all_scores_df.to_csv('hey.csv',index=False)
        

age
adult


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(


elder


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(


underage


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])


sex
Male
Female


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(


race
White


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])


Asian-Pac-Islander
Black


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(


Amer-Indian-Eskimo


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documen

Other
occupation
Craft-repair


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Transport-moving
Prof-specialty


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Sales
Exec-managerial


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Tech-support
Priv-house-serv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Other-service
Adm-clerical


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Machine-op-inspct
Handlers-cleaners


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Farming-fishing
Protective-serv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Armed-Forces
workclass
Private


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Local-gov
Federal-gov


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Self-emp-not-inc
State-gov


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Self-emp-inc
Without-pay


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

marital-status
Married-civ-spouse
Separated


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Divorced
Never-married


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instea

Widowed
Married-spouse-absent
Married-AF-spouse


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[attribute] = le.transform(df[attribute])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('age', axis=1, inplace=True)
  all_scores_df = all_scores_df.append(


In [None]:
def pre_process_data(path):
    df = pd.read_csv(path)
    np_data = entire_data_processing(df, True)
    
    y = np_data[:, -1]
    X = np_data[:, :-1]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle=True)
    
    return X_test, y_test

In [None]:
def slices_scores(path):
    
    

In [104]:
!touch slices.py