https://www.kaggle.com/competitions/amex-default-prediction/data

In [1]:
import os
import pyarrow as pa
import pandas as pd
import shutil

In [2]:
import joblib
import xgboost as xgb

In [3]:
import pickle
from sklearn.impute import SimpleImputer
from sklearn.feature_extraction import DictVectorizer

In [4]:
def score_xgb_model(model_file, input_data, columns_used):
    # Load the XGBoost model from the specified file
    xgb_model = joblib.load(model_file)
    
    # Ensure the input DataFrame contains only the specified columns
    input_data = input_data[columns_used]
    
    # Make predictions using the loaded model
    # predictions = xgb_model.predict(input_data)
    predictions = xgb_model.predict_proba(input_data)  # Use predict_proba to get probabilities

    # Extract the probability of the positive class (assuming binary classification)
    probabilities = predictions[:, 1]
    
    # Add predictions to the input data DataFrame
    # input_data['prediction'] = predictions

    # Add probabilities to the input data DataFrame
    input_data['prediction'] = probabilities
    
    return input_data

In [5]:
# Define the directory path
data_dir = os.path.join('C:\\', 'Users', 'KonuTech', 'zoomcamp-capstone-01', 'data')
data_dir

'C:\\Users\\KonuTech\\zoomcamp-capstone-01\\data'

In [6]:
# Step 4: Convert and save the combined DataFrame as a single Parquet file
combined_parquet_file = 'test_data.parquet'

In [7]:
# Step 6: EDA on the combined Parquet file
parquet_df = pd.read_parquet(os.path.join(data_dir, combined_parquet_file))  # Read the combined Parquet file

In [8]:
# Shuffle the entire DataFrame randomly
parquet_df = parquet_df.sample(frac=1, random_state=42).reset_index(drop=True)

In [9]:
unique_clients_df = parquet_df.drop_duplicates(subset='customer_ID', keep='first')

In [10]:
unique_clients_df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 924621 entries, 0 to 11363265
Data columns (total 190 columns):
 #    Column       Dtype  
---   ------       -----  
 0    customer_ID  object 
 1    S_2          object 
 2    P_2          float64
 3    D_39         float64
 4    B_1          float64
 5    B_2          float64
 6    R_1          float64
 7    S_3          float64
 8    D_41         float64
 9    B_3          float64
 10   D_42         float64
 11   D_43         float64
 12   D_44         float64
 13   B_4          float64
 14   D_45         float64
 15   B_5          float64
 16   R_2          float64
 17   D_46         float64
 18   D_47         float64
 19   D_48         float64
 20   D_49         float64
 21   B_6          float64
 22   B_7          float64
 23   B_8          float64
 24   D_50         float64
 25   D_51         float64
 26   B_9          float64
 27   R_3          float64
 28   D_52         float64
 29   P_3          float64
 30   B_10         float64


In [11]:
# unique_clients_df.to_parquet(os.path.join(data_dir, "test_data_randomly_sampled.parquet"))

In [12]:
!ls

01_prepare_train_data.ipynb
02_eda.ipynb
03_downsample_data.ipynb
04_get_champion_binary_classifier.ipynb
05_prepare_test_data.ipynb
06_score_test_data.ipynb
dict_vectorizer.pkl
grid_search_results_2023-11-01_17-54-58.json
grid_search_results_2023-11-01_20-08-34.json
grid_search_results_2023-11-02_16-48-32.json
grid_search_results_2023-11-04_18-06-41.json
grid_search_results_2023-11-05_14-54-16.json
imputer.pkl
training_log.log


In [13]:
categorical_features = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68']
categorical_features

['B_30',
 'B_38',
 'D_114',
 'D_116',
 'D_117',
 'D_120',
 'D_126',
 'D_63',
 'D_64',
 'D_66',
 'D_68']

In [14]:
# Step 2: Load the pickled SimpleImputer from the file
with open('imputer.pkl', 'rb') as file:
    imputer = pickle.load(file)

In [15]:
# Specify columns to exclude from imputation
exclude_columns = ['customer_ID', 'S_2', 'target', 'D_63', 'D_64']

In [16]:
# Separate the DataFrame into the columns to impute and those to exclude
columns_to_impute = unique_clients_df.columns.difference(exclude_columns)
columns_to_impute

Index(['B_1', 'B_10', 'B_11', 'B_12', 'B_13', 'B_14', 'B_15', 'B_16', 'B_17',
       'B_18',
       ...
       'S_24', 'S_25', 'S_26', 'S_27', 'S_3', 'S_5', 'S_6', 'S_7', 'S_8',
       'S_9'],
      dtype='object', length=186)

In [17]:
# Fit the imputer to the data and transform the specified columns
unique_clients_df[columns_to_impute] = imputer.transform(unique_clients_df[columns_to_impute])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unique_clients_df[columns_to_impute] = imputer.transform(unique_clients_df[columns_to_impute])


In [18]:
# Convert the specified columns to strings
unique_clients_df[categorical_features] = unique_clients_df[categorical_features].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unique_clients_df[categorical_features] = unique_clients_df[categorical_features].astype(str)


In [19]:
features = [
    'D_48',
    'D_55',
    'B_9',
    'D_58',
    'D_75',
    'B_7',
    'B_23',
    'B_16',
    'D_44',
    'B_3',
    'B_30',
    'B_38',
    'D_114',
    'D_116',
    'D_117',
    'D_120',
    'D_126',
    'D_63',
    'D_64',
    'D_66',
    'D_68'
]

### Unpickling DictVectorizer()

In [20]:
unique_clients_df_dict = unique_clients_df[features].to_dict(orient='records')

In [21]:
# Load the saved DictVectorizer from the file
with open('dict_vectorizer.pkl', 'rb') as file:
    dict_vectorizer = pickle.load(file)

In [22]:
# Transform the new data using the unpickled DictVectorizer
unique_clients_df_encoded = dict_vectorizer.transform(unique_clients_df_dict)

In [23]:
len(dict_vectorizer.get_feature_names_out())

56

In [24]:
dict_vectorizer.get_feature_names_out()

array(['B_16', 'B_23', 'B_3', 'B_30=0.0', 'B_30=1.0', 'B_30=2.0',
       'B_38=1.0', 'B_38=2.0', 'B_38=3.0', 'B_38=4.0', 'B_38=5.0',
       'B_38=6.0', 'B_38=7.0', 'B_7', 'B_9', 'D_114=0.0', 'D_114=1.0',
       'D_116=0.0', 'D_116=1.0', 'D_117=-1.0', 'D_117=1.0', 'D_117=2.0',
       'D_117=3.0', 'D_117=4.0', 'D_117=5.0', 'D_117=6.0', 'D_120=0.0',
       'D_120=1.0', 'D_126=-1.0', 'D_126=0.0', 'D_126=1.0', 'D_44',
       'D_48', 'D_55', 'D_58', 'D_63=CL', 'D_63=CO', 'D_63=CR', 'D_63=XL',
       'D_63=XM', 'D_63=XZ', 'D_64=-1', 'D_64=None', 'D_64=O', 'D_64=R',
       'D_64=U', 'D_66=0.0', 'D_66=1.0', 'D_68=0.0', 'D_68=1.0',
       'D_68=2.0', 'D_68=3.0', 'D_68=4.0', 'D_68=5.0', 'D_68=6.0', 'D_75'],
      dtype=object)

In [25]:
# model_file = "XGBoost_2023-11-01_17-54-58.bin"
# model_file = "XGBoost_2023-11-01_20-08-34.bin"
# model_file = "XGBoost_2023-11-04_18-06-41.bin"
model_file = "XGBoost_2023-11-05_14-54-16.bin"

In [26]:
models_dir = os.path.join('C:\\', 'Users', 'KonuTech', 'zoomcamp-capstone-01', 'models')
models_dir

'C:\\Users\\KonuTech\\zoomcamp-capstone-01\\models'

In [27]:
models_path = os.path.join(models_dir, model_file)

In [28]:
# XGBoost_2023-11-01_17-54-58.bin :
# columns_used = ['B_16', 'B_19', 'B_20', 'B_23', 'B_3', 'B_38', 'B_4', 'B_7', 'B_9', 'D_44', 'D_48', 'D_55', 'D_58', 'D_74', 'D_75']  

# XGBoost_2023-11-01_20-08-34.bin :
# columns_used = ['B_16', 'B_19', 'B_20', 'B_23', 'B_3', 'B_38', 'B_4', 'B_7', 'B_9', 'D_44', 'D_48', 'D_55', 'D_58', 'D_74', 'D_75']  

# XGBoost_2023-11-04_18-06-41.bin :
# columns_used = ['D_48', 'D_55', 'B_9', 'D_58', 'D_75', 'D_44', 'B_7', 'B_23', 'B_16','B_3', 'D_74','B_38','B_20', 'B_4', 'B_19', 'B_1', 'B_37', 'B_8', 'B_22', 'B_11', 'B_30', 'S_7', 'R_1', 'S_3', 'D_70']

# XGBoost_.bin :
# columns_used = ['B_16', 'B_23', 'B_3', 'B_30=0.0', 'B_30=1.0', 'B_30=2.0', 'B_30=nan', 'B_38=1.0', 'B_38=2.0', 'B_38=3.0', 'B_38=4.0', 'B_38=5.0', 'B_38=6.0', 'B_38=7.0', 'B_38=nan', 'B_7', 'B_9', 'D_114=0.0', 'D_114=1.0', 'D_114=nan', 'D_116=0.0', 'D_116=1.0', 'D_116=nan', 'D_117=-1.0', 'D_117=1.0', 'D_117=2.0', 'D_117=3.0', 'D_117=4.0', 'D_117=5.0', 'D_117=6.0', 'D_117=nan', 'D_120=0.0', 'D_120=1.0', 'D_120=nan', 'D_126=-1.0', 'D_126=0.0', 'D_126=1.0', 'D_126=nan', 'D_44', 'D_48', 'D_55', 'D_58', 'D_63=CL', 'D_63=CO', 'D_63=CR', 'D_63=XL', 'D_63=XM', 'D_63=XZ', 'D_64=-1', 'D_64=None', 'D_64=O', 'D_64=R', 'D_64=U', 'D_66=0.0', 'D_66=1.0', 'D_66=nan', 'D_68=0.0', 'D_68=1.0', 'D_68=2.0', 'D_68=3.0', 'D_68=4.0', 'D_68=5.0', 'D_68=6.0', 'D_68=nan', 'D_75']
columns_used = ['B_16', 'B_23', 'B_3', 'B_30=0.0', 'B_30=1.0', 'B_30=2.0',
       'B_38=1.0', 'B_38=2.0', 'B_38=3.0', 'B_38=4.0', 'B_38=5.0',
       'B_38=6.0', 'B_38=7.0', 'B_7', 'B_9', 'D_114=0.0', 'D_114=1.0',
       'D_116=0.0', 'D_116=1.0', 'D_117=-1.0', 'D_117=1.0', 'D_117=2.0',
       'D_117=3.0', 'D_117=4.0', 'D_117=5.0', 'D_117=6.0', 'D_120=0.0',
       'D_120=1.0', 'D_126=-1.0', 'D_126=0.0', 'D_126=1.0', 'D_44',
       'D_48', 'D_55', 'D_58', 'D_63=CL', 'D_63=CO', 'D_63=CR', 'D_63=XL',
       'D_63=XM', 'D_63=XZ', 'D_64=-1', 'D_64=None', 'D_64=O', 'D_64=R',
       'D_64=U', 'D_66=0.0', 'D_66=1.0', 'D_68=0.0', 'D_68=1.0',
       'D_68=2.0', 'D_68=3.0', 'D_68=4.0', 'D_68=5.0', 'D_68=6.0', 'D_75']

In [29]:
len(columns_used)

56

In [30]:
# Convert the NumPy array to a DataFrame
unique_clients_df_encoded_df = pd.DataFrame(unique_clients_df_encoded, columns=columns_used)  # Replace 'your_column_names' with the actual column names

In [31]:
for column_name in columns_used:
    if column_name not in unique_clients_df_encoded_df.columns:
        print(f"Column '{column_name}' not found in the input_data DataFrame.")

### Scoring of a data

In [32]:
df_scored = score_xgb_model(
    model_file=models_path,
    input_data=unique_clients_df_encoded_df,
    columns_used=columns_used
)



In [33]:
df_scored.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 924621 entries, 0 to 924620
Data columns (total 57 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   B_16        924621 non-null  float64
 1   B_23        924621 non-null  float64
 2   B_3         924621 non-null  float64
 3   B_30=0.0    924621 non-null  float64
 4   B_30=1.0    924621 non-null  float64
 5   B_30=2.0    924621 non-null  float64
 6   B_38=1.0    924621 non-null  float64
 7   B_38=2.0    924621 non-null  float64
 8   B_38=3.0    924621 non-null  float64
 9   B_38=4.0    924621 non-null  float64
 10  B_38=5.0    924621 non-null  float64
 11  B_38=6.0    924621 non-null  float64
 12  B_38=7.0    924621 non-null  float64
 13  B_7         924621 non-null  float64
 14  B_9         924621 non-null  float64
 15  D_114=0.0   924621 non-null  float64
 16  D_114=1.0   924621 non-null  float64
 17  D_116=0.0   924621 non-null  float64
 18  D_116=1.0   924621 non-null  float64
 19  D_

In [34]:
df_scored

Unnamed: 0,B_16,B_23,B_3,B_30=0.0,B_30=1.0,B_30=2.0,B_38=1.0,B_38=2.0,B_38=3.0,B_38=4.0,...,D_66=1.0,D_68=0.0,D_68=1.0,D_68=2.0,D_68=3.0,D_68=4.0,D_68=5.0,D_68=6.0,D_75,prediction
0,0.168312,0.020590,0.018735,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.073677,0.017970
1,0.091521,0.024689,0.004492,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.006693,0.043156
2,0.008443,0.002116,0.007269,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.005511,0.017051
3,1.007958,0.638273,0.628909,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.338710,0.895407
4,0.086049,0.065805,0.013247,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.004395,0.014569
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
924616,0.006080,0.028284,0.009488,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.001578,0.715548
924617,0.009373,0.761858,0.003161,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.542992,0.758180
924618,0.006825,0.128480,0.002558,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.068366,0.529055
924619,0.253342,0.073859,0.309063,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.076225,0.769782


In [35]:
unique_clients_df.shape

(924621, 190)

In [36]:
unique_clients_df['customer_ID']

0           f56fdbc843bc02ad4dbc20e61b493d6905126d692eec79...
1           cc3a057fe61c68bcc9865ca319226e2ea1a372769b3115...
2           376a87a3d47e34a1d40b0c026fe5a7a7c801c086b13a54...
3           00b513cf1a88a057c5065dc286dcb01a56a831978ea09d...
4           931acc1d65abc0a68d638b69ef5a3e91fda6d14bec933e...
                                  ...                        
11354522    60d63bf43d55b13117986bca0b03d6a58c182ee747af98...
11354756    a0aac4094d863cb7cabd0b2de32634c9fefaf814153e9e...
11356280    2542d893ac6a5dd0c16aa0c758e3d129f2d82b93393739...
11357428    4abd50915c60908fed1163b87a3ce51e5985e7bc5878e4...
11363265    23e103fab928270fa6061a1028e192b918b73d4e73d69a...
Name: customer_ID, Length: 924621, dtype: object

In [37]:
df_scored.shape

(924621, 57)

In [38]:
df_scored["prediction"]

0         0.017970
1         0.043156
2         0.017051
3         0.895407
4         0.014569
            ...   
924616    0.715548
924617    0.758180
924618    0.529055
924619    0.769782
924620    0.636297
Name: prediction, Length: 924621, dtype: float32

In [39]:
# Assuming you have two Series: unique_clients_df["customer_ID"] and df_scored["prediction"]
customer_ID_series = unique_clients_df["customer_ID"].reset_index(drop=True)
prediction_series = df_scored["prediction"].reset_index(drop=True)

# Concatenate the two Series by row indices
merged_df = pd.concat([customer_ID_series, prediction_series], axis=1)

# Optional: Rename the columns if needed
merged_df.columns = ["customer_ID", "prediction"]

In [40]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 924621 entries, 0 to 924620
Data columns (total 2 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   customer_ID  924621 non-null  object 
 1   prediction   924621 non-null  float32
dtypes: float32(1), object(1)
memory usage: 10.6+ MB


In [41]:
merged_df

Unnamed: 0,customer_ID,prediction
0,f56fdbc843bc02ad4dbc20e61b493d6905126d692eec79...,0.017970
1,cc3a057fe61c68bcc9865ca319226e2ea1a372769b3115...,0.043156
2,376a87a3d47e34a1d40b0c026fe5a7a7c801c086b13a54...,0.017051
3,00b513cf1a88a057c5065dc286dcb01a56a831978ea09d...,0.895407
4,931acc1d65abc0a68d638b69ef5a3e91fda6d14bec933e...,0.014569
...,...,...
924616,60d63bf43d55b13117986bca0b03d6a58c182ee747af98...,0.715548
924617,a0aac4094d863cb7cabd0b2de32634c9fefaf814153e9e...,0.758180
924618,2542d893ac6a5dd0c16aa0c758e3d129f2d82b93393739...,0.529055
924619,4abd50915c60908fed1163b87a3ce51e5985e7bc5878e4...,0.769782


In [42]:
# merged_df.to_csv("submission_2023-11-01_17-54-58.csv", index=False)
# merged_df.to_csv("submission_2023-11-01_20-08-34.csv", index=False)
# merged_df.to_csv("submission_2023-11-04_18-06-41.csv", index=False)
merged_df.to_csv("submission_2023-11-05_14-54-16.bin.csv", index=False)

In [43]:
# Get the current working directory
current_dir = os.getcwd()
current_dir

'C:\\Users\\KonuTech\\zoomcamp-capstone-01\\notebooks'

In [44]:
destination_dir = os.path.join('C:\\', 'Users', 'KonuTech', 'zoomcamp-capstone-01', 'data')
destination_dir

'C:\\Users\\KonuTech\\zoomcamp-capstone-01\\data'

In [45]:
# Specify the file extension to filter (e.g., ".bin")
file_extension = ".csv"

In [46]:
# List files in the current working directory
source_files = os.listdir(current_dir)
source_files

['.ipynb_checkpoints',
 '01_prepare_train_data.ipynb',
 '02_eda.ipynb',
 '03_downsample_data.ipynb',
 '04_get_champion_binary_classifier.ipynb',
 '05_prepare_test_data.ipynb',
 '06_score_test_data.ipynb',
 'dict_vectorizer.pkl',
 'grid_search_results_2023-11-01_17-54-58.json',
 'grid_search_results_2023-11-01_20-08-34.json',
 'grid_search_results_2023-11-02_16-48-32.json',
 'grid_search_results_2023-11-04_18-06-41.json',
 'grid_search_results_2023-11-05_14-54-16.json',
 'imputer.pkl',
 'submission_2023-11-05_14-54-16.bin.csv',
 'training_log.log']

In [47]:
# Move files with the specified extension
for file in source_files:
    if file.endswith(file_extension):
        # Construct the source and destination paths
        source_path = os.path.join(current_dir, file)
        
        # Check if the file exists before moving
        if os.path.exists(source_path):
            destination_path = os.path.join(destination_dir, file)
            shutil.move(source_path, destination_path)
        else:
            print(f"File not found: {file}")

In [48]:
# Verify the move operation
destination_files = os.listdir(destination_dir)
print(f'Moved files with extension {file_extension} to destination directory: {destination_files}')

Moved files with extension .csv to destination directory: ['parquet_partitions', 'sample_submission.csv', 'submission_2023-11-01_17-54-58.csv', 'submission_2023-11-01_20-08-34.csv', 'submission_2023-11-04_18-06-41.csv', 'submission_2023-11-05_14-54-16.bin.csv', 'test_data.csv', 'test_data.parquet', 'test_data_randomly_sampled.parquet', 'train_data_downsampled.parquet']


In [49]:
os.listdir(destination_dir)

['parquet_partitions',
 'sample_submission.csv',
 'submission_2023-11-01_17-54-58.csv',
 'submission_2023-11-01_20-08-34.csv',
 'submission_2023-11-04_18-06-41.csv',
 'submission_2023-11-05_14-54-16.bin.csv',
 'test_data.csv',
 'test_data.parquet',
 'test_data_randomly_sampled.parquet',
 'train_data_downsampled.parquet']

In [56]:
df_scored.iloc[0].to_dict()

{'B_16': 0.1683122615936458,
 'B_23': 0.020589636817371,
 'B_3': 0.0187345221097178,
 'B_30=0.0': 1.0,
 'B_30=1.0': 0.0,
 'B_30=2.0': 0.0,
 'B_38=1.0': 0.0,
 'B_38=2.0': 1.0,
 'B_38=3.0': 0.0,
 'B_38=4.0': 0.0,
 'B_38=5.0': 0.0,
 'B_38=6.0': 0.0,
 'B_38=7.0': 0.0,
 'B_7': 0.030808470113386,
 'B_9': 0.0213623699449047,
 'D_114=0.0': 0.0,
 'D_114=1.0': 1.0,
 'D_116=0.0': 1.0,
 'D_116=1.0': 0.0,
 'D_117=-1.0': 0.0,
 'D_117=1.0': 0.0,
 'D_117=2.0': 0.0,
 'D_117=3.0': 1.0,
 'D_117=4.0': 0.0,
 'D_117=5.0': 0.0,
 'D_117=6.0': 0.0,
 'D_120=0.0': 1.0,
 'D_120=1.0': 0.0,
 'D_126=-1.0': 0.0,
 'D_126=0.0': 0.0,
 'D_126=1.0': 1.0,
 'D_44': 0.0096977284989766,
 'D_48': 0.0566975983295227,
 'D_55': 0.0407420703650585,
 'D_58': 0.0038394008873149,
 'D_63=CL': 0.0,
 'D_63=CO': 1.0,
 'D_63=CR': 0.0,
 'D_63=XL': 0.0,
 'D_63=XM': 0.0,
 'D_63=XZ': 0.0,
 'D_64=-1': 0.0,
 'D_64=None': 0.0,
 'D_64=O': 1.0,
 'D_64=R': 0.0,
 'D_64=U': 0.0,
 'D_66=0.0': 0.0,
 'D_66=1.0': 1.0,
 'D_68=0.0': 0.0,
 'D_68=1.0': 0.0,
