Data source (Kaggle AMEX default prediction competition): https://www.kaggle.com/competitions/amex-default-prediction/data

In [1]:
import gc
import os
import psutil
import pyarrow as pa
import time

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import random

In [3]:
# Lift pandas' display truncation so that all rows and all columns are rendered
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [4]:
def save_histograms(dataframe, save_path):
    """Save one histogram (with KDE overlay) per numeric column as a .jpg file.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose numeric columns (``select_dtypes(include=['number'])``)
        are plotted; all other columns are skipped.
    save_path : str
        Directory that receives ``<column>.jpg`` for every numeric column.
        It is created (including parents) when it does not exist yet.

    Returns
    -------
    None
        The function works through side effects: image files on disk and
        progress lines on stdout (``Saved: <file>`` after every column and
        ``Time elapsed: <n> minutes`` whenever a new whole minute has passed).
    """
    start_time = time.time()
    # Highest whole-minute mark already reported, so each mark is printed once
    # and nothing is printed before the first full minute has elapsed.
    last_reported_minute = 0

    numeric_columns = dataframe.select_dtypes(include=['number'])

    # Create the directory if it doesn't exist
    os.makedirs(save_path, exist_ok=True)

    for column in numeric_columns.columns:
        values = numeric_columns[column]

        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            sns.histplot(values, kde=True, color='blue', bins=20, ax=ax)
            ax.set_title(f'Histogram with KDE of {column}')
            ax.set_xlabel(column)
            # histplot's default statistic is the per-bin count, not a density
            ax.set_ylabel('Count')

            # N is the number of non-null observations, i.e. the values that
            # actually end up in the histogram (missing values are not plotted).
            num_events = int(values.count())

            # Add annotation for N in the top-middle part of the histogram
            ax.text(
                0.5, 0.95, f'N={num_events:,}',
                fontsize=12, ha='center', transform=ax.transAxes,
                bbox=dict(facecolor='white', alpha=0.7),
            )

            # Save the histogram as a .jpg file with the column name
            file_name = f'{column}.jpg'
            fig.savefig(os.path.join(save_path, file_name))

            print(f'Saved: {file_name}')
        finally:
            # Always release the figure, even when plotting or saving fails,
            # so a long run over many columns does not accumulate open figures.
            plt.close(fig)

        # Report elapsed time once per newly completed whole minute
        elapsed_minutes = int((time.time() - start_time) // 60)
        if elapsed_minutes > last_reported_minute:
            print(f'Time elapsed: {elapsed_minutes} minutes')
            last_reported_minute = elapsed_minutes

In [5]:
# Report the directory the notebook kernel is currently running from
current_directory = os.getcwd()
print(f"Current working directory: {current_directory}")

Current working directory: C:\Users\KonuTech\zoomcamp-capstone-01\notebooks


In [6]:
# Shell escape: long listing (including hidden entries, human-readable sizes) of the project's data directory
!ls -lah "C:\Users\KonuTech\zoomcamp-capstone-01\data"

total 42G
drwxr-xr-x 1 KonuTech 197121    0 Oct 30 19:08 .
drwxr-xr-x 1 KonuTech 197121    0 Oct 29 19:44 ..
drwxr-xr-x 1 KonuTech 197121    0 Oct 30 19:12 parquet_partitions
-rw-r--r-- 1 KonuTech 197121  60M May 20  2022 sample_submission.csv
-rw-r--r-- 1 KonuTech 197121  32G May 20  2022 test_data.csv
-rw-r--r-- 1 KonuTech 197121  16G May 20  2022 train_data.csv
-rw-r--r-- 1 KonuTech 197121 6.5G Oct 30 19:12 train_data.parquet
-rw-r--r-- 1 KonuTech 197121  30M May 20  2022 train_labels.csv


In [7]:
# Define the directory path
data_dir = os.path.join('C:\\', 'Users', 'KonuTech', 'zoomcamp-capstone-01', 'data')
data_dir

'C:\\Users\\KonuTech\\zoomcamp-capstone-01\\data'

In [8]:
# Name of the combined training-data Parquet file inside the data directory.
# This cell only records the file name; no conversion or saving happens here.
combined_parquet_file = 'train_data.parquet'

In [9]:
# Load the combined training data from its Parquet file as the starting point for the EDA
parquet_path = os.path.join(data_dir, combined_parquet_file)
parquet_df = pd.read_parquet(parquet_path)

In [10]:
# Trigger garbage collection to clear unreferenced objects.
# As the last expression of the cell, the call's return value is what gets displayed.
gc.collect()

18

In [11]:
# Print the frame summary; verbose=True requests the full per-column listing (name and dtype)
parquet_df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5531451 entries, 0 to 5531450
Data columns (total 191 columns):
 #    Column       Dtype  
---   ------       -----  
 0    customer_ID  object 
 1    S_2          object 
 2    P_2          float64
 3    D_39         float64
 4    B_1          float64
 5    B_2          float64
 6    R_1          float64
 7    S_3          float64
 8    D_41         float64
 9    B_3          float64
 10   D_42         float64
 11   D_43         float64
 12   D_44         float64
 13   B_4          float64
 14   D_45         float64
 15   B_5          float64
 16   R_2          float64
 17   D_46         float64
 18   D_47         float64
 19   D_48         float64
 20   D_49         float64
 21   B_6          float64
 22   B_7          float64
 23   B_8          float64
 24   D_50         float64
 25   D_51         float64
 26   B_9          float64
 27   R_3          float64
 28   D_52         float64
 29   P_3          float64
 30   B_10         flo

In [12]:
# Per-column profile: dtype, a sample of distinct values, cardinality and missing-value count
for column in parquet_df.columns:
    series = parquet_df[column]
    profile = (
        ("Column name:", column),
        ("Data type:", series.dtype),
        ("First five unique values:", series.unique()[:5]),
        ("Count of unique values:", series.nunique()),
    )
    for label, value in profile:
        print(label, value)
    # The trailing "\n" argument leaves a blank line between consecutive columns
    print("Count of NULL values:", series.isnull().sum(), "\n")

Column name: customer_ID
Data type: object
First five unique values: ['0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fbac11a8ed792feb62a'
 '00000fd6641609c6ece5454664794f0340ad84dddce9a267a310b5ae68e9d8e5'
 '00001b22f846c82c51f6e3958ccd81970162bae8b007e80662ef27519fcc18c1'
 '000041bdba6ecadd89a52d11886e8eaaec9325906c9723355abb5ca523658edc'
 '00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8ad51ca8b8c4a24cefed']
Count of unique values: 458913
Count of NULL values: 0 

Column name: S_2
Data type: object
First five unique values: ['2017-03-09' '2017-04-07' '2017-05-28' '2017-06-13' '2017-07-16']
Count of unique values: 396
Count of NULL values: 0 

Column name: P_2
Data type: float64
First five unique values: [0.93846872 0.93666461 0.95418028 0.96038359 0.94724838]
Count of unique values: 5485466
Count of NULL values: 45985 

Column name: D_39
Data type: float64
First five unique values: [0.00173334 0.00577544 0.0915054  0.00245522 0.00248301]
Count of unique values: 5531451
Count of NULL valu

In [13]:
# Show the leading rows as a list of dicts, one dict per row (column name -> value)
first_rows = parquet_df.head()
first_rows.to_dict(orient='records')

[{'customer_ID': '0000099d6bd597052cdcda90ffabf56573fe9d7c79be5fbac11a8ed792feb62a',
  'S_2': '2017-03-09',
  'P_2': 0.9384687191272548,
  'D_39': 0.0017333390041739,
  'B_1': 0.0087244509498605,
  'B_2': 1.0068382339663076,
  'R_1': 0.0092277222786689,
  'S_3': 0.1240351558143743,
  'D_41': 0.0087711319938824,
  'B_3': 0.0047092406313857,
  'D_42': nan,
  'D_43': nan,
  'D_44': 0.0006301348049115,
  'B_4': 0.0809863324662527,
  'D_45': 0.708906305121159,
  'B_5': 0.1706002293387026,
  'R_2': 0.0062040314303209,
  'D_46': 0.3585865793715965,
  'D_47': 0.525351040810055,
  'D_48': 0.255736073902975,
  'D_49': nan,
  'B_6': 0.0639022133803909,
  'B_7': 0.0594157330614109,
  'B_8': 0.006465576798311,
  'D_50': 0.1486979558199995,
  'D_51': 1.3358557940752642,
  'B_9': 0.0082067391252636,
  'R_3': 0.0014225021561254,
  'D_52': 0.2073338786110817,
  'P_3': 0.7364627260945562,
  'B_10': 0.0962188066642168,
  'D_53': nan,
  'S_5': 0.0233811220509184,
  'B_11': 0.0027680616648439,
  'S_6': 0.0

In [14]:
# Summary statistics for every column, not only the numeric ones (include='all')
parquet_df.describe(include='all')

Unnamed: 0,customer_ID,S_2,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,D_42,D_43,D_44,B_4,D_45,B_5,R_2,D_46,D_47,D_48,D_49,B_6,B_7,B_8,D_50,D_51,B_9,R_3,D_52,P_3,B_10,D_53,S_5,B_11,S_6,D_54,R_4,S_7,B_12,S_8,D_55,D_56,B_13,R_5,D_58,S_9,B_14,D_59,D_60,D_61,B_15,S_11,D_62,D_63,D_64,D_65,B_16,B_17,B_18,B_19,D_66,B_20,D_68,S_12,R_6,S_13,B_21,D_69,B_22,D_70,D_71,D_72,S_15,B_23,D_73,P_4,D_74,D_75,D_76,B_24,R_7,D_77,B_25,B_26,D_78,D_79,R_8,R_9,S_16,D_80,R_10,R_11,B_27,D_81,D_82,S_17,R_12,B_28,R_13,D_83,R_14,R_15,D_84,R_16,B_29,B_30,S_18,D_86,D_87,R_17,R_18,D_88,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,D_89,R_22,R_23,D_91,D_92,D_93,D_94,R_24,R_25,D_96,S_22,S_23,S_24,S_25,S_26,D_102,D_103,D_104,D_105,D_106,D_107,B_36,B_37,R_26,R_27,B_38,D_108,D_109,D_110,D_111,B_39,D_112,B_40,S_27,D_113,D_114,D_115,D_116,D_117,D_118,D_119,D_120,D_121,D_122,D_123,D_124,D_125,D_126,D_127,D_128,D_129,B_41,B_42,D_130,D_131,D_132,D_133,R_28,D_134,D_135,D_136,D_137,D_138,D_139,D_140,D_141,D_142,D_143,D_144,D_145,target
count,5531451,5531451,5485466.0,5531451.0,5531451.0,5529435.0,5531451.0,4510907.0,5529435.0,5529435.0,791314.0,3873055.0,5257132.0,5531451.0,5529434.0,5531451.0,5531451.0,4319752.0,5531451.0,4812726.0,545534.0,5531218.0,5531451.0,5509183.0,2389049.0,5531451.0,5531451.0,5531451.0,5501888.0,5229959.0,5531451.0,1446866.0,5531451.0,5531451.0,5531451.0,5529435.0,5531451.0,4510907.0,5531451.0,5531451.0,5346648.0,2540508.0,5481932.0,5531451.0,5531451.0,2597808.0,5531451.0,5424726.0,5531451.0,4933399.0,5524528.0,5531451.0,4773290.0,5531451,5314009,5531451.0,5529435.0,2393853.0,5531451.0,5529435.0,623354.0,5529435.0,5314948.0,5531451.0,5531451.0,5531451.0,5531451.0,5336978.0,5529435.0,5436534.0,5531451.0,5507743.0,5531451.0,5531451.0,55856.0,5531451.0,5509678.0,5531451.0,622497.0,5531451.0,5531450.0,3017539.0,5524528.0,5529435.0,5257132.0,5455512.0,5531451.0,312533.0,5531451.0,5509678.0,5531451.0,5531451.0,5529435.0,5505764.0,1472837.0,5531451.0,5531395.0,5531451.0,5531451.0,5336978.0,5531450.0,5531451.0,5501888.0,5531451.0,381416.0,5529435.0,5531451.0,5531451.0,3865.0,5531451.0,5531451.0,6004.0,5531451.0,5531451.0,5531451.0,5531451.0,5531451.0,5531376.0,5531451.0,5529435.0,5501888.0,5531451.0,5531451.0,5374235.0,5531451.0,5531451.0,5531451.0,5531451.0,5531451.0,5531451.0,5512427.0,5531006.0,5512858.0,5518604.0,5530817.0,5490796.0,5429903.0,5429903.0,2510020.0,541349.0,5429903.0,5531451.0,5531395.0,609305.0,5402748.0,5529435.0,28938.0,5529854.0,31334.0,31334.0,33632.0,5528801.0,5531398.0,4130516.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5354735.0,5414635.0,5531451.0,5429903.0,5429903.0,5530761.0,71478.0,5429903.0,5429903.0,542577.0,5488735.0,5531451.0,194699.0,194699.0,194699.0,194699.0,194699.0,5429903.0,5490819.0,5429903.0,944408.0,5429903.0,5490724.0,5429903.0,5531451.0
unique,458913,396,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
top,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2018-02-17,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CO,O,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
freq,13,26715,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4119621,2913244,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
mean,,,0.656334,0.1531172,0.12401,0.6214887,0.0788027,0.2258455,0.05978469,0.1325389,0.184974,0.1546841,0.1184431,0.1725543,0.2521867,0.08311185,0.04751791,0.4754784,0.4071093,0.3816245,0.1919254,0.1521174,0.1860836,0.4505812,0.1744347,0.145079,0.1895237,0.1208287,0.1800504,0.6012886,0.2316003,0.07618341,0.08810534,0.1109685,0.2441923,0.9846392,0.03120413,0.223138,0.09369956,0.3275998,0.2980905,0.2037094,0.1007155,0.03459403,0.2155312,0.07441472,0.1011546,0.3624224,0.376155,0.4296291,0.05280757,0.3753335,0.1906617,,,0.03976371,0.3486764,0.7152395,0.5962383,0.1501351,0.989913,0.2363041,5.01203,0.2404628,0.05853069,0.2546721,0.05315427,0.1540408,0.1072333,0.1105738,0.07083231,0.03756613,0.3747912,0.1714804,0.156493,0.1439271,0.1534933,0.1711995,0.1405049,0.03965135,0.08806446,0.256556,0.1038704,0.09061996,0.06261216,0.06912672,0.03812034,0.2298895,0.05081593,0.1120091,0.06477524,0.05083079,0.00500823,0.05491775,0.457711,0.0397203,0.9792204,0.1545278,0.005479773,0.04093297,0.241438,0.02028241,0.04474851,0.05853865,0.03299849,0.1579888,0.03146584,0.03749742,1.0,0.005318529,0.005008654,0.172304,0.9969435,0.005025946,0.02269557,0.02741512,0.01748739,0.03694556,0.02160973,0.6121952,0.006448085,0.008604041,0.005296266,0.06220701,0.09101932,0.01510617,0.02227298,0.01891782,0.008205672,0.03571874,0.7791037,0.1796774,0.7349203,0.9250379,0.06683993,0.1898365,0.4672621,0.4506291,0.3701221,0.2052421,0.2068964,0.006359806,0.1231896,0.08464978,0.8936941,2.716704,0.0714819,0.006660049,0.739263,0.874545,0.255473,0.8410442,0.2032609,0.2779786,0.156956,0.6193543,0.2781288,0.00123741,2.319056,0.2835119,0.2794081,0.1167214,0.5322441,0.4044235,0.05050569,0.305548,0.08434248,0.7390186,0.1075479,0.5841212,0.4399473,0.03234881,0.110038,0.2016622,0.1033794,0.210199,0.04614354,0.005742033,0.330082,0.02997322,0.2427725,0.01424409,0.1646183,0.1789305,0.02664348,0.1645212,0.390799,0.1788022,0.05238952,0.06233496,0.2490972
std,,,0.2446494,0.2700709,0.2119869,0.4014877,0.2263971,0.1933475,0.2025443,0.2349929,0.228185,0.2133977,0.2213035,0.2224149,0.2426829,0.3970432,0.2017843,0.1698931,0.2347989,0.3264793,0.278391,1.47677,0.2304195,0.4969413,0.6337476,0.2423309,0.2862744,0.2105157,0.1738649,0.1707992,4.799846,0.2026925,0.4074052,0.2082564,0.4265999,0.1411058,0.1597703,0.206303,0.820427,0.3030894,0.2887424,0.2121586,0.5593841,0.2568067,0.2604405,0.1959977,0.2835833,0.1931294,0.3718325,0.5245269,0.3005498,0.19101,0.2302574,,,0.4730981,0.4023605,0.3866728,0.3656087,0.2896168,0.099928,0.3737005,1.351339,0.2507191,0.6415658,0.2966695,0.7196354,16.00965,0.2232634,0.2277415,0.3260109,0.2029198,0.1950984,0.2293527,0.204952,0.3382721,0.2191907,0.2242142,0.2901273,0.3172191,1.550873,0.2326449,0.2087053,2.588205,0.2593385,0.2201319,0.3036416,0.1891741,0.7724871,0.2147461,0.3056937,0.1756224,0.003011975,0.339997,0.1899956,0.2410227,0.1537445,0.227502,0.007279533,0.3059534,8.356452,0.1227099,0.2850662,0.2392474,0.2765134,0.3909677,0.1605456,0.1773409,0.0,0.00638048,0.003240295,0.218063,0.05520127,0.003069175,0.1318707,0.1480568,0.1110884,0.3780643,0.1278468,0.4883836,0.01902708,0.06000691,0.01743741,0.186434,0.2936208,0.1000588,0.1303137,0.1171877,0.05660054,0.1725806,0.7187471,0.851958,0.8356727,0.2068055,0.4638081,0.2748047,0.4985823,0.4807409,0.2505208,1.257594,0.2572546,0.0212393,0.211777,0.2443295,0.3123073,1.581322,0.321731,0.04080646,0.308294,0.269492,0.297895,0.3694077,8.081345,0.3250327,0.226665,0.4855457,0.256212,0.03515506,2.28873,0.2559502,0.256467,0.321088,0.2338079,0.2369062,0.2456041,0.2261268,0.292258,0.5378084,0.3033793,0.4931497,0.4957559,0.2223059,0.313704,0.3974942,0.2953313,0.24763,0.1720901,0.02737901,0.296216,0.1561003,0.210132,0.09571115,0.2670208,0.3790614,0.145548,0.3482771,0.236182,0.3789498,0.1825135,0.1934937,0.4324903
min,,,-0.4589548,5.02619e-09,-7.588799,9.19228e-09,1.534223e-09,-0.627132,5.566545e-10,6.285293e-09,-0.000454,1.15455e-07,5.153088e-10,3.099332e-09,1.563241e-08,2.804822e-11,8.265748e-10,-17.28934,-0.02662164,-0.009615277,5.021298e-07,-0.005178168,-2.652748,1.153704e-08,-306.4871,3.551498e-10,6.72607e-09,2.238322e-09,-0.007083109,-1.51969,-0.002958481,5.678129e-08,8.168135e-09,2.017814e-08,2.541465e-09,-0.002005767,4.024685e-10,-0.4701318,3.343624e-08,6.187214e-09,2.353218e-08,-0.01708915,1.90585e-08,1.154589e-09,4.968945e-09,2.823498e-07,-8.469854,-0.1024301,5.90503e-09,-0.009259018,-10.00805,-0.1999987,-0.002822135,,,3.539404e-09,7.81382e-10,3.774555e-08,5.891222e-08,1.573038e-09,0.0,1.218623e-09,0.0,-0.404109,4.650681e-10,3.655658e-09,4.426753e-09,6.051601e-09,2.056148e-09,6.623321e-10,3.026939e-09,3.381434e-10,-0.2999998,4.352963e-08,-0.045261,2.668066e-09,2.137169e-09,1.120811e-09,3.128482e-07,1.820939e-09,8.67629e-10,2.678714e-08,-11.25358,1.691721e-09,6.210012e-10,1.438067e-10,1.738091e-09,3.121485e-07,5.083305e-09,1.890425e-09,2.23325e-09,8.36379e-10,1.044159e-09,5.759929e-10,3.813199e-08,1.019806e-09,-0.09179347,-2.160207e-05,2.894743e-09,5.042657e-09,6.70242e-10,3.588444e-10,1.190379e-09,5.66056e-10,2.146359e-08,0.0,4.075929e-11,7.226233e-11,1.0,1.066114e-09,2.170287e-09,-3.2e-05,0.0,3.15639e-09,1.954925e-09,2.098142e-09,6.887991e-10,4.972086e-09,3.646872e-09,2.147189e-10,2.0143e-10,9.576146e-10,8.4432e-10,1.117682e-09,2.088378e-09,6.382819e-09,9.098536e-10,6.570262e-10,4.640562e-11,2.677825e-10,-91.99816,-206.0855,-90.48382,-2.453451,5.137101e-09,9.620587e-10,1.025806e-09,2.869108e-09,-0.02640482,7.532992e-08,6.403922e-09,1.160856e-09,-7.604273,2.9707e-08,-0.02571099,1.0,3.917639e-07,3.119567e-10,-0.02375,3e-06,-0.979314,4.34464e-08,1.201275e-08,4.417077e-10,1.463474e-11,0.0,2.228054e-07,0.0,-1.0,2.551906e-08,7.076709e-08,0.0,-0.03207547,4.033095e-08,8.578643e-10,-0.04545438,4.410464e-09,-1.0,1.006008e-09,3.235349e-09,2.422876e-10,2.030713e-09,3e-06,6.097
074e-11,5.173169e-10,-0.015102,6.380582e-10,1.097922e-09,-0.012444,5.008304e-08,6.316773e-08,1.078787e-08,3.307923e-08,3.767347e-10,3.725073e-09,1.6501e-10,-0.014539,5.549692e-09,2.500991e-09,1.226024e-09,0.0
25%,,,0.4803307,0.004528464,0.008863645,0.1053313,0.002895934,0.1272588,0.002873244,0.00522757,0.037516,0.04227546,0.003832839,0.02750011,0.05462902,0.007280386,0.002613008,0.4251277,0.2323514,0.08100685,0.06158435,0.02048716,0.02824729,0.004506935,0.0647612,0.00364493,0.005752602,0.004700085,0.07387472,0.5408657,0.0289907,0.006171486,0.005636077,0.00660267,0.003285294,1.002341,0.002565921,0.09372875,0.01086972,0.007645077,0.05701689,0.08700979,0.009255422,0.002575087,0.00585812,0.009789102,0.008151003,0.2348764,0.03227373,0.09247558,0.003139858,0.2824439,0.03101255,,,0.002611131,0.006237099,0.4800441,0.2067308,0.003455325,1.0,0.004038402,4.0,0.1868979,0.002592271,0.004894503,0.002550872,0.002585181,0.003080827,0.003398316,0.008633795,0.002624563,0.20755,0.01747993,0.056187,0.002924059,0.005887432,0.006720455,0.01534821,0.002587743,0.002568756,0.07946026,0.005964626,0.00262016,0.002700048,0.002777576,0.00255369,0.1691219,0.00255321,0.003621739,0.002624501,0.002707382,0.002502497,0.002584436,0.5015002,0.002686053,1.002284,0.02745631,0.002521217,0.002561606,0.002535147,0.002538874,0.002602662,0.002705823,0.002557025,0.0,0.002571064,0.0025846,1.0,0.002518641,0.002498538,0.027192,1.0,0.00250417,0.00254858,0.002555637,0.00253276,0.002537703,0.002536777,0.006361827,0.002524606,0.002505943,0.002502194,0.002773983,0.002722708,0.002525242,0.00254503,0.002535647,0.002509307,0.002579307,0.8111971,0.1334254,0.8094516,0.9708745,0.003413339,0.004453618,0.004645799,0.004654078,0.1706199,0.05172805,0.004647512,0.002513542,0.008834879,0.005244059,1.001545,2.0,0.002676816,0.002503555,0.525457,1.000482,0.058394,1.001025,0.01739309,0.006364579,0.004352625,0.0,0.06258397,0.0,-1.0,0.06241067,0.05837902,0.0,0.3563471,0.1514714,0.002614415,0.1405611,0.00269724,1.0,0.002787533,0.005949431,0.004424397,0.002553868,0.00726,0.003113379,0.002780952,0.073693,0.002718821,0.00250225,0.114126,0.002551973,0.009314305,0.00253247,0.003517452,0.003027212,0.002555848,0.003026087,0.199399,0.003028116,0.00
2752896,0.003028347,0.0
50%,,,0.694295,0.009056902,0.03132968,0.8143328,0.00578223,0.1639082,0.005746725,0.00977723,0.120519,0.08851245,0.007668474,0.08222598,0.1808053,0.01537682,0.005223076,0.4596703,0.3831664,0.2861716,0.1303682,0.08338487,0.07574612,0.009020668,0.1094135,0.007282917,0.02587759,0.009400614,0.1444321,0.6183025,0.1105554,0.01337968,0.01352057,0.01945133,0.006573143,1.004893,0.005132659,0.1397011,0.01944087,0.3217483,0.1816685,0.149429,0.02931426,0.005149018,0.1065893,0.01949362,0.02849409,0.378409,0.2353444,0.3722665,0.006234545,0.2897305,0.09443576,,,0.005223146,0.09173869,0.9320872,0.6466326,0.006907869,1.0,0.008074334,6.0,0.1908262,0.00518687,0.009796928,0.005101578,0.005166079,0.006160283,0.006787958,0.0127965,0.005250227,0.4018434,0.05951238,0.103089,0.005848502,0.07570754,0.07436966,0.05874229,0.005174882,0.005135867,0.2058335,0.01985531,0.005237409,0.005395388,0.005555031,0.005105827,0.1727493,0.005104234,0.0072475,0.005247257,0.005413602,0.005004534,0.005168711,0.5044334,0.005367302,1.004859,0.07706299,0.005043645,0.005121536,0.005069985,0.005078574,0.005206744,0.005410478,0.005088062,0.0,0.005134277,0.005167799,1.0,0.005030552,0.004999655,0.085616,1.0,0.005007637,0.005092067,0.005114076,0.005061884,0.005075567,0.005080965,1.001767,0.005048758,0.005015861,0.005002915,0.00554598,0.005447958,0.005052575,0.005090312,0.005069786,0.005015866,0.005156619,0.9422135,0.1362231,0.9493249,0.9736766,0.00675473,0.008898508,0.00929649,0.009302054,0.3338461,0.1363444,0.009299602,0.005028643,0.03114089,0.03685061,1.004365,2.0,0.005350452,0.005009813,0.878845,1.003683,0.149225,1.004013,0.05830858,0.2905143,0.008711394,1.0,0.1991381,0.0,3.0,0.2204558,0.2133946,0.0,0.5953796,0.4291804,0.005236194,0.274195,0.005399487,1.0,0.00557243,1.000418,0.008850722,0.005106535,0.021599,0.00622416,0.005560417,0.161484,0.005439861,0.005004332,0.217769,0.005109191,0.2539468,0.00506983,0.007037814,0.00605301,0.005110523,0.00605159,0.382136,0.006053151,0.005508129,0.006053248,0.0
75%,,,0.8648159,0.2366407,0.1259019,1.002403,0.00866059,0.2581017,0.008615665,0.1550507,0.250869,0.1843206,0.1319484,0.2388817,0.3733219,0.05371837,0.007837339,0.5174066,0.5615486,0.6693663,0.2467636,0.1919294,0.2709323,1.004175,0.1869698,0.336034,0.3342967,0.2003358,0.233146,0.6838264,0.2955389,0.04987849,0.07132089,0.1019708,0.009858875,1.007448,0.007700315,0.2954592,0.06913838,0.4949366,0.5012073,0.2496252,0.08941487,0.00772187,0.3726992,0.05322812,0.1003196,0.4678927,0.7036913,0.774524,0.009289617,0.448762,0.2844873,,,0.007835914,0.7565882,1.002225,1.003277,0.0954603,1.0,0.357849,6.0,0.2088756,0.007781599,0.4277199,0.007651583,0.00774985,0.009242109,0.2507828,0.03118547,0.007873465,0.5048339,0.2472314,0.174849,0.008766024,0.2194434,0.2688564,0.1678398,0.007760693,0.007702539,0.3749256,0.106159,0.007855688,0.008095952,0.008331475,0.007657797,0.17634,0.0076574,0.2031722,0.007869666,0.008121441,0.007506658,0.007755537,0.5073687,0.008049988,1.007424,0.1944009,0.007567901,0.007679788,0.007608542,0.00761554,0.007811827,0.008111589,0.007633993,0.0,0.007701028,0.007751853,1.0,0.007544346,0.007500095,0.234028,1.0,0.007510969,0.007634533,0.007674605,0.00759221,0.007614203,0.007624209,1.005885,0.007570818,0.007524826,0.007501197,0.008317631,0.00817154,0.007578425,0.007632676,0.007604379,0.007524547,0.007736906,0.9647727,0.1390174,0.9715761,0.9764816,0.01103299,0.3278337,1.00459,0.964433,0.5256952,0.2626341,0.3394784,0.007542553,0.1238671,0.07713827,1.007182,3.0,0.007955245,0.007513661,1.003896,1.00683,0.291426,1.007006,0.2451545,0.4477965,0.2077554,1.0,0.4340722,0.0,4.0,0.4417185,0.4372384,0.0,0.7148844,0.576125,0.007855673,0.4166305,0.008096402,1.0,0.008356925,1.004735,1.004246,0.007659996,0.105613,0.009334572,0.008334345,0.278052,0.008163446,0.00750612,0.454186,0.00768475,0.258245,0.007573434,0.5015468,0.009080455,0.007663697,0.009078915,0.559308,0.009076287,0.008260447,0.00908093,0.0


In [15]:
# Output directory for the histogram images, assembled from its path components
save_path_parts = ('C:\\', 'Users', 'KonuTech', 'zoomcamp-capstone-01', 'eda', "histograms")
save_path = os.path.join(*save_path_parts)
save_path

'C:\\Users\\KonuTech\\zoomcamp-capstone-01\\eda\\histograms'

In [16]:
# Write one histogram image per numeric column of parquet_df into save_path
save_histograms(parquet_df, save_path)

Saved: P_2.jpg
Saved: D_39.jpg
Saved: B_1.jpg
Saved: B_2.jpg
Saved: R_1.jpg
Saved: S_3.jpg
Saved: D_41.jpg
Saved: B_3.jpg
Saved: D_42.jpg
Saved: D_43.jpg
Saved: D_44.jpg
Saved: B_4.jpg
Saved: D_45.jpg
Saved: B_5.jpg
Saved: R_2.jpg
Saved: D_46.jpg
Saved: D_47.jpg
Saved: D_48.jpg
Saved: D_49.jpg
Saved: B_6.jpg
Saved: B_7.jpg
Saved: B_8.jpg
Saved: D_50.jpg
Saved: D_51.jpg
Saved: B_9.jpg
Saved: R_3.jpg
Saved: D_52.jpg
Saved: P_3.jpg
Saved: B_10.jpg
Saved: D_53.jpg
Saved: S_5.jpg
Saved: B_11.jpg
Saved: S_6.jpg
Saved: D_54.jpg
Saved: R_4.jpg
Saved: S_7.jpg
Saved: B_12.jpg
Saved: S_8.jpg
Saved: D_55.jpg
Saved: D_56.jpg
Saved: B_13.jpg
Saved: R_5.jpg
Saved: D_58.jpg
Saved: S_9.jpg
Saved: B_14.jpg
Saved: D_59.jpg
Saved: D_60.jpg
Saved: D_61.jpg
Saved: B_15.jpg
Saved: S_11.jpg
Saved: D_62.jpg
Saved: D_65.jpg
Saved: B_16.jpg
Saved: B_17.jpg
Saved: B_18.jpg
Saved: B_19.jpg
Saved: D_66.jpg
Saved: B_20.jpg
Saved: D_68.jpg
Saved: S_12.jpg
Saved: R_6.jpg
Saved: S_13.jpg
Saved: B_21.jpg
Saved: D_69.jpg