Data source: Kaggle "American Express - Default Prediction" competition data — https://www.kaggle.com/competitions/amex-default-prediction/data

In [1]:
import gc

In [2]:
import os
import pandas as pd
from sklearn.feature_selection import VarianceThreshold
# from sklearn.model_selection import GridSearchCV, train_test_split

# from sklearn.pipeline import Pipeline
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# from sklearn.svm import SVC
# from sklearn.linear_model import LogisticRegression
# from sklearn.naive_bayes import GaussianNB
# import xgboost as xgb
# from sklearn.preprocessing import LabelEncoder
# from sklearn.metrics import f1_score
# from sklearn.feature_extraction import DictVectorizer
# from sklearn.impute import SimpleImputer
# # from imblearn.under_sampling import RandomUnderSampler

In [3]:
# Never truncate the column list when a DataFrame is displayed
pd.options.display.max_columns = None

In [4]:
def calculate_correlation_with_target(features_df, target_series, variance_threshold=0.0):
    """
    Correlate every numeric feature with the target, optionally dropping
    low-variance features first.

    Parameters:
    features_df (pd.DataFrame): The features DataFrame. Non-numeric columns are ignored.
    target_series (pd.Series): The target Series. ``DataFrame.corrwith`` aligns it
        with ``features_df`` on the index.
    variance_threshold (float): Features whose population variance (ddof=0, NaNs
        skipped) is not strictly greater than this value are removed before the
        correlation is computed. This is the same selection rule that scikit-learn's
        ``VarianceThreshold`` applies. Default is 0.0 (no filtering).

    Returns:
    pd.Series: Signed correlation coefficients indexed by feature name, ordered by
        absolute value in descending order (NaN correlations come last). The Series
        is empty when no numeric feature survives the filtering.
    """
    # Select only numeric columns from the features DataFrame
    numeric_features = features_df.select_dtypes(include=['number'])

    # Filter with pandas rather than VarianceThreshold.fit_transform: the latter
    # returns a bare ndarray, which loses the column labels needed for the result index.
    if variance_threshold > 0.0:
        variances = numeric_features.var(axis=0, ddof=0)
        # A positional boolean mask keeps this safe even with duplicated column labels.
        keep_mask = (variances > variance_threshold).to_numpy()
        numeric_features = numeric_features.loc[:, keep_mask]

    # Nothing left to correlate: return an empty, correctly typed result.
    if numeric_features.shape[1] == 0:
        return pd.Series(dtype='float64')

    correlation_series = numeric_features.corrwith(target_series)

    # Strongest relationships first, regardless of sign; the signed values are kept.
    return correlation_series.sort_values(key=lambda values: values.abs(), ascending=False)

In [5]:
# Directory that holds the competition parquet files.
# Set the AMEX_DATA_DIR environment variable to run the notebook on another machine;
# when it is unset the original local path is used, so existing behaviour is unchanged.
data_dir = os.environ.get(
    'AMEX_DATA_DIR',
    os.path.join('C:\\', 'Users', 'KonuTech', 'zoomcamp-capstone-01', 'data'),
)
data_dir

'C:\\Users\\KonuTech\\zoomcamp-capstone-01\\data'

In [6]:
# File name of the training set; it is joined onto the data directory when loading
train_data_parquet_file = "train_data.parquet"

In [7]:
# Read the training parquet file from the data directory into a DataFrame
train_data = pd.read_parquet(
    os.path.join(data_dir, train_data_parquet_file)
)

In [8]:
# Print the frame summary; verbose=True requests the full per-column listing
train_data.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5531451 entries, 0 to 5531450
Data columns (total 191 columns):
 #    Column       Dtype  
---   ------       -----  
 0    customer_ID  object 
 1    S_2          object 
 2    P_2          float64
 3    D_39         float64
 4    B_1          float64
 5    B_2          float64
 6    R_1          float64
 7    S_3          float64
 8    D_41         float64
 9    B_3          float64
 10   D_42         float64
 11   D_43         float64
 12   D_44         float64
 13   B_4          float64
 14   D_45         float64
 15   B_5          float64
 16   R_2          float64
 17   D_46         float64
 18   D_47         float64
 19   D_48         float64
 20   D_49         float64
 21   B_6          float64
 22   B_7          float64
 23   B_8          float64
 24   D_50         float64
 25   D_51         float64
 26   B_9          float64
 27   R_3          float64
 28   D_52         float64
 29   P_3          float64
 30   B_10         flo

### Downsampling: balance the target classes by undersampling the larger class

In [9]:
# Name of the label column used to split the classes
target_column = 'target'

# Split the rows of `train_data` by class label (0 vs. 1)
# NOTE(review): the split and the sampling below work per row; if one customer_ID
# spans several rows, its rows may be sampled independently -- confirm this is intended.
zeros = train_data.loc[train_data[target_column].eq(0)]
ones = train_data.loc[train_data[target_column].eq(1)]

# Both classes are cut down to the size of the smaller one
minority_class_size = min(map(len, (zeros, ones)))

# Draw the same number of rows from each class with a fixed seed
zeros_downsampled = zeros.sample(n=minority_class_size, random_state=42)
ones_downsampled = ones.sample(n=minority_class_size, random_state=42)

# Stack the two balanced halves, then shuffle so the classes are interleaved
downsampled_data = pd.concat(
    [zeros_downsampled, ones_downsampled]
).sample(frac=1, random_state=42)

# The balanced dataset is now available as `downsampled_data`

In [10]:
# Names (not objects) of the large intermediate frames
dfs = [
    "train_data",
    "zeros",
    "ones",
    "zeros_downsampled",
    "ones_downsampled",
]

In [11]:
# Release the large intermediate frames listed in `dfs`.
# `del el` would only unbind the loop variable (a string holding the name), leaving
# every DataFrame alive, so the names are removed from the notebook namespace instead.
for el in dfs:
    globals().pop(el, None)  # a missing name is ignored, so the cell can be re-run safely

# Run a collection right away so the freed frames are reclaimed before the next cells.
gc.collect();

In [12]:
# Summarize the downsampled frame: index range, column count, dtypes and memory usage
downsampled_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2755738 entries, 541332 to 2614482
Columns: 191 entries, customer_ID to target
dtypes: float64(185), int64(2), object(4)
memory usage: 3.9+ GB


In [13]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns
downsampled_data.describe()

Unnamed: 0,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,D_42,D_43,D_44,B_4,D_45,B_5,R_2,D_46,D_47,D_48,D_49,B_6,B_7,B_8,D_50,D_51,B_9,R_3,D_52,P_3,B_10,D_53,S_5,B_11,S_6,D_54,R_4,S_7,B_12,S_8,D_55,D_56,B_13,R_5,D_58,S_9,B_14,D_59,D_60,D_61,B_15,S_11,D_62,D_65,B_16,B_17,B_18,B_19,D_66,B_20,D_68,S_12,R_6,S_13,B_21,D_69,B_22,D_70,D_71,D_72,S_15,B_23,D_73,P_4,D_74,D_75,D_76,B_24,R_7,D_77,B_25,B_26,D_78,D_79,R_8,R_9,S_16,D_80,R_10,R_11,B_27,D_81,D_82,S_17,R_12,B_28,R_13,D_83,R_14,R_15,D_84,R_16,B_29,B_30,S_18,D_86,D_87,R_17,R_18,D_88,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,D_89,R_22,R_23,D_91,D_92,D_93,D_94,R_24,R_25,D_96,S_22,S_23,S_24,S_25,S_26,D_102,D_103,D_104,D_105,D_106,D_107,B_36,B_37,R_26,R_27,B_38,D_108,D_109,D_110,D_111,B_39,D_112,B_40,S_27,D_113,D_114,D_115,D_116,D_117,D_118,D_119,D_120,D_121,D_122,D_123,D_124,D_125,D_126,D_127,D_128,D_129,B_41,B_42,D_130,D_131,D_132,D_133,R_28,D_134,D_135,D_136,D_137,D_138,D_139,D_140,D_141,D_142,D_143,D_144,D_145,target
count,2732522.0,2755738.0,2755738.0,2754627.0,2755738.0,2344286.0,2754627.0,2754627.0,525770.0,2020065.0,2620816.0,2755738.0,2754627.0,2755738.0,2755738.0,2197122.0,2755738.0,2459355.0,381430.0,2755625.0,2755738.0,2743884.0,1083252.0,2755738.0,2755738.0,2755738.0,2738937.0,2566303.0,2755738.0,899377.0,2755738.0,2755738.0,2755738.0,2754627.0,2755738.0,2344286.0,2755738.0,2755738.0,2672810.0,1107736.0,2723122.0,2755738.0,2755738.0,1249946.0,2755738.0,2690428.0,2755738.0,2523761.0,2753017.0,2755738.0,2447861.0,2755738.0,2754627.0,1457871.0,2755738.0,2754627.0,284277.0,2754627.0,2624210.0,2755738.0,2755738.0,2755738.0,2755738.0,2635546.0,2754627.0,2697527.0,2755738.0,2742263.0,2755738.0,2755738.0,32596.0,2755738.0,2745929.0,2755738.0,249596.0,2755738.0,2755737.0,1352836.0,2753017.0,2754627.0,2620816.0,2711416.0,2755738.0,224660.0,2755738.0,2745929.0,2755738.0,2755738.0,2754627.0,2740658.0,691000.0,2755738.0,2755709.0,2755738.0,2755738.0,2635546.0,2755737.0,2755738.0,2738937.0,2755738.0,182817.0,2754627.0,2755738.0,2755738.0,3518.0,2755738.0,2755738.0,5193.0,2755738.0,2755738.0,2755738.0,2755738.0,2755738.0,2755699.0,2755738.0,2754627.0,2738937.0,2755738.0,2755738.0,2654897.0,2755738.0,2755738.0,2755738.0,2755738.0,2755738.0,2755738.0,2747819.0,2755430.0,2747998.0,2745825.0,2755469.0,2731161.0,2697732.0,2697732.0,1344027.0,378533.0,2697732.0,2755738.0,2755710.0,437659.0,2684226.0,2754627.0,17848.0,2754994.0,22106.0,22106.0,23481.0,2754358.0,2755713.0,2132029.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2645693.0,2680601.0,2755738.0,2697732.0,2697732.0,2755440.0,29326.0,2697732.0,2697732.0,379288.0,2729592.0,2755738.0,143460.0,143460.0,143460.0,143460.0,143460.0,2697732.0,2731177.0,2697732.0,544492.0,2697732.0,2731114.0,2697732.0,2755738.0
mean,0.5697896,0.1813815,0.1719185,0.5090367,0.1266744,0.261921,0.08880831,0.1911463,0.22372,0.1896464,0.1788027,0.2246005,0.2141091,0.0680319,0.07834897,0.4909192,0.3690383,0.4853733,0.189523,0.1173611,0.2436012,0.5440467,0.1523367,0.1101159,0.268495,0.1557561,0.1573229,0.5771288,0.1823322,0.1007948,0.09856271,0.1555321,0.2197427,0.973346,0.05161758,0.2629959,0.08027397,0.300547,0.3744911,0.1863008,0.09098406,0.0594384,0.2833497,0.08260007,0.1149494,0.3842058,0.3594231,0.527587,0.05405064,0.3658791,0.1438524,0.05874789,0.4411772,0.7730539,0.4928718,0.2153187,0.986833,0.3193211,4.875511,0.2416067,0.09863605,0.236412,0.09044903,0.1959966,0.1551047,0.1510252,0.05578418,0.05903313,0.3966376,0.2277103,0.14431,0.1904485,0.2071185,0.2307406,0.1227286,0.05968328,0.1450411,0.2125973,0.1264695,0.1202833,0.1031678,0.09758156,0.06518607,0.2285352,0.08163921,0.1193792,0.1063809,0.06057915,0.005014644,0.09049371,0.4598642,0.04395965,0.967133,0.184291,0.005903199,0.05509275,0.4090309,0.02952634,0.07606576,0.07645032,0.04499492,0.2378035,0.0305589,0.03052385,1.0,0.005592525,0.00501214,0.182406,0.9950271,0.005043065,0.03049966,0.0362108,0.02606383,0.06126427,0.03300688,0.4839172,0.007669741,0.01084338,0.005458167,0.04911584,0.06589466,0.01304558,0.0170558,0.02911068,0.01025684,0.02972408,0.8158707,0.2076145,0.7802176,0.8954651,0.05531621,0.1929517,0.5032081,0.4850772,0.3646181,0.2018972,0.2251036,0.006909619,0.1707179,0.09966485,0.8436831,3.073555,0.08465137,0.006183443,0.758144,0.890131,0.241205,0.7946118,0.2702304,0.2736024,0.177742,0.5664429,0.2518239,0.001854713,2.172398,0.2583015,0.2544093,0.1525967,0.5093787,0.3830225,0.05873825,0.3128463,0.09665601,0.7291488,0.07576079,0.5306298,0.4013155,0.04441112,0.090139,0.2503599,0.1432559,0.198469,0.05986937,0.00617895,0.329267,0.03073464,0.2476212,0.01466615,0.1668529,0.2068384,0.03652167,0.1895381,0.382592,0.2067111,0.05181971,0.07538855,0.5
std,0.2597889,0.3150461,0.2480699,0.4209212,0.2885,0.219212,0.2585824,0.2726711,0.237311,0.2553531,0.2659341,0.2544961,0.2315906,0.3438308,0.2607221,0.1803908,0.231978,0.3376537,0.246143,0.962658,0.2551763,0.4983693,0.5620717,0.2148595,0.3266507,0.2554524,0.1680401,0.1933114,4.838469,0.2410035,0.3955523,0.2453462,0.4106529,0.1749695,0.2108412,0.2301444,0.6613525,0.2837437,0.3082708,0.2013424,0.4746291,0.3531632,0.2863306,0.2174936,0.2879382,0.1940575,0.3506414,0.6523927,0.3009198,0.1871958,0.2035899,0.5677652,0.4200877,0.3534903,0.3636171,0.3322996,0.113989,0.403688,1.41221,0.2892262,0.8404922,0.2842775,0.9771891,13.53784,0.2578101,0.2728783,0.2868281,0.2637073,0.2079812,0.2553787,0.207108,0.3801992,0.2522731,0.2549396,0.2709011,0.4142484,1.947383,0.2227654,0.225523,3.256852,0.3412346,0.2657361,0.4164299,0.1850898,1.010492,0.2283906,0.400236,0.1975468,0.003128122,0.4480873,0.1918307,0.2638366,0.1860533,0.2452607,0.009659827,0.3640993,10.70107,0.1547044,0.3864085,0.2886387,0.3355419,0.4590131,0.1578508,0.1577355,0.0,0.008235447,0.003414786,0.223189,0.07034322,0.003187398,0.1576621,0.173903,0.1436295,0.5092307,0.165028,0.4995644,0.0258391,0.0762797,0.02155772,0.1644521,0.25058,0.08939283,0.1091609,0.1534273,0.07236549,0.1553145,0.6667089,0.9664251,0.797622,0.2575335,0.4092931,0.2694669,0.5000056,0.4819102,0.24879,1.017668,0.2643853,0.02492217,0.2480132,0.2714814,0.3656005,1.704607,0.3583457,0.03449481,0.297638,0.252932,0.284149,0.4067688,9.982616,0.3556156,0.2391686,0.4955658,0.2455824,0.04302643,2.25396,0.2459568,0.2462288,0.3595984,0.2367615,0.2322263,0.2389644,0.2356049,0.3117686,0.5378275,0.2564335,0.4988635,0.4891377,0.2757776,0.272352,0.4303256,0.3418821,0.222107,0.1934434,0.03441738,0.292732,0.1583953,0.2170809,0.09783733,0.2709314,0.4013835,0.1747473,0.36767,0.240782,0.4012887,0.1802761,0.2191363,0.5
min,-0.4589548,5.02619e-09,-7.588799,9.19228e-09,2.600494e-09,-0.5450055,5.566545e-10,6.285293e-09,-0.000454,1.15455e-07,2.222165e-09,2.597303e-08,1.563241e-08,2.804822e-11,2.810394e-09,-17.28934,-0.02662015,-0.009615277,1e-06,-0.005178168,-2.652748,1.179499e-08,-3.672546,8.043521e-10,7.205206e-09,2.238322e-09,-0.007038857,-1.298901,-0.002958413,6.371256e-08,8.168135e-09,3.717132e-08,2.541465e-09,-0.002005767,4.268392e-10,-0.4055925,1.221753e-07,6.187214e-09,2.353218e-08,-0.01704956,5.377362e-08,1.26229e-08,5.133327e-09,2.823498e-07,-8.456786,-0.1024301,1.709353e-08,-0.009259018,-9.994886,-0.1999987,-0.002822135,3.539404e-09,5.823164e-09,1.558113e-07,5.891222e-08,1.573038e-09,0.0,7.384696e-09,0.0,-0.404109,4.650681e-10,3.655658e-09,6.627088e-09,6.503459e-09,2.101537e-09,1.214633e-09,3.026939e-09,3.381434e-10,-0.2999998,4.363841e-08,-0.039936,2.668066e-09,2.137169e-09,1.357813e-08,9.015968e-07,7.803936e-09,9.567845e-09,2.678714e-08,-7.397486,3.492131e-09,1.444001e-08,3.743007e-10,4.341855e-09,3.121485e-07,6.926695e-09,1.890425e-09,9.892342e-09,8.36379e-10,1.044159e-09,7.861255e-10,1.079826e-07,1.505726e-09,-0.09170651,-2.160207e-05,3.577781e-09,5.042657e-09,6.70242e-10,3.588444e-10,2.011803e-09,5.66056e-10,3.472485e-08,0.0,3.354748e-09,1.621446e-09,1.0,1.066114e-09,4.450989e-09,-3.2e-05,0.0,1.140757e-08,3.842997e-09,3.027279e-09,6.887991e-10,5.544154e-09,3.646872e-09,2.147189e-10,1.041092e-09,9.576146e-10,8.4432e-10,6.488856e-09,1.812521e-08,6.876775e-09,9.098536e-10,2.177146e-09,4.640562e-11,2.677825e-10,-91.99566,-170.237,-90.48382,-2.453451,1.251653e-08,9.620587e-10,3.98222e-09,1.233736e-08,-0.02592385,7.532992e-08,1.235147e-08,1.160856e-09,-7.604273,7.335065e-08,-0.02571099,1.0,5.256835e-07,1.691577e-09,-0.02375,5e-06,-0.976204,7.391609e-08,1.838434e-08,1.95921e-08,1.463474e-11,0.0,3.095199e-07,0.0,-1.0,9.613649e-08,1.134186e-07,0.0,-0.03207547,4.033095e-08,8.578643e-10,-0.04545438,5.37322e-09,-1.0,1.006008e-09,3.235349e-09,2.422876e-10,8.749324e-09,1.5e-05,6.097
074e-11,5.168406e-09,-0.015102,1.502323e-09,1.097922e-09,-0.012444,5.008304e-08,6.316773e-08,1.078787e-08,3.307923e-08,3.767347e-10,3.725073e-09,5.011415e-09,-0.014539,5.549692e-09,2.500991e-09,1.226024e-09,0.0
25%,0.3831461,0.005004422,0.01297296,0.04510782,0.003157263,0.1408603,0.00303679,0.006596537,0.071271,0.04959717,0.00488161,0.04076841,0.04050684,0.007421977,0.002697892,0.4337217,0.1992328,0.145068,0.061743,0.01382959,0.03801367,0.00542752,0.05766691,0.003291104,0.007093115,0.005330991,0.0569209,0.5056524,0.02125191,0.007052684,0.006370823,0.008315397,0.00318313,1.002255,0.002621919,0.1025239,0.01036287,0.007792189,0.08562242,0.07785055,0.009882456,0.002636337,0.007773858,0.009930224,0.0101399,0.2746646,0.05236181,0.1783479,0.003113053,0.281868,0.02177945,0.002678512,0.00783009,0.734868,0.1540575,0.004043517,1.0,0.005072662,4.0,0.1868665,0.002668117,0.004716057,0.00258387,0.002619182,0.003454494,0.003780889,0.008200932,0.002708818,0.2097052,0.02605302,0.050622,0.003094692,0.007832754,0.009401552,0.01101555,0.002625527,0.00262172,0.04160418,0.007210084,0.002680951,0.002842415,0.002910792,0.002597457,0.1691121,0.002592268,0.003677915,0.002714222,0.002748728,0.002505839,0.002644008,0.5015207,0.002679575,1.002185,0.03727702,0.002543528,0.002587398,0.002564699,0.002565336,0.002681801,0.002765304,0.002576223,0.0,0.002568609,0.002568628,1.0,0.002532261,0.002498434,0.031431,1.0,0.002510385,0.002568032,0.002581219,0.002555246,0.002563568,0.002565439,0.004797338,0.002542336,0.002510078,0.002505531,0.002711298,0.0026543,0.002519094,0.002534072,0.002561322,0.00251465,0.002562203,0.8964376,0.1335163,0.9016751,0.9703183,0.003299871,0.004712349,0.00497906,0.004989384,0.1677011,0.05164253,0.004985533,0.002519201,0.01279996,0.006191371,1.001038,2.0,0.002705424,0.002500687,0.530199,1.000731,0.057498,1.000495,0.02473918,0.005669821,0.004710862,0.0,0.05225048,0.0,-1.0,0.05211551,0.04867109,0.0,0.3321961,0.1504656,0.002639782,0.1403917,0.002732006,1.0,0.002694157,0.00528037,0.00413782,0.002573613,0.00586,0.003309463,0.002912529,0.071466,0.002813405,0.002506872,0.11612,0.002556519,0.009360612,0.002533513,0.003498978,0.003134602,0.00258237,0.003131629,0.186647,0.003134804,0.002756536,0.0
03135607,0.0
50%,0.5625103,0.02942747,0.05877124,0.703299,0.006298063,0.1741633,0.006073136,0.03533699,0.162177,0.109176,0.009758762,0.1347901,0.1113896,0.01401212,0.005398416,0.4692355,0.3357309,0.488568,0.130232,0.04040203,0.1489108,1.000515,0.09657723,0.006580791,0.1105292,0.1014485,0.112551,0.60764,0.05392224,0.01833163,0.01985219,0.04198717,0.006369996,1.004836,0.005243638,0.1869017,0.01540953,0.3164492,0.3088271,0.1354045,0.02791527,0.005269804,0.2053714,0.02037786,0.03828194,0.3992524,0.2309713,0.5876889,0.006188776,0.2895144,0.05441826,0.005349964,0.3346476,0.9538994,0.4413869,0.008081816,1.0,0.06040436,5.0,0.1906911,0.005332531,0.009443226,0.005169983,0.005233027,0.006904122,0.007546988,0.01214293,0.005411285,0.4050503,0.1274136,0.092806,0.006192518,0.1443999,0.1412397,0.04501534,0.005252692,0.005243548,0.1484279,0.04083627,0.005361545,0.005678312,0.005819474,0.005188328,0.1727449,0.005175842,0.00735888,0.005427316,0.005494002,0.005007737,0.005283691,0.5044569,0.00535123,1.004794,0.1044943,0.005085576,0.005170199,0.00512213,0.005126172,0.005360844,0.005527807,0.005116417,0.0,0.005125888,0.005129345,1.0,0.005059137,0.00499819,0.098686,1.0,0.005014529,0.005131484,0.005163158,0.005104503,0.005129516,0.005139098,0.009595365,0.00508629,0.005027404,0.005004099,0.005417212,0.005309855,0.005037144,0.005065552,0.005119804,0.005028387,0.005124072,0.9503585,0.1365271,0.956147,0.9733423,0.006534612,0.009426319,0.009963859,0.00996411,0.3270154,0.1361784,0.009964538,0.005039455,0.05780595,0.03991443,1.004028,3.0,0.005427276,0.005007518,0.914035,1.003846,0.145425,1.003662,0.1190709,0.2614354,0.009427318,1.0,0.161817,0.0,3.0,0.1825516,0.1763646,0.0,0.5560139,0.2941498,0.005282578,0.2752673,0.005466611,1.0,0.005383687,0.9995326,0.008285129,0.00514993,0.011803,0.006621373,0.005825953,0.155482,0.005629264,0.005006849,0.219125,0.005113684,0.254032,0.005075855,0.007057276,0.006265835,0.005161997,0.006260988,0.365164,0.006262268,0.005515115,0.006262628,0.5
75%,0.796226,0.2722469,0.220806,1.000138,0.00943625,0.321681,0.009110461,0.2930419,0.298865,0.2302309,0.256222,0.3266556,0.3247141,0.04052321,0.008094985,0.5363462,0.5185463,0.8073282,0.245527,0.1719443,0.3789003,1.005155,0.1650917,0.009867905,0.5646327,0.2056815,0.2012176,0.6774505,0.2501735,0.07783945,0.08352904,0.1834907,0.009552684,1.007419,0.007862033,0.3557537,0.05414962,0.4824219,0.6320478,0.2283556,0.07924614,0.007898211,0.4819464,0.05768172,0.1228015,0.4822684,0.61763,0.8534,0.009228381,0.4463872,0.1929326,0.008023867,0.926433,1.002889,1.000565,0.3600081,1.0,0.6563929,6.0,0.2005785,0.007999611,0.426444,0.007759011,0.007849824,0.5010116,0.2543188,0.02338407,0.008114827,0.5056183,0.3550601,0.160428,0.009285267,0.2929975,0.3392511,0.1414431,0.00787772,0.007861614,0.3093936,0.148428,0.008043585,0.008522708,0.008726515,0.00778388,0.1763483,0.007764302,0.2036849,0.00813586,0.008242629,0.007508783,0.007930521,0.5073917,0.008025196,1.00739,0.2403983,0.007629148,0.007750454,0.007687868,0.007686022,0.008049478,0.008289298,0.007682534,0.0,0.007691052,0.007696218,1.0,0.007587125,0.007496568,0.251968,1.0,0.007518961,0.007697718,0.007747376,0.007659014,0.007697132,0.007710398,1.004785,0.007628316,0.007543065,0.007504336,0.008120608,0.007961166,0.007560548,0.007591859,0.007682962,0.007543435,0.007688466,0.9702758,0.1395348,0.9754663,0.976367,0.009732963,0.3302781,1.004977,0.9654672,0.5159463,0.2626941,0.3401017,0.007561114,0.2194425,0.0805242,1.007013,4.0,0.008044097,0.007510287,1.004149,1.006901,0.274335,1.006828,0.348336,0.4558666,0.2091809,1.0,0.3928329,0.0,4.0,0.4038574,0.3988226,0.0,0.7064388,0.5742237,0.007926349,0.4545678,0.008196925,1.0,0.008073997,1.004321,1.003684,0.007725525,0.077193,0.009939074,0.008728482,0.266088,0.008455752,0.007509169,0.445527,0.007691999,0.2583827,0.007584555,0.5016201,0.009396331,0.007743999,0.009395453,0.550729,0.009393951,0.008271623,0.009395417,1.0
max,1.01,5.389619,1.32406,1.01,3.256284,5.482888,8.988807,1.625262,4.189044,10.11162,5.634724,6.091593,1.599662,140.5655,1.01,16.3199,1.643008,8.971425,40.444644,798.7061,1.25275,1.017768,244.0427,2.676178,27.42486,11.6026,1.01,2.428051,4097.433,7.901939,87.06509,1.767753,1.01,1.01,1.01,3.948271,420.749,1.23141,2.767759,10.95282,276.1753,35.00452,1.261945,2.839635,48.0043,2.12523,1.01,868.5154,56.74432,3.801432,11.10095,330.2694,1.01,1.01,1.01,1.01,1.0,1.01,6.0,175.3314,14.4482,1.01,288.2943,13000.91,3.507386,7.004608,67.03231,3.01,5.308442,1.546055,3.853647,1.26939,4.509085,4.275118,15.13179,46.60665,324.9039,10.22254,15.52364,1499.22,16.00864,18.00687,38.00808,1.509999,231.7589,8.00978,21.00367,12.5026,1.00537,8.009993,3.007715,4.049059,1.01,22.27338,1.005321,38.00753,4837.305,1.01,29.00559,18.50013,12.02397,2.0,1.01,1.01,1.0,1.001528,1.000777,2.66925,1.0,1.004729,1.01,1.01,1.01,45.00603,1.01,1.01,1.009996,1.01,1.009979,3.006422,2.01,1.009999,1.009999,1.01,1.009999,1.01,1.032727,682.6593,1.054369,4.052048,95.47155,1.089998,1.01,1.302976,5.631732,356.6558,6.343187,1.00234,1.327812,9.509053,1.01,7.0,9.008856,1.009995,1.01,1.01,2.060899,1.01,5755.076,5.456269,7.60796,1.0,1.99954,1.0,6.0,1.970907,1.977007,1.0,1.791115,2.009276,100.0013,2.824142,10.00688,1.0,1.01,1.021384,1.01,22.00488,9.028084,1.028044,1.491624,6.039006,1.509999,1.00999,1.01,1.009999,1.75991,1.009998,3.005383,1.01,1.01,1.33991,2.229368,1.01,1.343331,4.82763,1.0


In [14]:
# Persist the balanced dataset next to the raw training file
downsampled_data.to_parquet(
    os.path.join(data_dir, "train_data_downsampled.parquet")
)