In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification, load_breast_cancer
from sklearn.cluster import KMeans

# Sampling Algorithms

In [2]:
cancer = load_breast_cancer()
data = pd.DataFrame(cancer.data, columns=cancer.feature_names)
data["Target"] = cancer.target
data.loc[data["Target"] == 0, "Target"] = "malignant" # ממאיר
data.loc[data["Target"] == 1, "Target"] = "benign" # שפיר
data.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,malignant
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,malignant
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,malignant
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,malignant
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,malignant


In [3]:
data["Target"].value_counts()

benign       357
malignant    212
Name: Target, dtype: int64

In [4]:
data["Target"].value_counts(normalize=True)

benign       0.627417
malignant    0.372583
Name: Target, dtype: float64

* **Sampling** is a *process used in statistical analysis in which a predetermined number of observations are taken from a larger population.*

---

## 1. Simple Random Sampling
* **Simple random sampling** is the *basic sampling technique where we select a group of subjects (a sample) for study from a larger group (a population).* Each individual is chosen entirely by chance and each member of the population has an equal chance of being included in the sample. Every possible sample of a given size has the same chance of selection. 

![](https://research-methodology.net/wp-content/uploads/2015/04/Simple-random-sampling2.png)

In [8]:
data.sample(n=5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
193,12.34,26.86,81.15,477.4,0.1034,0.1353,0.1085,0.04562,0.1943,0.06937,...,39.34,101.7,768.9,0.1785,0.4706,0.4425,0.1459,0.3215,0.1205,malignant
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,malignant
558,14.59,22.68,96.39,657.1,0.08473,0.133,0.1029,0.03736,0.1454,0.06147,...,27.27,105.9,733.5,0.1026,0.3171,0.3662,0.1105,0.2258,0.08004,benign
554,12.88,28.92,82.5,514.3,0.08123,0.05824,0.06195,0.02343,0.1566,0.05708,...,35.74,88.84,595.7,0.1227,0.162,0.2439,0.06493,0.2372,0.07242,benign
118,15.78,22.91,105.7,782.6,0.1155,0.1752,0.2133,0.09479,0.2096,0.07331,...,30.5,130.3,1272.0,0.1855,0.4925,0.7356,0.2034,0.3274,0.1252,malignant


In [10]:
data.sample(frac=0.25)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
464,13.17,18.22,84.28,537.3,0.07466,0.05994,0.04859,0.02870,0.1454,0.05549,...,23.89,95.10,687.6,0.1282,0.19650,0.18760,0.10450,0.2235,0.06925,benign
49,13.49,22.30,86.91,561.0,0.08752,0.07698,0.04751,0.03384,0.1809,0.05718,...,31.82,99.00,698.8,0.1162,0.17110,0.22820,0.12820,0.2871,0.06917,benign
52,11.94,18.24,75.71,437.6,0.08261,0.04751,0.01972,0.01349,0.1868,0.06110,...,21.33,83.67,527.2,0.1144,0.08906,0.09203,0.06296,0.2785,0.07408,benign
376,10.57,20.22,70.15,338.3,0.09073,0.16600,0.22800,0.05941,0.2188,0.08450,...,22.82,76.51,351.9,0.1143,0.36190,0.60300,0.14650,0.2597,0.12000,benign
240,13.64,15.60,87.38,575.3,0.09423,0.06630,0.04705,0.03731,0.1717,0.05660,...,19.05,94.11,683.4,0.1278,0.12910,0.15330,0.09222,0.2530,0.06510,benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,13.16,20.54,84.06,538.7,0.07335,0.05275,0.01800,0.01256,0.1713,0.05888,...,28.46,95.29,648.3,0.1118,0.16460,0.07698,0.04195,0.2687,0.07429,benign
437,14.04,15.98,89.78,611.2,0.08458,0.05895,0.03534,0.02944,0.1714,0.05898,...,21.58,101.20,750.0,0.1195,0.12520,0.11170,0.07453,0.2725,0.07234,benign
93,13.45,18.30,86.60,555.1,0.10220,0.08165,0.03974,0.02780,0.1638,0.05710,...,25.94,97.59,699.4,0.1339,0.17510,0.13810,0.07911,0.2678,0.06603,benign
148,14.44,15.18,93.97,640.1,0.09970,0.10210,0.08487,0.05532,0.1724,0.06081,...,19.85,108.60,766.9,0.1316,0.27350,0.31030,0.15990,0.2691,0.07683,benign


## 2. Stratified Sampling

---

* **Stratified random sampling** is a method of sampling that *involves the division of a population into smaller sub-groups known* as **strata** In stratified random sampling or stratification, the strata are formed based on members' shared attributes or characteristics such as income or educational attainment.

* **Stratified random sampling** is also called *proportional random sampling or quota random sampling.*

<img src="https://www.qualtrics.com/m/assets/wp-content/uploads/2021/08/Screen-Shot-2021-08-31-at-10.17.31-AM.png" alt="Drawing" style="width: 500px;"/>


In [11]:
X_train, X_test, y_train, y_test = train_test_split(data.drop(columns=["Target"]), 
                                                    data["Target"],
                                                    stratify=data["Target"],
                                                    test_size=0.2)

In [12]:
y_train.value_counts(normalize=True)

benign       0.626374
malignant    0.373626
Name: Target, dtype: float64

In [13]:
y_test.value_counts(normalize=True)

benign       0.631579
malignant    0.368421
Name: Target, dtype: float64

## 3. Systematic Sampling

Systematic sampling is defined as a probability sampling approach where the elements from a target population are selected from a random starting point and after a fixed sampling interval.

We calculate the sampling interval by dividing the entire population size by the desired sample size.

Note that, Systematic Sampling usually produces a random sample but <b>is not addressing the bias in the created sample</b>.

In [14]:
def systematic_sampling(df, step): 
    indexes = np.arange(0, len(df), step=step)
    systematic_sample = df.iloc[indexes]
    return systematic_sample

In [15]:
systematic_sampling(data, 5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.1622,0.66560,0.7119,0.26540,0.4601,0.11890,malignant
5,12.45,15.70,82.57,477.1,0.12780,0.17000,0.15780,0.08089,0.2087,0.07613,...,23.75,103.40,741.6,0.1791,0.52490,0.5355,0.17410,0.3985,0.12440,malignant
10,16.02,23.24,102.70,797.8,0.08206,0.06669,0.03299,0.03323,0.1528,0.05697,...,33.88,123.80,1150.0,0.1181,0.15510,0.1459,0.09975,0.2948,0.08452,malignant
15,14.54,27.54,96.73,658.8,0.11390,0.15950,0.16390,0.07364,0.2303,0.07077,...,37.13,124.10,943.2,0.1678,0.65770,0.7026,0.17120,0.4218,0.13410,malignant
20,13.08,15.71,85.63,520.0,0.10750,0.12700,0.04568,0.03110,0.1967,0.06811,...,20.49,96.09,630.5,0.1312,0.27760,0.1890,0.07283,0.3184,0.08183,benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,13.62,23.23,87.19,573.2,0.09246,0.06747,0.02974,0.02443,0.1664,0.05801,...,29.09,97.58,729.8,0.1216,0.15170,0.1049,0.07174,0.2642,0.06953,benign
550,10.86,21.48,68.51,360.5,0.07431,0.04227,0.00000,0.00000,0.1661,0.05948,...,24.77,74.08,412.3,0.1001,0.07348,0.0000,0.00000,0.2458,0.06592,benign
555,10.29,27.61,65.67,321.4,0.09030,0.07658,0.05999,0.02738,0.1593,0.06127,...,34.91,69.57,357.6,0.1384,0.17100,0.2000,0.09127,0.2226,0.08283,benign
560,14.05,27.15,91.38,600.4,0.09929,0.11260,0.04462,0.04304,0.1537,0.06171,...,33.17,100.20,706.7,0.1241,0.22640,0.1326,0.10480,0.2250,0.08321,benign


## 4. Cluster Sampling

Cluster sampling is a probability sampling technique where we divide the population into multiple clusters(groups) based on certain clustering criteria. Then we select a random cluster(s) with simple random or systematic sampling techniques. So, in cluster sampling, the entire population is divided into clusters or segments and then cluster(s) are randomly selected.

Basic idea:
* Evaluate K-Means. 
* Sample <strong>equal number of observations</strong> from each cluster.

Note that, Systematic Sampling usually produces a random sample but is not addressing the bias in the created sample.


In [18]:
kmeans = KMeans(n_clusters = 4, n_init="auto")
kmeans.fit(data.drop(columns=["Target"]))

In [19]:
data["Cluster"] = kmeans.labels_
data

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target,Cluster
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,malignant,1
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,malignant,1
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,malignant,1
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,malignant,2
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,malignant,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,malignant,1
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,malignant,1
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,malignant,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,malignant,1


In [23]:
data.groupby("Cluster").sample(n=3)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target,Cluster
19,13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,...,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259,benign,0
536,14.27,22.55,93.77,629.8,0.1038,0.1154,0.1463,0.06139,0.1926,0.05982,...,104.3,728.3,0.138,0.2733,0.4234,0.1362,0.2698,0.08351,malignant,0
157,16.84,19.46,108.4,880.2,0.07445,0.07223,0.0515,0.02771,0.1844,0.05268,...,120.3,1032.0,0.08774,0.171,0.1882,0.08436,0.2527,0.05972,benign,0
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,malignant,1
161,19.19,15.94,126.3,1157.0,0.08694,0.1185,0.1193,0.09667,0.1741,0.05176,...,146.6,1495.0,0.1124,0.2016,0.2264,0.1777,0.2443,0.06251,malignant,1
244,19.4,23.5,129.1,1155.0,0.1027,0.1558,0.2049,0.08886,0.1978,0.06,...,144.9,1417.0,0.1463,0.2968,0.3458,0.1564,0.292,0.07614,malignant,1
497,12.47,17.31,80.45,480.1,0.08928,0.0763,0.03609,0.02369,0.1526,0.06046,...,92.82,607.3,0.1276,0.2506,0.2028,0.1053,0.3035,0.07661,benign,2
405,10.94,18.59,70.39,370.0,0.1004,0.0746,0.04944,0.02932,0.1486,0.06615,...,82.76,472.4,0.1363,0.1644,0.1412,0.07887,0.2251,0.07732,benign,2
107,12.36,18.54,79.01,466.7,0.08477,0.06815,0.02643,0.01921,0.1602,0.06066,...,85.56,544.1,0.1184,0.1963,0.1937,0.08442,0.2983,0.07185,benign,2
339,23.51,24.27,155.1,1747.0,0.1069,0.1283,0.2308,0.141,0.1797,0.05506,...,202.4,2906.0,0.1515,0.2678,0.4819,0.2089,0.2593,0.07738,malignant,3


## Random Undersampling and Oversampling

---

![](https://miro.medium.com/max/700/0*u6pKLqdCDsG_5kXa.png)

* A widely adopted technique for dealing with highly imbalanced datasets is called resampling. It consists of *removing samples from the majority class* (**under-sampling**) and/or *adding more examples from the minority class* (**over-sampling**).

In [24]:
x = 5 
noise = np.random.random()
print(x + noise)
print(x - noise)

5.098312480027333
4.901687519972667


In [None]:
X, y = make_classification(
    n_classes=2, class_sep=1.5, weights=[0.9, 0.1],
    n_informative=3, n_redundant=1, flip_y=0,
    n_features=20, n_clusters_per_class=1,
    n_samples=100, random_state=10
)
X = pd.DataFrame(X)
X['Target'] = y

We can now do random oversampling and undersampling using:

In [None]:
num_0 = len(X[X['Target']==0])
num_1 = len(X[X['Target']==1])

# random undersample
undersampled_data = pd.concat([X[X['Target']==0].sample(num_1, replace=True) , X[X['Target']==1] ])
print(len(undersampled_data))

In [None]:
# random oversample
oversampled_data = pd.concat([X[X['Target']==1] , X[X['Target']==0].sample(num_0, replace=True) ])
print(len(oversampled_data))

In [25]:
data["Target"].value_counts()

benign       357
malignant    212
Name: Target, dtype: int64

In [None]:
pd.concat([df1, df2, df3, df4, df5])

In [29]:
data[data["Target"] == "malignant"].sample(frac=0.2)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target,Cluster
28,15.3,25.27,102.4,732.4,0.1082,0.1697,0.1683,0.08751,0.1926,0.0654,...,149.3,1269.0,0.1641,0.611,0.6335,0.2024,0.4027,0.09876,malignant,0
323,20.34,21.51,135.9,1264.0,0.117,0.1875,0.2565,0.1504,0.2569,0.0667,...,171.1,1938.0,0.1592,0.4492,0.5344,0.2685,0.5558,0.1024,malignant,1
26,14.58,21.53,97.41,644.8,0.1054,0.1868,0.1425,0.08783,0.2252,0.06924,...,122.4,896.9,0.1525,0.6643,0.5539,0.2701,0.4264,0.1275,malignant,0
389,19.55,23.21,128.9,1174.0,0.101,0.1318,0.1856,0.1021,0.1989,0.05884,...,142.0,1313.0,0.1251,0.2414,0.3829,0.1825,0.2576,0.07602,malignant,1
34,16.13,17.88,107.0,807.2,0.104,0.1559,0.1354,0.07752,0.1998,0.06515,...,132.7,1261.0,0.1446,0.5804,0.5274,0.1864,0.427,0.1233,malignant,0
330,16.03,15.51,105.8,793.2,0.09491,0.1371,0.1204,0.07041,0.1782,0.05976,...,124.3,1070.0,0.1435,0.4478,0.4956,0.1981,0.3019,0.09124,malignant,0
177,16.46,20.11,109.3,832.9,0.09831,0.1556,0.1793,0.08866,0.1794,0.06323,...,123.5,981.2,0.1415,0.4667,0.5862,0.2035,0.3054,0.09519,malignant,0
254,19.45,19.33,126.5,1169.0,0.1035,0.1188,0.1379,0.08591,0.1776,0.05647,...,163.1,1972.0,0.1497,0.3161,0.4317,0.1999,0.3379,0.0895,malignant,1
244,19.4,23.5,129.1,1155.0,0.1027,0.1558,0.2049,0.08886,0.1978,0.06,...,144.9,1417.0,0.1463,0.2968,0.3458,0.1564,0.292,0.07614,malignant,1
164,23.27,22.04,152.1,1686.0,0.08439,0.1145,0.1324,0.09702,0.1801,0.05553,...,184.2,2403.0,0.1228,0.3583,0.3948,0.2346,0.3589,0.09187,malignant,3


In [30]:
data["Target"].value_counts()

benign       357
malignant    212
Name: Target, dtype: int64

In [32]:
data[data["Target"] == "malignant"]

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target,Cluster
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,184.60,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.11890,malignant,1
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,158.80,1956.0,0.1238,0.1866,0.2416,0.1860,0.2750,0.08902,malignant,1
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,152.50,1709.0,0.1444,0.4245,0.4504,0.2430,0.3613,0.08758,malignant,1
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.17300,malignant,2
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,152.20,1575.0,0.1374,0.2050,0.4000,0.1625,0.2364,0.07678,malignant,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,20.92,25.09,143.00,1347.0,0.10990,0.22360,0.31740,0.14740,0.2149,0.06879,...,179.10,1819.0,0.1407,0.4186,0.6599,0.2542,0.2929,0.09873,malignant,1
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,166.10,2027.0,0.1410,0.2113,0.4107,0.2216,0.2060,0.07115,malignant,1
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,155.00,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,malignant,1
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,126.70,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.07820,malignant,0


In [33]:
samples = data[data["Target"] == "malignant"].sample(frac=0.3)
new_data = pd.concat([samples, data])
new_data["Target"].value_counts()

benign       357
malignant    276
Name: Target, dtype: int64

In [36]:
samples = data[data["Target"] == "malignant"].sample(frac=0.3)
samples_copy = samples.drop(columns=["Target", "Cluster"])
samples_copy.shape

(64, 30)

In [45]:
noises = np.random.random(size=samples_copy.shape)
res = samples_copy + noises
res["Target"] = samples["Target"].copy()
res["Cluster"] = samples["Cluster"].copy()
new_data = pd.concat([res, data])
new_data["Target"].value_counts()

benign       357
malignant    276
Name: Target, dtype: int64

In [49]:
res

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target,Cluster
564,22.167267,23.231978,142.228516,1479.869960,0.598686,0.694460,1.146369,0.640413,0.232274,0.599067,...,167.015471,2027.087202,0.969466,1.170026,0.877978,0.606224,0.467054,0.618982,malignant,1
300,19.553608,19.773977,130.117844,1217.282296,0.612417,1.022322,0.946904,0.317682,0.560424,0.659873,...,171.799013,2053.122415,0.957948,0.655264,1.052444,0.828543,0.604817,0.164715,malignant,1
32,17.268818,24.646654,113.143577,899.545381,0.678802,0.212098,0.264427,0.819690,0.594307,0.934033,...,136.439179,1344.323712,0.633782,0.634471,1.078094,1.183816,1.135285,0.401484,malignant,1
53,18.738495,18.891994,120.305820,1033.209554,0.685337,0.339200,0.604939,0.475817,1.066395,0.482390,...,135.501784,1321.881715,0.174240,1.012271,0.906830,1.074606,0.915771,0.229560,malignant,1
223,16.172123,20.275363,103.235699,761.577438,0.177545,0.469726,0.991346,0.598814,0.726602,0.386115,...,126.338959,1088.841325,0.309909,0.649310,0.999956,0.623345,1.278279,0.159896,malignant,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,18.771883,17.505945,118.362030,990.509417,0.335936,1.024021,0.800110,0.939750,0.367864,0.916816,...,133.399876,1292.925454,0.948501,0.480628,0.484864,0.905024,1.221827,0.692192,malignant,1
261,17.701695,23.297344,111.592776,933.830569,0.467290,0.322900,0.777837,0.048405,1.155608,1.042092,...,129.060753,1218.964258,1.015124,1.039716,0.993997,0.322909,0.476811,0.093032,malignant,1
563,21.377251,25.750812,143.361103,1347.075174,0.521268,0.855641,0.473883,0.965587,0.329304,0.965419,...,179.186971,1819.671162,0.314214,1.126320,1.209912,0.462289,1.234445,0.271910,malignant,1
15,15.103424,28.176348,96.734149,658.821901,0.469290,0.415057,0.856840,0.248906,0.717693,0.509380,...,124.982208,943.496047,0.407786,1.011078,0.897003,0.886646,0.725815,0.523625,malignant,0
