In [2]:

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.model_selection import train_test_split

In [3]:
# Load dataset Iris
iris = load_iris()
X = iris.data
y = iris.target

In [4]:
# Buat DataFrame
df = pd.DataFrame(data=X, columns=[f'feature_{i}' for i in range(X.shape[1])])
df['target'] = y

In [5]:
# Tampilan DataFrame asli
print("DataFrame Asli:")
df.head(10)
df

DataFrame Asli:


Unnamed: 0,feature_0,feature_1,feature_2,feature_3,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [6]:
# Random Sampling
sampled_df = df.sample(n=50, random_state=1) 
print("DataFrame Setelah Random Sampling:")
sampled_df

DataFrame Setelah Random Sampling:


Unnamed: 0,feature_0,feature_1,feature_2,feature_3,target
14,5.8,4.0,1.2,0.2,0
98,5.1,2.5,3.0,1.1,1
75,6.6,3.0,4.4,1.4,1
16,5.4,3.9,1.3,0.4,0
131,7.9,3.8,6.4,2.0,2
56,6.3,3.3,4.7,1.6,1
141,6.9,3.1,5.1,2.3,2
44,5.1,3.8,1.9,0.4,0
29,4.7,3.2,1.6,0.2,0
120,6.9,3.2,5.7,2.3,2


# Stratified Sampling 

In [7]:
stratified_sample = df.groupby('target', group_keys=False).apply(lambda x: x.sample(2, random_state=1))
print("DataFrame Setelah Stratified Sampling:")
stratified_sample

DataFrame Setelah Stratified Sampling:


  stratified_sample = df.groupby('target', group_keys=False).apply(lambda x: x.sample(2, random_state=1))


Unnamed: 0,feature_0,feature_1,feature_2,feature_3,target
27,5.2,3.5,1.5,0.2,0
35,5.0,3.2,1.2,0.2,0
77,6.7,3.0,5.0,1.7,1
85,6.0,3.4,4.5,1.6,1
127,6.1,3.0,4.9,1.8,2
135,7.7,3.0,6.1,2.3,2


In [8]:
# Seleksi Fitur dengan ANOVA (f_classif)
k_best = SelectKBest(score_func=f_classif, k=2)  # Pilih 2 fitur terbaik
X_selected = k_best.fit_transform(X, y)

In [9]:
# Tampilan DataFrame setelah seleksi fitur
selected_df = pd.DataFrame(data=X_selected, columns=['selected_feature_1', 'selected_feature_2'])
selected_df['target'] = y
print("\nDataFrame Setelah Seleksi Fitur:")
print(selected_df.head(10))


DataFrame Setelah Seleksi Fitur:
   selected_feature_1  selected_feature_2  target
0                 1.4                 0.2       0
1                 1.4                 0.2       0
2                 1.3                 0.2       0
3                 1.5                 0.2       0
4                 1.4                 0.2       0
5                 1.7                 0.4       0
6                 1.4                 0.3       0
7                 1.5                 0.2       0
8                 1.4                 0.2       0
9                 1.5                 0.1       0


In [10]:
# Train-Test Split (80% train, 20% test)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=1)
print("\nDataFrame Train Setelah Train-Test Split:")
print(train_df.head(10))
print("\nDataFrame Test Setelah Train-Test Split:")
print(test_df.head(10))


DataFrame Train Setelah Train-Test Split:
     feature_0  feature_1  feature_2  feature_3  target
91         6.1        3.0        4.6        1.4       1
135        7.7        3.0        6.1        2.3       2
69         5.6        2.5        3.9        1.1       1
128        6.4        2.8        5.6        2.1       2
114        5.8        2.8        5.1        2.4       2
48         5.3        3.7        1.5        0.2       0
53         5.5        2.3        4.0        1.3       1
28         5.2        3.4        1.4        0.2       0
54         6.5        2.8        4.6        1.5       1
108        6.7        2.5        5.8        1.8       2

DataFrame Test Setelah Train-Test Split:
     feature_0  feature_1  feature_2  feature_3  target
14         5.8        4.0        1.2        0.2       0
98         5.1        2.5        3.0        1.1       1
75         6.6        3.0        4.4        1.4       1
16         5.4        3.9        1.3        0.4       0
131        7.9     

In [11]:
# Seleksi Fitur dengan ANOVA (f_classif)
k_best = SelectKBest(score_func=f_classif, k=2)  # Pilih 2 fitur terbaik
X_selected = k_best.fit_transform(X, y)

# Tampilan DataFrame setelah seleksi fitur
selected_df = pd.DataFrame(data=X_selected, columns=['selected_feature_1', 'selected_feature_2'])
selected_df['target'] = y
print("\nDataFrame Setelah Seleksi Fitur:")
print(selected_df.head(10))


DataFrame Setelah Seleksi Fitur:
   selected_feature_1  selected_feature_2  target
0                 1.4                 0.2       0
1                 1.4                 0.2       0
2                 1.3                 0.2       0
3                 1.5                 0.2       0
4                 1.4                 0.2       0
5                 1.7                 0.4       0
6                 1.4                 0.3       0
7                 1.5                 0.2       0
8                 1.4                 0.2       0
9                 1.5                 0.1       0
