In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
data = {
    'Age': [25, 30, 35, None, 40],
    'Salary': [50000, 60000, None, 80000, 100000],
    'Country': ['USA', 'Canada', 'USA', 'UK', 'Canada'],
    'Purchased': ['NO', 'YES', 'NO', 'YES', 'NO']
}
df = pd.DataFrame(data)
df['Purchased'] = LabelEncoder().fit_transform(df['Purchased'])
df = pd.get_dummies(df, columns=['Country'], drop_first=True)
df['Age'].fillna(df['Age'].mean(), inplace=True)
df['Salary'].fillna(df['Salary'].mean(), inplace=True)
scaler = StandardScaler()
df[['Age', 'Salary']] = scaler.fit_transform(df[['Age', 'Salary']])
X = df.drop('Purchased', axis=1)
y = df['Purchased']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Training Set:\n", X_train)
print("\nTesting Set:\n", X_test)                                                      

Training Set:
    Age    Salary  Country_UK  Country_USA
4  1.5  1.601112       False        False
2  0.5  0.000000       False         True
0 -1.5 -1.310001       False         True
3  0.0  0.436667        True        False

Testing Set:
    Age    Salary  Country_UK  Country_USA
1 -0.5 -0.727778       False        False


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Salary'].fillna(df['Salary'].mean(), inplace=True)
