In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Create the dataset
# Small toy table: one categorical column (Country), two numeric columns
# (Age, Salary) and a Yes/No target column (Purchased).
data = {
    'Country': ['France', 'Spain', 'Germany', 'Spain', 'Germany',
                'France', 'Spain', 'France', 'Germany', 'France'],
    'Age': [44, 27, 30, 38, 40, 35, 31, 48, 50, 37],
    'Salary': [72000, 48000, 54000, 61000, 85000,
               58000, 52000, 79000, 83000, 67000],
    'Purchased': ['No', 'Yes', 'No', 'No', 'Yes',
                  'Yes', 'No', 'Yes', 'No', 'Yes']
}
df = pd.DataFrame(data)

# Select numerical features
# The column list is kept in one place so the scaled column names below are
# derived from it instead of being retyped.
numeric_columns = ['Age', 'Salary']
numerical_features = df[numeric_columns]

# Standardization (Z-score normalization)
scaler_standard = StandardScaler()
standardized_features = scaler_standard.fit_transform(numerical_features)
# The scaler output is wrapped with the *source* index: pd.concat(axis=1)
# below aligns rows by index label, so a fresh RangeIndex here would silently
# misalign (and NaN-pad) the result if df ever carries a non-default index.
df_standardized = pd.DataFrame(
    standardized_features,
    index=numerical_features.index,
    columns=[name + '_Standardized' for name in numeric_columns],
)

# Normalization (Min-Max scaling)
scaler_minmax = MinMaxScaler()
normalized_features = scaler_minmax.fit_transform(numerical_features)
# Same index handling as for the standardized frame above.
df_normalized = pd.DataFrame(
    normalized_features,
    index=numerical_features.index,
    columns=[name + '_Normalized' for name in numeric_columns],
)

# Combine results
# Column-wise concat: original columns first, then both scaled variants.
result = pd.concat([df, df_standardized, df_normalized], axis=1)
print("Dataset with scaled features:")
# Reorder for display so the target column comes last.
print(result[['Country', 'Age', 'Salary', 'Age_Standardized',
              'Salary_Standardized', 'Age_Normalized', 'Salary_Normalized', 'Purchased']])

Dataset with scaled features:
   Country  Age  Salary  Age_Standardized  Salary_Standardized  \
0   France   44   72000          0.825723             0.480912   
1    Spain   27   48000         -1.513825            -1.411200   
2  Germany   30   54000         -1.100964            -0.938172   
3    Spain   38   61000          0.000000            -0.386306   
4  Germany   40   85000          0.275241             1.505805   
5   France   35   58000         -0.412861            -0.622820   
6    Spain   31   52000         -0.963343            -1.095848   
7   France   48   79000          1.376205             1.032778   
8  Germany   50   83000          1.651446             1.348129   
9   France   37   67000         -0.137620             0.086722   

   Age_Normalized  Salary_Normalized Purchased  
0        0.739130           0.648649        No  
1        0.000000           0.000000       Yes  
2        0.130435           0.162162        No  
3        0.478261           0.351351        No 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset and unpack the pieces used below
iris = load_iris()
X, y = iris.data, iris.target
feature_names = iris.feature_names
target_names = iris.target_names

# Standardize the features before running PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit PCA with all components kept and project the scaled data
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Per-component share of variance and its running total
explained_variance = pca.explained_variance_ratio_
cumulative_variance = explained_variance.cumsum()

print("Explained variance ratio:", explained_variance)
print("Cumulative explained variance:", cumulative_variance)

# Figure 1: individual (bars) vs. cumulative (step line) explained variance
component_index = range(1, len(explained_variance) + 1)  # 1-based component numbers
fig_var, ax_var = plt.subplots(figsize=(8, 4))
ax_var.bar(
    component_index,
    explained_variance,
    alpha=0.5,
    align='center',
    label='Individual explained variance',
)
ax_var.step(
    component_index,
    cumulative_variance,
    where='mid',
    label='Cumulative explained variance',
)
ax_var.set_ylabel('Explained variance ratio')
ax_var.set_xlabel('Principal component index')
ax_var.legend(loc='best')
ax_var.set_title('Explained Variance by Principal Components')
plt.show()

# Figure 2: samples projected onto the first two principal components,
# one scatter series per class
fig_pca, ax_pca = plt.subplots(figsize=(8, 6))
colors = ['navy', 'turquoise', 'darkorange']
lw = 2

for class_idx, (color, target_name) in enumerate(zip(colors, target_names)):
    in_class = y == class_idx  # boolean row mask for this class
    ax_pca.scatter(
        X_pca[in_class, 0],
        X_pca[in_class, 1],
        color=color,
        alpha=0.8,
        lw=lw,
        label=target_name,
    )

ax_pca.legend(loc='best', shadow=False, scatterpoints=1)
ax_pca.set_title('PCA of IRIS dataset')
# Axis labels carry each component's share of the total variance
ax_pca.set_xlabel('Principal Component 1 (%.2f%%)' % (explained_variance[0]*100))
ax_pca.set_ylabel('Principal Component 2 (%.2f%%)' % (explained_variance[1]*100))
plt.show()

# Tabulate the first two components together with the class name
pca_df = pd.DataFrame(data=X_pca[:, :2], columns=['PC1', 'PC2'])
species_by_label = dict(enumerate(target_names))  # integer label -> class name
pca_df['Species'] = pd.Series(y).map(species_by_label)

print("\nFirst two principal components:")
print(pca_df.head())