# Implement Dimensionality reduction using Principle Component Analysis (PCA) method on a dataset (For example Iris). 

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

# Load the DataFrame
df = pd.read_csv('./diabetes.csv')

# Get the list of numeric columns
numeric_columns = df.select_dtypes(include='number').columns.tolist()

# Apply MinMaxScaler to the numeric columns
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Perform PCA
pca = PCA()
pca.fit(df[numeric_columns])

# Transform the data into principal components
df_pca = pca.transform(df[numeric_columns])

# Create a DataFrame from the principal components
df_pca = pd.DataFrame(df_pca, columns=[f'PC{i+1}' for i in range(df_pca.shape[1])])

# Concatenate the principal components with non-numeric columns if any
df_final = pd.concat([df.drop(columns=numeric_columns), df_pca], axis=1)

# Print the DataFrame with principal components
print(df_final)


          PC1       PC2       PC3       PC4       PC5       PC6       PC7  \
0    0.706994  0.153130  0.053935  0.014182  0.008782  0.077709  0.120925   
1   -0.402711 -0.088188 -0.029916  0.074931 -0.058812  0.056709  0.139708   
2    0.689631  0.104960 -0.228744 -0.133842  0.074967 -0.044311 -0.278429   
3   -0.419790 -0.195420 -0.047320  0.079442 -0.053987 -0.081378  0.042092   
4    0.670742 -0.406714  0.153529 -0.144157  0.373100  0.618213 -0.037188   
..        ...       ...       ...       ...       ...       ...       ...   
763 -0.239673  0.571125  0.316590  0.148371  0.142716 -0.078587  0.292774   
764 -0.360620 -0.084038  0.052959  0.008006 -0.066171 -0.039148  0.002710   
765 -0.343772  0.072610  0.043200  0.015431  0.027228 -0.124067 -0.042639   
766  0.623114 -0.017950 -0.287323 -0.160894 -0.144410  0.084932  0.198446   
767 -0.407386 -0.174975  0.004435  0.108058 -0.082412 -0.002458  0.048259   

          PC8       PC9  
0   -0.081670  0.135842  
1    0.014610  0.082566

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Load the dataset
df = pd.read_csv('./diabetes.csv')

# Separate features (X) and target (y)
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Analyze explained variance ratio
explained_variance_ratio = pca.explained_variance_ratio_
print("Explained Variance Ratio:", explained_variance_ratio)

# Determine the number of principal components to retain
# For example, let's retain 95% of the variance
total_variance = 0
num_components = 0
for variance in explained_variance_ratio:
    total_variance += variance
    num_components += 1
    if total_variance >= 0.95:
        break

print("Number of components to retain:", num_components)

# Apply PCA with the chosen number of components
pca = PCA(n_components=num_components)
X_pca = pca.fit_transform(X_scaled)


Explained Variance Ratio: [0.26179749 0.21640127 0.12870373 0.10944113 0.09529305 0.08532855
 0.05247702 0.05055776]
Number of components to retain: 8


In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Load the dataset
df = pd.read_csv('./diabetes.csv')

# Separate features (X) and target variable (y)
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA
pca = PCA(n_components=2)  # We'll reduce the dataset to 2 principal components for visualization
X_pca = pca.fit_transform(X_scaled)

# Create a new DataFrame with the principal components
df_pca = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])

# Concatenate the target variable with the principal components DataFrame
df_pca['Outcome'] = y

# Display the transformed dataset
print(df_pca.head())


       PCA1      PCA2  Outcome
0  1.068503  1.234895        1
1 -1.121683 -0.733852        0
2 -0.396477  1.595876        1
3 -1.115781 -1.271241        0
4  2.359334 -2.184819        1
