# Import Required Libraries
Import the necessary libraries, including pandas, numpy, matplotlib, and sklearn.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the Dataset
Load the wine dataset from the provided CSV file using pandas.

In [None]:
# Read the wine data from 'Wine.csv' (path is relative to the working directory)
wine_data = pd.read_csv(filepath_or_buffer='Wine.csv')

# Preview the leading rows as a quick sanity check on the load
wine_data.head(n=5)

In [None]:
# Summary statistics per column; with pandas' defaults this covers the numeric
# columns (count, mean, std, min, quartiles, max) and is shown as the cell output
wine_data.describe()

In [None]:
# Print a concise overview of the frame: column names, non-null counts, dtypes
# and memory usage (useful for spotting missing data or unexpected dtypes)
wine_data.info()

# Preprocess the Data
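Check for missing values (handling them and encoding categorical variables if necessary), then separate the feature columns from the `Customer_Segment` target and standardize the features.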

In [None]:
# Count the missing entries per column so any gaps are visible before modelling
missing_values = wine_data.isnull().sum()
print("Missing values in each column:\n", missing_values)

# Nothing is imputed here. If the counts above were non-zero, one option would
# be mean imputation, e.g. wine_data.fillna(wine_data.mean()).

# Separate the features from the target. The target is removed by name rather
# than by position, so the split stays correct even if 'Customer_Segment' is
# not the last column of the CSV.
target_column = 'Customer_Segment'
features = wine_data.columns.drop(target_column)
x = wine_data.loc[:, features].values
# Selecting with a one-element list keeps y two-dimensional (one column)
y = wine_data.loc[:, [target_column]].values

# Standardize the features (zero mean, unit variance per column) so that PCA
# is not dominated by the features with the largest numeric scale
x = StandardScaler().fit_transform(x)

# Standardize the Data
Standardize each feature to have a mean of 0 and a standard deviation of 1 using StandardScaler from sklearn.

In [None]:
# Standardize the Data

# Learn each feature's mean and standard deviation, then rescale the feature
# matrix to zero mean and unit variance per column
x = StandardScaler().fit(x).transform(x)

# Apply PCA
Apply the PCA algorithm to the standardized data using PCA from sklearn.

In [None]:
# Apply PCA

# Project the feature matrix onto its first two principal components
pca = PCA(n_components=2)
principal_components = pca.fit_transform(x)

# Wrap the projected coordinates in a DataFrame with descriptive column names
principal_df = pd.DataFrame(
    data=principal_components,
    columns=['Principal Component 1', 'Principal Component 2'],
)

# Put the 'Customer_Segment' target column next to the two components
final_df = pd.concat(
    [principal_df, wine_data[['Customer_Segment']]],
    axis=1,
)

# Preview the combined frame
final_df.head()

# Visualize the Principal Components
Create scatter plots of the first two principal components to visualize the data.

In [None]:
# Visualize the Principal Components

# Scatter the samples in the plane of the first two principal components,
# coloring each point by its customer segment
plt.figure(figsize=(10, 6))
plt.scatter(
    final_df['Principal Component 1'],
    final_df['Principal Component 2'],
    c=final_df['Customer_Segment'],
    cmap='viridis',
    edgecolor='k',
    s=100,
)
plt.title('PCA of Wine Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
# The colorbar maps the point colors back to the segment values
plt.colorbar(label='Customer Segment')
plt.show()

# Interpret the Results
Analyze the principal components to understand the variance captured and how well they distinguish between different types of wine.

In [None]:
# Fraction of the total variance captured by each retained principal component
explained_variance = pca.explained_variance_ratio_
print("Explained variance by each principal component:\n", explained_variance)

# Bar chart of those fractions, one bar per component (numbered from 1)
plt.figure(figsize=(8, 5))
plt.bar(
    range(1, len(explained_variance) + 1),
    explained_variance,
    color='blue',
    align='center',
    alpha=0.7,
)
plt.title('Explained Variance Ratio by Principal Components')
plt.xlabel('Principal Components')
plt.ylabel('Explained Variance Ratio')
plt.show()

In [None]:
# Compute the loadings: each component's weight vector scaled by the square
# root of that component's explained variance, giving one row per feature and
# one column per principal component
loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
loadings_df = pd.DataFrame(loadings, columns=['Principal Component 1', 'Principal Component 2'], index=features)
print("Loadings of each feature on the principal components:\n", loadings_df)

# Plot every feature at its (PC1 loading, PC2 loading) position and label it
plt.figure(figsize=(10, 6))
plt.scatter(loadings_df['Principal Component 1'], loadings_df['Principal Component 2'], s=100)
# Iterate over the rows directly instead of indexing the label-indexed Series
# with integer positions: pandas deprecates treating integer keys as positions
# on a non-integer index, and itertuples also keeps each label paired with its
# own row.
for feature, pc1_loading, pc2_loading in loadings_df.itertuples(index=True, name=None):
    plt.text(pc1_loading, pc2_loading, feature, fontsize=12)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Loadings of Features on Principal Components')
plt.grid()
plt.show()