# LG Customer Segmentation Analysis
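This notebook segments customers with K-Means clustering: it encodes and standardizes the features, uses the elbow method to choose the number of clusters, fits the model, visualizes the clusters with PCA, and summarizes each cluster's profile.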

In [None]:
import pandas as pd

# Read the raw customer table; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="LG_Customer_Segmentation_Dataset.csv")
# Preview the first five rows (the default for head) as a quick sanity check.
df.head(n=5)

## Data Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Build the feature table: everything except the customer identifier.
df_preprocessed = df.drop(columns="Customer_ID").copy()

# Replace each categorical column with integer codes and keep the fitted
# encoder per column so the codes can be mapped back to labels later.
# NOTE(review): LabelEncoder numbers categories in sorted label order, so the
# codes carry an ordering/distance that the clustering step will use. Confirm
# this is intended for columns that are presumably nominal (e.g. Region).
categorical_columns = ("Gender", "Income_Level", "Region")
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    encoded = le.fit_transform(df_preprocessed[column])
    df_preprocessed[column] = encoded
    label_encoders[column] = le

# Standardize every feature so that no column dominates the distance
# computation, then restore the column names on the scaled array.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_preprocessed)
df_scaled = pd.DataFrame(scaled_values, columns=df_preprocessed.columns)
df_scaled.head()

## Determine the Optimal Number of Clusters with the Elbow Method

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Fit K-Means once per candidate k and record the inertia (within-cluster sum
# of squared distances) so the "elbow" of the curve can be read off the plot.
k_values = range(1, 11)

inertia = []
for k in k_values:
    # n_init is pinned explicitly: scikit-learn changed its default from 10 to
    # "auto" (a single run for k-means++ initialisation) in version 1.4. Ten
    # restarts keep the curve comparable across library versions and make it
    # less sensitive to one unlucky initialisation.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(df_scaled)
    inertia.append(kmeans.inertia_)

# Plot inertia against k using the same candidate range as the fitting loop.
plt.figure(figsize=(8, 5))
plt.plot(k_values, inertia, marker='o')
plt.title("Elbow Method for Optimal k")
plt.xlabel("Number of clusters")
plt.ylabel("Inertia")
plt.grid(True)
plt.show()

## Apply K-Means Clustering

In [None]:
# Fit the final model with three segments (presumably the k suggested by the
# elbow plot; confirm against the curve).
# n_init is pinned explicitly because scikit-learn changed its default from 10
# to "auto" in version 1.4; ten restarts keep the chosen solution consistent
# across library versions and less dependent on a single initialisation.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# Store the label of each row next to its (unscaled) features; the later
# visualisation and profiling cells read this 'Cluster' column.
df_preprocessed['Cluster'] = kmeans.fit_predict(df_scaled)

# Re-attach the customer identifier so every segment can be traced to a customer.
df_clustered = pd.concat([df[['Customer_ID']], df_preprocessed], axis=1)
df_clustered.head()

## Visualize Clusters Using PCA

In [None]:
from sklearn.decomposition import PCA
import seaborn as sns

# Project the standardized features onto their first two principal components
# so the cluster assignment can be drawn on a 2-D scatter plot.
pca = PCA(n_components=2)
pca_components = pca.fit_transform(df_scaled)
df_visual = pd.DataFrame(data=pca_components, columns=["PCA1", "PCA2"])
df_visual["Cluster"] = df_preprocessed["Cluster"]

# Draw one point per customer, coloured by its cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df_visual,
    x="PCA1",
    y="PCA2",
    hue="Cluster",
    palette="Set1",
    s=100,
    ax=ax,
)
ax.set_title("Customer Segments Visualized Using PCA")
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
ax.legend(title="Cluster")
ax.grid(True)
plt.show()

## Cluster Profiles

In [None]:
# Profile each cluster.
# The categorical columns of df_preprocessed hold the integer codes assigned
# by LabelEncoder, so averaging them does not describe the segment. Numeric
# features are summarised by their mean, while categorical features are
# decoded back to their labels and summarised by the most frequent label.
categorical_columns = list(label_encoders)

# Decode on a copy so the integer-coded feature table stays untouched.
decoded = df_preprocessed.copy()
for name, encoder in label_encoders.items():
    decoded[name] = encoder.inverse_transform(decoded[name])

by_cluster = decoded.groupby("Cluster")
cluster_profile = pd.concat(
    [
        # Number of customers in each segment.
        by_cluster.size().rename("Cluster_Size"),
        # Mean of every non-categorical feature, rounded for readability.
        decoded.drop(columns=categorical_columns).groupby("Cluster").mean().round(2),
        # Dominant label per categorical feature; mode() is sorted, so ties
        # resolve to the first label in sort order.
        by_cluster[categorical_columns].agg(lambda labels: labels.mode().iat[0]),
    ],
    axis=1,
)
cluster_profile