In [8]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
dataset = pd.read_csv('Customer Segmentation.csv')

# Print the first few rows of the loaded dataset
print(dataset.head())

# Select the relevant columns for demographic segmentation
selected_columns = ['country_code', 'latitude', 'longitude', 'city']
print("Selected Columns:", selected_columns)

# Create a subset of the data with selected columns
data = dataset[selected_columns]
print("Subset Data Shape:", data.shape)

# Drop rows with missing values
data.dropna(inplace=True)

# Encode categorical column 'country_code'
label_encoder = LabelEncoder()
data['country_code'] = label_encoder.fit_transform(data['country_code'])

# Encode categorical column 'city'
data['city'] = label_encoder.fit_transform(data['city'])

# Standardize the numeric features
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

# Determine the optimal number of clusters using the elbow method
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(scaled_data)
    wcss.append(kmeans.inertia_)

# Plot the elbow method
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), wcss, marker='o', linestyle='--')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()

# Based on the elbow plot, choose the optimal number of clusters
optimal_clusters = 3

# Apply K-Means clustering
kmeans = KMeans(n_clusters=optimal_clusters, init='k-means++', random_state=42)
data['cluster'] = kmeans.fit_predict(scaled_data)

# Plot the clusters on a map (latitude vs longitude)
plt.figure(figsize=(10, 6))
sns.scatterplot(x='longitude', y='latitude', hue='cluster', data=data, palette='viridis')
plt.title('Demographic Segmentation')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.legend(title='Cluster')
plt.show()


  first_name last_name      title   gender                   email  \
0  Priscella   Pollett  Honorable  Agender   ppollett0@foxnews.com   
1        Ana      Onge         Mr     Male          aonge1@fda.gov   
2      Butch     Rawls        Mrs     Male  brawls2@slideshare.net   
3     Ambros   Fairrie        Rev     Male   afairrie3@reuters.com   
4    Gaylene   Titcomb  Honorable   Female    gtitcomb4@dyndns.org   

           city   country country_code   latitude   longitude  ...  \
0       Sesheke    Zambia           ZM -17.473886   24.295514  ...   
1  Querecotillo      Peru           PE  -4.838304  -80.648215  ...   
2       Licupis      Peru           PE  -6.424052  -79.242648  ...   
3        Yonghe     China           CN  36.759507  110.632006  ...   
4     Wang Yang  Thailand           TH  17.074088  104.449040  ...   

  street_number street_suffix      time_zone  company_name   department  \
0         90004      Crossing  Africa/Lusaka          Omba      Support   
1       

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['country_code'] = label_encoder.fit_transform(data['country_code'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['city'] = label_encoder.fit_transform(data['city'])


AttributeError: 'NoneType' object has no attribute 'split'