## Experience Analytics

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Set up visualization style: apply seaborn's "whitegrid" style once here so
# the plots drawn in the cells below share one consistent look.
sns.set(style="whitegrid")


In [None]:
# Load the dataset.
# NOTE: this is an absolute, machine-specific location; point DATA_PATH at
# your own copy of the CSV before running the notebook on another machine.
DATA_PATH = r"C:\Users\hp\Downloads\Data-20241219T180034Z-001\Data\Copy of Week2_challenge_data_source(CSV).csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows as the cell's displayed output.
df.head()

In [7]:

# Aggregate per Customer

# Fill gaps before aggregating: each byte counter gets its own column mean,
# and the device identifier gets the most common IMEI of the whole column.
# These assignments overwrite the columns of `df`, so later cells see the
# imputed values.
for byte_column in ('Total UL (Bytes)', 'Total DL (Bytes)'):
    df[byte_column] = df[byte_column].fillna(df[byte_column].mean())
df['IMEI'] = df['IMEI'].fillna(df['IMEI'].mode()[0])


def most_common(values):
    """Return the most frequent entry of a Series.

    ``mode()`` can return several values when there is a tie; the first one
    is taken.
    """
    return values.mode()[0]


# One row per MSISDN/Number (used here as the customer key): mean uplink and
# downlink volume plus the most frequently used device. Named aggregation
# yields the final column names directly, so no separate rename step is needed.
aggregated_df = (
    df.groupby('MSISDN/Number')
    .agg(
        avg_uplink_bytes=('Total UL (Bytes)', 'mean'),
        avg_downlink_bytes=('Total DL (Bytes)', 'mean'),
        most_used_device=('IMEI', most_common),
    )
    .reset_index()
)

print(aggregated_df.head())

   MSISDN/Number  avg_uplink_bytes  avg_downlink_bytes  most_used_device
0   3.360100e+10        36053108.0         842637466.0      8.627420e+13
1   3.360100e+10        36104459.0         120755184.0      3.553261e+13
2   3.360100e+10        39306820.0         556659663.0      3.561271e+13
3   3.360101e+10        20327526.0         401993172.0      3.520491e+13
4   3.360101e+10        47140263.5         681565208.5      3.530681e+13


In [None]:
# Top 10 and Bottom 10 for Total UL and DL Bytes
def build_ranked_table(top, bottom, most_frequent, direction):
    """Lay out the largest, smallest and most common byte values side by side.

    Parameters
    ----------
    top, bottom : pd.Series
        Largest / smallest values, in the order they should be listed.
    most_frequent : pd.Series
        A ``value_counts()`` result; its index holds the byte values themselves.
    direction : str
        Label inserted into the column headers, e.g. "Uplink" or "Downlink".

    Returns
    -------
    pd.DataFrame
        One column per input. Each column is wrapped in a fresh Series so that
        pandas aligns them on a 0..k-1 index and pads shorter columns with NaN.
        Building the frame from raw arrays instead raises ``ValueError`` when
        the inputs differ in length (e.g. fewer than 10 distinct values).
    """
    return pd.DataFrame({
        f'Top 10 {direction} Bytes': pd.Series(top.to_numpy()),
        f'Bottom 10 {direction} Bytes': pd.Series(bottom.to_numpy()),
        f'Most Frequent {direction} Bytes': pd.Series(most_frequent.index.to_numpy()),
    })


# Session-level extremes and the ten most common values for each direction.
# NOTE(review): if missing values were mean-imputed earlier in the notebook,
# the imputed mean may show up among the "most frequent" values.
top_10_ul = df['Total UL (Bytes)'].nlargest(10)
bottom_10_ul = df['Total UL (Bytes)'].nsmallest(10)
most_frequent_ul = df['Total UL (Bytes)'].value_counts().head(10)

top_10_dl = df['Total DL (Bytes)'].nlargest(10)
bottom_10_dl = df['Total DL (Bytes)'].nsmallest(10)
most_frequent_dl = df['Total DL (Bytes)'].value_counts().head(10)

# Create dataframes for display
ul_table = build_ranked_table(top_10_ul, bottom_10_ul, most_frequent_ul, 'Uplink')
dl_table = build_ranked_table(top_10_dl, bottom_10_dl, most_frequent_dl, 'Downlink')

# Display results
print("\nUplink Bytes Table:")
print(ul_table)

print("\nDownlink Bytes Table:")
print(dl_table)


In [None]:
# Average bytes per device type (IMEI)
# Mean traffic per IMEI, ordered from the heaviest to the lightest average.
sessions_by_device = df.groupby('IMEI')
uplink_by_device = sessions_by_device['Total UL (Bytes)'].mean().sort_values(ascending=False)
downlink_by_device = sessions_by_device['Total DL (Bytes)'].mean().sort_values(ascending=False)

# Plotting: one bar chart per traffic direction, side by side.
fig, (uplink_ax, downlink_ax) = plt.subplots(1, 2, figsize=(14, 6))

panels = [
    (uplink_ax, uplink_by_device, 'skyblue',
     'Average Uplink Bytes per Device Type', 'Uplink Bytes'),
    (downlink_ax, downlink_by_device, 'salmon',
     'Average Downlink Bytes per Device Type', 'Downlink Bytes'),
]
for panel_ax, panel_series, panel_color, panel_title, panel_ylabel in panels:
    panel_series.plot(kind='bar', color=panel_color, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(panel_ylabel)
    # Override the x label that pandas derives from the index name.
    panel_ax.set_xlabel('Device Type')

fig.tight_layout()
plt.show()

## K-Means Clustering

In [None]:

# Select features for clustering. Uplink comes first, then downlink; this
# order is also the column order of the cluster centers printed below.
features = df[['Total UL (Bytes)', 'Total DL (Bytes)']]

# Standardize features so that both byte counters are on a comparable scale
# before distances are computed.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Apply K-means clustering with three clusters; the fixed random_state keeps
# the centroid initialisation repeatable between runs.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(features_scaled)
df['experience_cluster'] = kmeans.labels_

# Analyze cluster centers: map them back from scaled space to raw byte values.
cluster_centers = scaler.inverse_transform(kmeans.cluster_centers_)
print("\nCluster Centers (Uplink, Downlink):\n", cluster_centers)

# Plot clusters: downlink on x, uplink on y, coloured by assigned cluster.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='Total DL (Bytes)',
    y='Total UL (Bytes)',
    hue='experience_cluster',
    palette='viridis',
    ax=ax,
)
ax.set_title('Experience Clusters')
ax.set_xlabel('Downlink Bytes')
ax.set_ylabel('Uplink Bytes')
ax.legend(title='Cluster')
plt.show()