In [13]:
# Task 4: Satisfaction Analysis

# Step 1: Scoring (Engagement & Experience Scores)
# Compute the engagement score and experience score using Euclidean distance from their respective least engaged and worst experience clusters.

import pandas as pd
import numpy as np
import psycopg2

from sklearn.metrics.pairwise import euclidean_distances

# Load the data

# Read the source table through a SQLAlchemy engine. pandas.read_sql_query
# only supports SQLAlchemy connectables (or sqlite3); passing a raw psycopg2
# connection emits a UserWarning, and the psycopg2 connection and cursor that
# used to be opened here were never closed.
from sqlalchemy import create_engine

# Same database URL the export step of this script uses ('%40' is the
# URL-encoded '@' in the password).
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or an untracked config file.
source_engine = create_engine('postgresql://postgres:Leul%40123@localhost:5432/Teleco')

# Query the data
query = "SELECT * FROM xdr_data"

# Load data into a pandas DataFrame, then release the engine's pooled
# connections since nothing else in this step needs them.
df = pd.read_sql_query(query, source_engine)
source_engine.dispose()

# Define least engaged and worst experience points (example).
# Both reference points are the column-wise min / max of the same two traffic
# columns. They are taken BEFORE the NaN fill below, so zeros introduced by
# the fill do not influence them.
# NOTE(review): the "worst experience" point is built from engagement-style
# volume columns rather than dedicated experience metrics, and the task text
# mentions cluster-based reference points -- confirm this is intended.
score_columns = ['Totaldl', 'Totalul']
least_engaged_point = df[score_columns].min().values
worst_experience_point = df[score_columns].max().values

# Handle NaN values: every NaN in every column (not only the score columns)
# is replaced with 0.
df = df.fillna(0)

# Compute Engagement and Experience Scores as the Euclidean distance of each
# row from the respective reference point. euclidean_distances returns an
# (n_rows, 1) matrix here, so flatten it into one value per row.
df['Engagement_Score'] = euclidean_distances(
    df[score_columns], [least_engaged_point]
).flatten()

df['Experience_Score'] = euclidean_distances(
    df[score_columns], [worst_experience_point]
).flatten()

# Display results
print(df[['Bearer Id', 'Engagement_Score', 'Experience_Score']])

# Step 2: Satisfaction Calculation
# Satisfaction_Score is the row-wise mean of the Engagement_Score and
# Experience_Score columns.

component_scores = df[['Engagement_Score', 'Experience_Score']]
df['Satisfaction_Score'] = component_scores.mean(axis=1)

# Keep the ten rows with the highest satisfaction score.
top_satisfied = df.nlargest(10, 'Satisfaction_Score')

# Show the identifier and score of those ten rows.
top_satisfied_view = top_satisfied[['Bearer Id', 'Satisfaction_Score']]
print("Top 10 Satisfied Customers:\n", top_satisfied_view)

""" Step 3: Model Building
Train a regression model to predict Satisfaction_Score """

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Features are the two component scores; the target is the satisfaction score.
# NOTE(review): Satisfaction_Score is computed in this script as the mean of
# exactly these two features, so the regression can reproduce it almost
# perfectly -- confirm whether other predictors were intended.
feature_columns = ['Engagement_Score', 'Experience_Score']
X = df[feature_columns]
y = df['Satisfaction_Score']

# Hold out 20% of the rows for evaluation; the seed is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training split (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and measure the mean squared error.
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)

# Report the fitted coefficients and the test error.
print("Model Coefficients:", model.coef_)
print("Mean Squared Error:", mse)

# Step 4: Clustering & Insights
# 4.1 Split users into two satisfaction clusters with k-means (k=2), using the
# engagement and experience scores as the clustering features.

from sklearn.cluster import KMeans

cluster_features = df[['Engagement_Score', 'Experience_Score']]

# Seed fixed at 42; n_init is not specified, so the installed scikit-learn
# version's default applies.
kmeans = KMeans(n_clusters=2, random_state=42)
cluster_labels = kmeans.fit_predict(cluster_features)
df['Satisfaction_Cluster'] = cluster_labels

# Show which cluster each row was assigned to.
print(df[['Bearer Id', 'Satisfaction_Cluster']])

# Per-cluster summary: mean satisfaction score and number of rows.
satisfaction_by_cluster = df.groupby('Satisfaction_Cluster')['Satisfaction_Score']
cluster_agg = satisfaction_by_cluster.agg(['mean', 'count'])
print("Cluster Insights:\n", cluster_agg)

# Step 5: Export Final Table to Local Database
# The score and cluster columns are written to a PostgreSQL table through
# SQLAlchemy and then read back as a sanity check.
# 5.1 Setup Database Connection

from sqlalchemy import create_engine

# '%40' is the URL-encoded '@' in the password.
# NOTE(review): credentials are hard-coded in source; replace with your own
# and consider loading them from the environment instead.
engine = create_engine('postgresql://postgres:Leul%40123@localhost:5432/Teleco')

# Columns that make up the exported table.
export_columns = [
    'Bearer Id',
    'Engagement_Score',
    'Experience_Score',
    'Satisfaction_Score',
    'Satisfaction_Cluster',
]
satisfaction_table = df[export_columns]

# if_exists='replace' overwrites any existing user_satisfaction table;
# index=False keeps the DataFrame index out of the table.
satisfaction_table.to_sql(
    name='user_satisfaction',
    con=engine,
    if_exists='replace',
    index=False,
)
# (A CSV export to "../notebooks/data/user_satisfaction.csv" was sketched here
# previously and is disabled.)

# 5.2 Query the Exported Data

# Read the first ten rows back to verify the export.
query_result = pd.read_sql_query(
    sql='SELECT * FROM user_satisfaction LIMIT 10',
    con=engine,
)
print(query_result)


  df = pd.read_sql_query(query, connection)


           Bearer Id  Engagement_Score  Experience_Score
0       1.268214e+19      6.783131e+08      2.241109e+08
1       7.277826e+18      1.338831e+08      7.680415e+08
2       7.277826e+18      3.314872e+08      5.695954e+08
3       1.304243e+19      7.991450e+08      1.054812e+08
4       1.304243e+19      3.709881e+08      5.282207e+08
...              ...               ...               ...
149996  1.304243e+19      2.854246e+08      6.142481e+08
149997  1.311448e+19      7.520958e+08      1.471783e+08
149998  7.277826e+18      2.030368e+08      6.967513e+08
149999  6.917538e+18      6.076440e+08      2.917487e+08
150000  6.917538e+18      3.967272e+08      5.028601e+08

[150001 rows x 3 columns]
Top 10 Satisfied Customers:
            Bearer Id  Satisfaction_Score
19787   7.277826e+18        4.779738e+08
31830   1.311448e+19        4.760258e+08
140916  1.304243e+19        4.759500e+08
65740   1.304243e+19        4.757592e+08
125167  1.311448e+19        4.754379e+08
36532   7.3498