# In [1]: (Jupyter notebook cell prompt; the code below is the cell's content)
import pandas as pd
from sklearn.cluster import KMeans
from sqlalchemy import create_engine

class UserSatisfactionAnalysis:
    """Load ``xdr_data`` from PostgreSQL and derive satisfaction scores and clusters.

    The table is read once, at construction time, into ``self.mydata``. The
    analysis reads the ``EngagementMetric`` and ``ExperienceMetric`` columns and
    writes a ``SatisfactionScore`` column back onto ``self.mydata``.
    """

    # Column names used by the analysis, kept in one place so the validation
    # and the computations cannot drift apart.
    ENGAGEMENT_COLUMN = 'EngagementMetric'
    EXPERIENCE_COLUMN = 'ExperienceMetric'
    SATISFACTION_COLUMN = 'SatisfactionScore'

    def __init__(self, db_params):
        """Store the connection parameters and load the data immediately.

        Args:
            db_params: Mapping with the keys ``user``, ``password``, ``host``,
                ``port`` and ``dbname``.
        """
        self.db_params = db_params
        self.mydata = self.load_data_from_database()

    def load_data_from_database(self) -> pd.DataFrame:
        """Return the full ``xdr_data`` table as a DataFrame.

        Raises:
            KeyError: If a required key is missing from ``self.db_params``.
        """
        # Local import keeps this block self-contained (stdlib only).
        from urllib.parse import quote_plus

        params = self.db_params
        # Credentials must be URL-encoded, otherwise characters such as '@',
        # ':' or '/' in a password corrupt the connection URL.
        user = quote_plus(str(params["user"]))
        password = quote_plus(str(params["password"]))
        url = (
            f'postgresql+psycopg2://{user}:{password}'
            f'@{params["host"]}:{params["port"]}/{params["dbname"]}'
        )

        engine = create_engine(url)
        try:
            return pd.read_sql_query("SELECT * FROM xdr_data;", engine)
        finally:
            # Release the engine's connection pool even if the query fails.
            engine.dispose()

    def _require_columns(self, *columns):
        """Raise a descriptive ``KeyError`` if any of *columns* is absent."""
        missing = [name for name in columns if name not in self.mydata.columns]
        if missing:
            raise KeyError(
                f"xdr_data is missing required column(s): {', '.join(missing)}; "
                f"available columns: {list(self.mydata.columns)}"
            )

    def perform_user_satisfaction_analysis(self):
        """Run the full pipeline: score rows, rank them, cluster and aggregate.

        Side effect: adds (or overwrites) the ``SatisfactionScore`` column on
        ``self.mydata``.

        Returns:
            A ``(top_satisfied_customers, cluster_aggregates)`` tuple: the ten
            rows with the highest satisfaction score, and the per-cluster mean
            of the satisfaction and experience columns.

        Raises:
            KeyError: If the engagement or experience column is missing.
        """
        # Per-row scores are required here: a single scalar would give every
        # row the same satisfaction score and make the ranking meaningless.
        engagement_score, experience_score = self.calculate_scores(per_row=True)

        # Satisfaction is the average of the engagement and experience scores.
        self.mydata[self.SATISFACTION_COLUMN] = (
            engagement_score + experience_score
        ) / 2

        top_satisfied_customers = self.mydata.nlargest(10, self.SATISFACTION_COLUMN)

        kmeans_clusters = self.run_kmeans()
        cluster_aggregates = self.aggregate_scores_by_cluster(kmeans_clusters)

        return top_satisfied_customers, cluster_aggregates

    def calculate_scores(self, *, per_row=False):
        """Return the engagement and experience scores.

        Args:
            per_row: When ``False`` (the default, matching the original
                behaviour) return the mean of each metric column as two
                scalars. When ``True`` return the two metric columns
                themselves, one value per row.
                NOTE(review): this treats the raw metric columns as the
                per-row scores; replace with the project's real scoring
                logic if it differs.

        Returns:
            An ``(engagement_score, experience_score)`` tuple.

        Raises:
            KeyError: If either metric column is missing.
        """
        self._require_columns(self.ENGAGEMENT_COLUMN, self.EXPERIENCE_COLUMN)
        engagement = self.mydata[self.ENGAGEMENT_COLUMN]
        experience = self.mydata[self.EXPERIENCE_COLUMN]

        if per_row:
            return engagement, experience
        return engagement.mean(), experience.mean()

    def run_kmeans(self):
        """Cluster the rows into two groups on the two metric columns.

        Returns:
            The array of cluster labels produced by ``KMeans.fit_predict``,
            one label per row of ``self.mydata``.

        Raises:
            KeyError: If either metric column is missing.
        """
        self._require_columns(self.ENGAGEMENT_COLUMN, self.EXPERIENCE_COLUMN)
        features = self.mydata[[self.ENGAGEMENT_COLUMN, self.EXPERIENCE_COLUMN]]

        # n_init is pinned because its default differs between scikit-learn
        # releases; an explicit value keeps results reproducible across them.
        model = KMeans(n_clusters=2, random_state=42, n_init=10)
        return model.fit_predict(features)

    def aggregate_scores_by_cluster(self, kmeans_clusters):
        """Return the mean satisfaction and experience score of each cluster.

        Args:
            kmeans_clusters: One cluster label per row of ``self.mydata``.

        Returns:
            A DataFrame with the columns ``Cluster``, ``SatisfactionScore``
            and ``ExperienceMetric``, one row per cluster.

        Raises:
            KeyError: If the satisfaction or experience column is missing.
        """
        value_columns = [self.SATISFACTION_COLUMN, self.EXPERIENCE_COLUMN]
        self._require_columns(*value_columns)

        # Copy only the columns being aggregated instead of the whole table;
        # ``assign`` leaves ``self.mydata`` untouched.
        cluster_data = self.mydata[value_columns].assign(Cluster=kmeans_clusters)
        cluster_aggregates = cluster_data.groupby('Cluster').agg({
            self.SATISFACTION_COLUMN: 'mean',
            self.EXPERIENCE_COLUMN: 'mean',
        }).reset_index()

        return cluster_aggregates

# Database connection parameters.
# Each value can be overridden through the standard libpq environment
# variables so credentials do not have to live in the source; the literals
# below are only the fallback defaults.
import os

db_params = {
    'dbname': os.environ.get('PGDATABASE', 'week1'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'habte'),
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': os.environ.get('PGPORT', '5432'),
}

# Create an instance of UserSatisfactionAnalysis; the constructor connects to
# the database and loads the table immediately.
satisfaction_analysis = UserSatisfactionAnalysis(db_params)

# Execute the analysis
top_satisfied, cluster_aggregates = satisfaction_analysis.perform_user_satisfaction_analysis()

# Display the results
print("Top 10 Satisfied Customers:")
print(top_satisfied)

print("\nCluster Aggregates:")
print(cluster_aggregates)


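# Notebook output of the cell above: KeyError: 'EngagementMetric'
# (the DataFrame loaded from xdr_data apparently has no 'EngagementMetric' column).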