### Read file

In [None]:
import pandas as pd

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt



# Location of the customer feedback CSV, relative to the notebook's working directory.
DATA_PATH = '../input/customer-feedback-and-satisfaction/customer_feedback_satisfaction.csv'

# Parse the CSV into the DataFrame that every later cell reads from.
df = pd.read_csv(DATA_PATH)

# Show the first rows as the cell output for a quick sanity check.
df.head()


### Dataset shape & info

In [None]:
# Display the (row count, column count) tuple of the loaded frame.
df.shape

In [None]:
# Print a per-column summary (dtype and non-null count) plus memory usage.
df.info()

## Data analysis

In [None]:
# Report how many distinct (non-null) values each column holds, one line per
# column, to tell categorical columns apart from continuous ones.
distinct_per_column = df.nunique()

for column_name, distinct_count in distinct_per_column.items():
    print(f"{column_name}: {distinct_count}")

In [None]:
 print('countries :')

 print(df['Country'].unique())



print('FeedbackScores :')

print(df['FeedbackScore'].unique())



print('LoyaltyLevels :')

print(df['LoyaltyLevel'].unique())

### Distribution Count based on Country:


In [None]:
# Number of rows per country (value_counts orders them most frequent first).
Country = df['Country'].value_counts()

print("\nDistribution Count based on Country:")
print(Country)

# One figure with two side-by-side panels: bar chart (left), pie chart (right).
plt.figure(figsize=(6, 3))

# Left panel: row count per country.
plt.subplot(1, 2, 1)
sns.countplot(x='Country', data=df, hue='Country', palette='viridis', legend=False)

# Title corrected: it previously said "Medical Conditions", which is not the
# column this panel plots.
plt.title('Distribution of Country - Bar Chart')

plt.xlabel('Country')
plt.ylabel('Count')

# Tilt the category labels so they do not overlap in the narrow panel.
plt.xticks(rotation=45, ha='right')

# Right panel: the same counts shown as percentage shares.
plt.subplot(1, 2, 2)

plt.pie(Country, labels=Country.index, autopct='%1.1f%%', startangle=90, colors=sns.color_palette('viridis'))

plt.title('Distribution of Country - Pie Chart')

plt.tight_layout()
plt.show()

### Distribution Count based on FeedbackScore:


In [None]:
# Number of rows per feedback score label.
FeedbackScore = df['FeedbackScore'].value_counts()

print("\nDistribution Count based on FeedbackScore:")
print(FeedbackScore)

# Two panels on one row: counts as bars (left) and as percentage shares (right).
fig, (bar_ax, pie_ax) = plt.subplots(1, 2, figsize=(6, 3))

# Left panel: bar chart of the raw counts.
sns.countplot(x='FeedbackScore', data=df, hue='FeedbackScore', palette='viridis', legend=False, ax=bar_ax)
bar_ax.set_title('Distribution of FeedbackScore - Bar Chart')
bar_ax.set_xlabel('FeedbackScore')
bar_ax.set_ylabel('Count')
# Tilt the category labels so they stay readable in the narrow panel.
plt.setp(bar_ax.get_xticklabels(), rotation=45, ha='right')

# Right panel: pie chart built from the same value counts.
viridis_colors = sns.color_palette('viridis')
pie_ax.pie(FeedbackScore, labels=FeedbackScore.index, autopct='%1.1f%%', startangle=90, colors=viridis_colors)
pie_ax.set_title('Distribution of FeedbackScore - Pie Chart')

plt.tight_layout()
plt.show()


### Distribution Count based on LoyaltyLevel:


In [None]:
# Number of rows per loyalty level label.
LoyaltyLevel = df['LoyaltyLevel'].value_counts()

print("\nDistribution Count based on LoyaltyLevel:")
print(LoyaltyLevel)

# Side-by-side panels: bar chart of counts and pie chart of shares.
fig, axes = plt.subplots(1, 2, figsize=(6, 3))
counts_ax, shares_ax = axes

# Bar chart of the raw counts.
sns.countplot(
    x='LoyaltyLevel',
    data=df,
    hue='LoyaltyLevel',
    palette='viridis',
    legend=False,
    ax=counts_ax,
)
counts_ax.set(
    title='Distribution of LoyaltyLevel - Bar Chart',
    xlabel='LoyaltyLevel',
    ylabel='Count',
)
# Rotate the tick labels so neighbouring categories do not collide.
plt.setp(counts_ax.get_xticklabels(), rotation=45, ha='right')

# Pie chart of the same counts as percentages.
shares_ax.pie(
    LoyaltyLevel,
    labels=LoyaltyLevel.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=sns.color_palette('viridis'),
)
shares_ax.set_title('Distribution of LoyaltyLevel - Pie Chart')

plt.tight_layout()
plt.show()


### Distribution Count based on Gender:


In [None]:
# Number of rows per gender label (value_counts orders them most frequent first).
Gender = df['Gender'].value_counts()

print("\nDistribution Count based on Gender:")
print(Gender)

# One figure with two side-by-side panels: bar chart (left), pie chart (right).
plt.figure(figsize=(6, 3))

# Left panel: row count per gender.
plt.subplot(1, 2, 1)
sns.countplot(x='Gender', data=df, hue='Gender', palette='viridis', legend=False)

# Title corrected: it previously said "Medical Conditions", which is not the
# column this panel plots.
plt.title('Distribution of Gender - Bar Chart')

plt.xlabel('Gender')
plt.ylabel('Count')

# Tilt the category labels so they do not overlap in the narrow panel.
plt.xticks(rotation=45, ha='right')

# Right panel: the same counts shown as percentage shares.
plt.subplot(1, 2, 2)

plt.pie(Gender, labels=Gender.index, autopct='%1.1f%%', startangle=90, colors=sns.color_palette('viridis'))

plt.title('Distribution of Gender - Pie Chart')

plt.tight_layout()
plt.show()

### Distribution of Age in 10-Year Intervals

In [None]:
# Edges 0, 10, ..., 80 give eight left-closed decade buckets [0, 10) ... [70, 80).
# Ages that fall outside those edges get no bucket (NaN) from pd.cut.
age_bin_edges = range(0, 90, 10)
df['AgeGroup'] = pd.cut(df['Age'], bins=age_bin_edges, right=False)

# Single-panel bar chart of how many rows land in each decade bucket.
fig, age_ax = plt.subplots(figsize=(5, 3))
sns.countplot(x='AgeGroup', data=df, color='skyblue', ax=age_ax)

age_ax.set_title('Distribution of Age in 10-Year Intervals')
age_ax.set_xlabel('Age Groups (10-Year Intervals)')
age_ax.set_ylabel('Count')

# Interval labels are wide, so rotate them to keep them legible.
plt.setp(age_ax.get_xticklabels(), rotation=45)

plt.show()


# Testing **Machine Learning** algorithms to predict SatisfactionScore

In [None]:
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder, StandardScaler

from sklearn.linear_model import LinearRegression

from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.metrics import r2_score

import pandas as pd



# Compare several regressors on predicting SatisfactionScore and report the
# one with the highest R2 on a held-out test split.

# Encode on a copy: overwriting the categorical columns of `df` in place would
# replace the string labels with integers, so re-running any of the plotting
# cells afterwards would chart codes instead of names.
df_encoded = df.copy()

# One encoder per categorical column, kept under its own name so codes can be
# mapped back to labels later with inverse_transform().
le_country = LabelEncoder()
le_gender = LabelEncoder()
le_loyalty = LabelEncoder()
le_feedback = LabelEncoder()

encoder_by_column = {
    'Country': le_country,
    'Gender': le_gender,
    'LoyaltyLevel': le_loyalty,
    'FeedbackScore': le_feedback,
}

# NOTE(review): LabelEncoder numbers labels in sorted order, so the codes for
# FeedbackScore / LoyaltyLevel may not follow their natural ranking -- confirm
# whether an explicit ordinal mapping is wanted here.
for column_name, encoder in encoder_by_column.items():
    df_encoded[column_name] = encoder.fit_transform(df_encoded[column_name])

# Defining the features (X) and the target (y)
# NOTE(review): 'Age' is not among the features -- confirm this is intentional.
feature_columns = [
    'Country',
    'Gender',
    'Income',
    'ProductQuality',
    'ServiceQuality',
    'PurchaseFrequency',
    'FeedbackScore',
    'LoyaltyLevel',
]
X = df_encoded[feature_columns]
y = df_encoded['SatisfactionScore']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate models; the seeded ones use the same random_state for reproducibility.
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(random_state=42)
}

# Test-set R2 score per model name.
r2_scores = {}

# Fit each model on the training split and score it on the held-out split.
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2_scores[model_name] = r2_score(y_test, y_pred)
    print(f"{model_name} R2 Score: {r2_scores[model_name]:.4f}")

# `best_model` holds the NAME (dictionary key) of the highest-scoring model,
# not the fitted estimator; use models[best_model] to get the estimator.
best_model = max(r2_scores, key=r2_scores.get)

print(f"\nBest Model: {best_model} with R2 Score: {r2_scores[best_model]:.4f}")


###