<a href="https://colab.research.google.com/github/Advell/Herbarium-Plant-Classification/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**EXPLORATORY DATA ANALYSIS**

In [1]:
import pandas as pd

pd.set_option('display.max_columns', None)  # Show all columns (no column truncation)
pd.set_option('display.max_rows', 10)  # Switch to a truncated view for frames longer than 10 rows

# Read the Excel workbook into a DataFrame
# NOTE(review): absolute Colab path - the workbook must already exist under /content,
# otherwise read_excel raises FileNotFoundError.
excel_file_path = '/content/medallia_dataset.xlsx'
df = pd.read_excel(excel_file_path)

FileNotFoundError: [Errno 2] No such file or directory: '/content/medallia_dataset.xlsx'

In [None]:
# Number of rows per distinct value of the "Store" column (value_counts orders by count, largest first).
stores = df["Store"].value_counts()
stores

**Create a dataframe with store name and likelihood to recommend score**

In [None]:
# Pull the two source columns out of the raw frame.
store_col, likelihood_to_rec = df["Store"], df["Likelihood to Recommend of Store"]

# Pair them up under shorter column names and discard every row that has a
# missing value in either column.
new_df = pd.DataFrame(
    {'Store name': store_col, 'Likelihood to recommend': likelihood_to_rec}
).dropna()
new_df

In [None]:
# Mean 'Likelihood to recommend' per store, kept as a one-column frame and
# ordered from the highest average to the lowest.
average_likelihood = (
    new_df
    .groupby('Store name')['Likelihood to recommend']
    .mean()
    .to_frame()
    .sort_values(by='Likelihood to recommend', ascending=False)
)

# Raise the per-cell display width limit to 500 characters.
pd.set_option('max_colwidth', 500)

average_likelihood.head()

**Calculate and add NPS score column**

In [None]:
# Create a new column NPS Category based on Likelihood to recommend scores.
# For whole-number scores the bands are: 0-6 Detractor, 7-8 Passive, 9-10 Promoter.
# pd.cut builds right-closed bins, so without include_lowest=True the first bin
# is (0, 6] and a score of exactly 0 would receive no category (NaN); groupby
# skips NaN keys, so those responses would vanish from the per-store counts.
new_df['NPS Category'] = pd.cut(
    new_df['Likelihood to recommend'],
    bins=[0, 6, 8, 10],
    labels=['Detractor', 'Passive', 'Promoter'],
    include_lowest=True,
)

In [None]:
# Display new_df to inspect the 'NPS Category' column next to the scores.
new_df

In [None]:
# Group the DataFrame by 'Store name' and 'NPS Category', and calculate the counts.
# observed=False is spelled out so that every NPS category gets its own column
# even when no response falls into it; relying on the implicit default for
# categorical groupers is deprecated and the default changes in newer pandas.
grouped = (
    new_df
    .groupby(['Store name', 'NPS Category'], observed=False)
    .size()
    .unstack(fill_value=0)
)

# unstack leaves a CategoricalIndex on the columns. Convert the labels to plain
# strings so that extra columns such as 'Total' can be added; older pandas
# releases raise TypeError when inserting a label that is not a category.
grouped.columns = grouped.columns.astype(str)

# Calculate the total count of responses for each store
grouped['Total'] = grouped.sum(axis=1)

In [None]:
# Display the per-store category counts together with the 'Total' column.
grouped

In [None]:
# Share of each NPS category per store, expressed as a percentage of 'Total'.
for category in ('Promoter', 'Passive', 'Detractor'):
    grouped[f'% {category}s'] = grouped[category] / grouped['Total'] * 100

# NPS is the promoter percentage minus the detractor percentage.
grouped['NPS'] = grouped['% Promoters'] - grouped['% Detractors']
grouped

In [None]:
# Order the per-store table from the highest NPS to the lowest.
nps_df = grouped.sort_values(by='NPS', ascending=False)
# Second name bound to the same average_likelihood object (plain assignment, no copy is made).
likelihood_to_recommend = average_likelihood

In [None]:
# Remove the three raw count columns in one call, leaving 'Total', the
# percentage columns and 'NPS'.
raw_count_columns = ['Detractor', 'Passive', 'Promoter']
nps_df = nps_df.drop(columns=raw_count_columns)
nps_df

In [None]:
# Display the per-store average 'Likelihood to recommend' frame.
likelihood_to_recommend

In [None]:
# Inner-join the NPS table with the average-score table, matching on 'Store name'.
final_df = nps_df.merge(likelihood_to_recommend, how='inner', on='Store name')

In [None]:
# Preview the first five rows of the merged frame.
final_df.head()

In [None]:
# Plotting: scatter of NPS (x) against average 'Likelihood to recommend' (y).
# matplotlib is imported here because this cell comes before the notebook's
# later import cell; without it a fresh-kernel "Run All" stops with a
# NameError on `plt`.
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.scatter(final_df['NPS'], final_df['Likelihood to recommend'], color='blue')
plt.title('Likelihood to Recommend vs NPS')
plt.xlabel('NPS (Net Promoter Score)')
plt.ylabel('Likelihood to Recommend')
plt.show()

**Plot the relationship between NPS and Likelihood to recommend**

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Scatter of NPS (x) against average 'Likelihood to recommend' (y), drawn
# through the Axes object instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.scatter(final_df['NPS'], final_df['Likelihood to recommend'])
ax.set_title('NPS and Likelihood to recommend')
ax.set_xlabel('NPS')
ax.set_ylabel('Likelihood to recommend')
plt.show()

In [None]:
# Same two columns as a connected line, with points joined in final_df row order.
fig, ax = plt.subplots()
ax.plot(final_df['NPS'], final_df['Likelihood to recommend'])
ax.set_title('Line Plot: Relationship between NPS and Likelihood to recommend')
ax.set_xlabel('NPS')
ax.set_ylabel('Likelihood to recommend')
plt.show()

In [None]:
# Pull the four source columns out of the raw frame in one pass.
greeted, assistance, recommend, satisfaction = (
    df[source]
    for source in (
        "Greeted",
        "Received Assistance",
        "Likelihood to Recommend of Store",
        "Satisfaction with Service",
    )
)

# Working frame for the greeting / assistance plots, with shortened labels
# for the last two columns.
greeting = pd.DataFrame(dict(zip(
    ('Greeted', 'Received Assistance', 'Likelihood to Recommend', 'Satisfaction'),
    (greeted, assistance, recommend, satisfaction),
)))

greeting

**Relationship between greeting and receiving assistance**

In [None]:
# Cross-tabulate the two columns: rows are 'Greeted' values, columns are
# 'Received Assistance' values, cells are row counts.
contingency_table = pd.crosstab(greeting['Greeted'], greeting['Received Assistance'])

# Render the counts as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(contingency_table, annot=True, cmap='coolwarm', fmt='d', ax=ax)
ax.set_title('Relationship between greeting and receiving assistance')
ax.set_xlabel('Received Assistance')
ax.set_ylabel('Greeted')
plt.show()


**Relationship between receiving assistance and likelihood to recommend store**

In [None]:
# Count of rows per 'Received Assistance' value, split by the
# 'Likelihood to Recommend' score (one bar colour per score).
plt.figure(figsize=(8, 6))
sns.countplot(data=greeting, x='Received Assistance', hue='Likelihood to Recommend')
# The x axis shows the 'Received Assistance' column, so label it as such
# (the previous label was a leftover placeholder, 'Yes_No_Column').
plt.xlabel('Received Assistance')
plt.ylabel('Count')
plt.title('Relationship between Receiving Assistance and Likelihood to Recommend')
plt.grid(True)
plt.show()

**Relationship between receiving assistance and Satisfaction**

In [None]:
# Count of rows per 'Received Assistance' value, split by 'Satisfaction',
# drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=greeting, x='Received Assistance', hue='Satisfaction', ax=ax)
ax.set_title('Relationship between Receiving Assistance and Satisfaction with Service')
ax.set_xlabel('Received Assistance')
ax.set_ylabel('Count')
ax.grid(True)
plt.show()

In [None]:
# Display label -> source column in df, in the order the columns should appear.
further_columns = {
    'Recommend': "Likelihood to Recommend of Store",
    'Satisfaction With Store Experience': "Satisfaction with Store Experience",
    'Checkout': "Ease of Checkout",
    'Ease Of Finding What You Were Looking For': "Ease of Finding What You Were Looking For",
    'Product Availability': "Availability of Product",
    'Store Appearance': "Appearance of the Store",
    'Satisfaction With Service': "Satisfaction with Service",
    'Helpful': "Helpful",
    'Knowledgable': "Knowledgeable",
    'Treatment': "Treated You Like a Valued Customer",
    'Staff Availability': "Staff Availability",
    'Selection': "Selection",
    'Value For Price Paid': "Value for Price Paid",
    'Quality': "Quality",
    'Ease Of Use': "Ease of Use",
}

# Build the relabelled frame column by column from the mapping above.
further = pd.DataFrame(
    {label: df[source] for label, source in further_columns.items()}
)

further

In [None]:
# Create a pairplot to visualize the pairwise relationships between the columns of `further`.
# NOTE(review): `further` has 15 columns, so this can produce a very large grid and be slow to render.
sns.pairplot(further)
plt.show()

In [None]:
# Two-column working frame taken from df, relabelled for this plot.
easeOfUse_knowledgable = df[["Ease of Use", "Knowledgeable"]].rename(
    columns={"Ease of Use": 'Ease Of Use', "Knowledgeable": 'Knowledgable'}
)

# Mean 'Ease Of Use' for every distinct 'Knowledgable' value.
avg_ease_of_use = easeOfUse_knowledgable.groupby('Knowledgable')['Ease Of Use'].mean()

# One marker per distinct value, joined by a line.
fig, ax = plt.subplots()
ax.plot(avg_ease_of_use.index, avg_ease_of_use.values, marker='o')
ax.set_title('Average Ease Of Use vs Knowledgable')
ax.set_xlabel('Knowledgable')
ax.set_ylabel('Average Ease Of Use')
ax.grid(True)
plt.show()

In [None]:
# Two-column working frame taken from df, relabelled for this plot.
easeOfUse_quality = df[["Ease of Use", "Quality"]].rename(
    columns={"Ease of Use": 'Ease Of Use'}
)

# Mean 'Ease Of Use' for every distinct 'Quality' value
# (this rebinds avg_ease_of_use to the new result).
avg_ease_of_use = easeOfUse_quality.groupby('Quality')['Ease Of Use'].mean()

# One marker per distinct value, joined by a line.
fig, ax = plt.subplots()
ax.plot(avg_ease_of_use.index, avg_ease_of_use.values, marker='o')
ax.set_title('Average Ease Of Use vs Quality')
ax.set_xlabel('Quality')
ax.set_ylabel('Average Ease Of Use')
ax.grid(True)
plt.show()

In [None]:
# Two-column working frame taken from df.
knowledgeable_quality = pd.DataFrame({
    'Knowledgeable': df["Knowledgeable"],
    'Quality': df["Quality"],
})

# Mean 'Quality' for every distinct 'Knowledgeable' value
avg_y = knowledgeable_quality.groupby('Knowledgeable')['Quality'].mean()

# Plotting the average line
plt.plot(avg_y.index, avg_y.values, marker='o')
plt.xlabel('Knowledgeable')
plt.ylabel('Average Quality')
# Title added so the figure stands alone, matching the other "average" plots.
plt.title('Average Quality vs Knowledgeable')
plt.grid(True)
plt.show()

**Loyalty program effectiveness**

In [None]:
# Independent two-column copy of df holding the loyalty tier and the
# likelihood-to-recommend score.
loyalty_program = df[
    ['Loyalty Tier', 'Likelihood to Recommend of Store']
].copy()


In [None]:
# Count of rows per loyalty tier, split by likelihood-to-recommend score,
# drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=loyalty_program,
    x='Loyalty Tier',
    hue='Likelihood to Recommend of Store',
    ax=ax,
)
ax.set_title('Count of Likelihood to Recommend of Store across Loyalty Tiers')
ax.set_xlabel('Loyalty Tier')
ax.set_ylabel('Count')
ax.legend(title='Likelihood to Recommend of Store')
plt.show()

In [None]:
# Independent two-column copy of df: loyalty tier and transaction amount.
loyalty_spend = df[['Loyalty Tier', 'Transaction Amount']].copy()

# Violin plot of the transaction amount distribution within each tier.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=loyalty_spend, x='Loyalty Tier', y='Transaction Amount', ax=ax)
ax.set_title('Distribution of Transaction Amount across Loyalty Tiers')
ax.set_xlabel('Loyalty Tier')
ax.set_ylabel('Transaction Amount')
plt.show()


In [None]:
# Independent two-column copy of df: loyalty tier and cart spend amount.
loyalty_spendd = df[['Loyalty Tier', 'Cart Spend Amount']].copy()

# Violin plot of the cart spend distribution within each tier.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=loyalty_spendd, x='Loyalty Tier', y='Cart Spend Amount', ax=ax)
ax.set_title('Distribution of Cart Spend Amount across Loyalty Tiers')
ax.set_xlabel('Loyalty Tier')
ax.set_ylabel('Cart Spend Amount')
plt.show()
