# CryptoData Analysis Tool 

Welcome to the ultimate **data analysis playground**! This tool helps you clean, explore, and visualize customer reviews for Amazon and Daraz—all with a simple menu interface. 💡

---

## What Can You Do? 🤔

- **Clean up your data**: Handle missing values like a pro.
- **Crunch the numbers**: Rolling averages, satisfaction scores, and sentiment trends.
- **Analyze it all**: Compare Amazon vs. Daraz in mean, median, and standard deviation.
- **Visualize the vibes**: Bar charts, line trends, and pie charts galore! 🎨
- **Export your results**: Save your hard work to a CSV file for sharing.
- **Summarize reviews by age**: Who loves what, and why?

---

## How It Works 🛠️

1. The dataset is loaded for you from `Product_reviews.csv`.
2. Just run the notebook, pick an option from the menu, and watch the magic happen!
3. From cleaning to analyzing, this tool is as interactive as it gets. 

---

## Have Fun! 🎉

Use the menu options, explore the data, and let the insights flow. Data analysis has never been this exciting!


## Let's import libraries first 😊

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%matplotlib notebook
import matplotlib
plt.close('all')
matplotlib.use('module://matplotlib_inline.backend_inline')


## This is our class 

In [3]:
class CryptoData:
    """Clean, transform, analyse and plot a customer-review DataFrame.

    The wrapped frame is expected to carry the columns used below: 'Name',
    'Age', 'Gender', 'Brand', 'Satisfaction Percentage', 'City' and
    'Product Reviews' (with the categories 'Good', 'Average' and 'Bad').
    Every method works on ``self.data``; derived columns are added to it.
    """

    # Encodings shared by every method that converts review categories.
    _REVIEW_TO_SCORE = {'Good': 3, 'Average': 2, 'Bad': 1}
    _SCORE_TO_REVIEW = {1: 'Bad', 2: 'Average', 3: 'Good'}

    def __init__(self,data):
        """Store the DataFrame to work on (it is modified in place)."""
        self.data=data

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _replace_invalid_with_mean(self, column, lower=None, upper=None):
        """Replace NaN / out-of-range entries of *column* with the integer mean.

        The mean is taken over the valid entries only, so that invalid values
        (e.g. negative ages) do not skew the replacement.  Nothing happens
        when the column holds no valid entry at all.
        """
        values = self.data[column]
        valid = values.notna()
        if lower is not None:
            valid &= values >= lower
        if upper is not None:
            valid &= values <= upper
        if not valid.any():
            return
        self.data.loc[~valid, column] = int(values[valid].mean())

    def _ensure_numeric_reviews(self):
        """Create 'Product Reviews_numeric' when it does not exist yet."""
        if 'Product Reviews_numeric' not in self.data.columns:
            self.data['Product Reviews_numeric'] = self.data['Product Reviews'].map(self._REVIEW_TO_SCORE)

    def _ensure_rolling_average(self):
        """Create 'Product Reviews_rolling_avg' when it does not exist yet."""
        if 'Product Reviews_rolling_avg' not in self.data.columns:
            self.calculate_rolling_average()

    def _print_review_stats(self, brand):
        """Print mean, median and standard deviation of the numeric reviews of *brand*."""
        self._ensure_numeric_reviews()
        scores = self.data.loc[self.data['Brand'] == brand, 'Product Reviews_numeric']
        print(f"{brand} Data - Mean: {scores.mean():.2f}")
        print(f"{brand} Data - Median: {scores.median():.2f}")
        print(f"{brand} Data - Standard Deviation: {scores.std():.2f}")

    def _print_satisfaction_extremes(self, brand):
        """Print the rows of *brand* with the highest and lowest satisfaction."""
        rows = self.data[self.data['Brand'] == brand]
        satisfaction = rows['Satisfaction Percentage'].dropna()
        if satisfaction.empty:
            # idxmax/idxmin cannot be evaluated without at least one value.
            print(f"No satisfaction data available for {brand}.")
            return
        print("Most Positive Sentiment Post:\n", rows.loc[satisfaction.idxmax()])
        print("Most Negative Sentiment Post:\n", rows.loc[satisfaction.idxmin()])

    def _plot_sentiment_trends(self, kind, color):
        """Plot the number of reviews per rolling-average value as a *kind* chart."""
        self._ensure_rolling_average()
        sentiment_trends = self.data.groupby('Product Reviews_rolling_avg').size()
        sentiment_trends.plot(kind=kind, color=color, title='Sentiment Trends')
        plt.ylabel('Number of Reviews')
        plt.xlabel('Sentiments (1=Bad, 3=Good)')
        plt.show()

    # ------------------------------------------------------------------
    # Step 2: handling missing values
    # ------------------------------------------------------------------
    def filling_values(self):
        """Fill missing and out-of-range values of ``self.data`` in place.

        * 'Name' -> 'Unknown'; 'Gender' -> 'Female'; 'Brand' -> 'Amazon';
          'City' -> 'Quetta' (the fill values picked for this dataset).
        * 'Age': NaN and negative values -> integer mean of the valid ages.
        * 'Satisfaction Percentage': NaN and values outside 0..100 ->
          integer mean of the valid percentages.
        * 'Product Reviews': NaN -> most frequent review of the same brand.
        """
        frame = self.data
        frame.fillna(
            {'Name': 'Unknown', 'Gender': 'Female', 'Brand': 'Amazon', 'City': 'Quetta'},
            inplace=True,
        )
        self._replace_invalid_with_mean('Age', lower=0)
        self._replace_invalid_with_mean('Satisfaction Percentage', lower=0, upper=100)

        # 'Brand' is already complete here, so every row belongs to a brand.
        for brand in ('Daraz', 'Amazon'):
            of_brand = frame['Brand'] == brand
            most_common = frame.loc[of_brand, 'Product Reviews'].mode()
            if most_common.empty:
                # No known review for this brand: nothing to fill with.
                continue
            missing = of_brand & frame['Product Reviews'].isna()
            frame.loc[missing, 'Product Reviews'] = most_common.iloc[0]

    # ------------------------------------------------------------------
    # Step 3: data transformation
    # ------------------------------------------------------------------
    def calculate_rolling_average(self, window_size=3):
        """Add the numeric review score and its trailing rolling average.

        Writes 'Product Reviews_numeric' (Good=3, Average=2, Bad=1) and
        'Product Reviews_rolling_avg', the mean over the current row and the
        ``window_size - 1`` rows before it (fewer at the start of the frame).
        """
        scores = self.data['Product Reviews'].map(self._REVIEW_TO_SCORE)
        self.data['Product Reviews_numeric'] = scores
        # min_periods=1 keeps the shorter windows at the start of the frame.
        self.data['Product Reviews_rolling_avg'] = scores.rolling(window=window_size, min_periods=1).mean()

    def numeric_to_alphabetical(self):
        """Turn 'Product Reviews_rolling_avg' into 'Bad' / 'Average' / 'Good'.

        Averages are rounded down to the next whole score first; missing
        averages stay missing.  Calling the method again on the already
        converted column is a no-op.
        """
        self._ensure_rolling_average()
        averages = self.data['Product Reviews_rolling_avg']
        if not pd.api.types.is_numeric_dtype(averages):
            return
        self.data['Product Reviews_rolling_avg'] = np.floor(averages).map(self._SCORE_TO_REVIEW)

    # ------------------------------------------------------------------
    # Step 4: analysis of data
    # ------------------------------------------------------------------
    def mean_median_std_Amazon(self):
        """Print mean, median and standard deviation of the Amazon review scores."""
        self._print_review_stats('Amazon')

    def mean_median_std_daraz(self):
        """Print mean, median and standard deviation of the Daraz review scores."""
        self._print_review_stats('Daraz')

    def Customer_satisfaction_Amazon(self):
        """Print the Amazon rows with the highest and lowest satisfaction."""
        self._print_satisfaction_extremes('Amazon')

    def Customer_satisfaction_Daraz(self):
        """Print the Daraz rows with the highest and lowest satisfaction."""
        self._print_satisfaction_extremes('Daraz')

    def Correlation_Daraz_Amazon(self):
        """Print the correlation between Amazon and Daraz satisfaction values.

        Both series are cut to the length of the shorter one and paired by
        position (first Amazon row with first Daraz row, and so on).
        """
        is_amazon = self.data['Brand'] == 'Amazon'
        is_daraz = self.data['Brand'] == 'Daraz'
        amazon_data = self.data.loc[is_amazon, 'Satisfaction Percentage']
        daraz_data = self.data.loc[is_daraz, 'Satisfaction Percentage']
        min_len = min(len(amazon_data), len(daraz_data))
        # Series.corr aligns on the index; the two brands never share row
        # labels, so the index is reset to pair the values by position.
        amazon_data_for_corr = amazon_data.iloc[:min_len].reset_index(drop=True)
        daraz_data_for_corr = daraz_data.iloc[:min_len].reset_index(drop=True)
        print(amazon_data_for_corr.corr(daraz_data_for_corr))

    # ------------------------------------------------------------------
    # Step 5: visualization
    # ------------------------------------------------------------------
    def Bar_sentimental_values(self):
        """Show a bar chart of the number of reviews per rolling-average value."""
        self._plot_sentiment_trends(kind='bar', color='pink')

    def Line_sentimental_values(self):
        """Show a line chart of the number of reviews per rolling-average value."""
        self._plot_sentiment_trends(kind='line', color='purple')

    def Pie_sentimental_(self):
        """Show a pie chart of the Negative / Neutral / Positive review shares.

        Adds the column 'Sentiment Category' to ``self.data``.
        """
        self._ensure_numeric_reviews()
        self.data['Sentiment Category'] = self.data['Product Reviews_numeric'].map({1: 'Negative', 2: 'Neutral', 3: 'Positive'})
        sentiment_distribution = self.data['Sentiment Category'].value_counts()
        sentiment_distribution.plot(kind='pie',title='Sentimental values of data', autopct='%1.1f%%', colors=['#ff9999','#66b3ff','#99ff99'],subplots=True)
        plt.show()

    # ------------------------------------------------------------------
    # Step 6: save the result
    # ------------------------------------------------------------------
    def export_CSV(self, file_path=r"C:\Users\syedn\Downloads\My_CSV.csv"):
        """Write ``self.data`` to *file_path* as CSV without the index column.

        The default keeps the path this notebook was written with; pass
        another path to export elsewhere.
        """
        self.data.to_csv(file_path,index=False)

    # ------------------------------------------------------------------
    # Step 7: conclusion of the review
    # ------------------------------------------------------------------
    def conclusion_of_review_by_Age(self):
        """Print the median review per brand for ages below 20 and 20 or above.

        A median that is not exactly 1, 2 or 3 (including a missing one) is
        reported as "Unknown".
        """
        self._ensure_numeric_reviews()
        # NOTE(review): the in-place sort is not needed for the medians below;
        # it is kept so that the row order seen afterwards stays as before.
        self.data.sort_values(by='Age', ascending=True, inplace=True)

        age_groups = (
            ("below", self.data[self.data['Age'] < 20]),
            ("above", self.data[self.data['Age'] >= 20]),
        )
        for wording, group in age_groups:
            for brand in ('Amazon', 'Daraz'):
                median = group.loc[group['Brand'] == brand, 'Product Reviews_numeric'].median()
                verdict = self._SCORE_TO_REVIEW.get(median, "Unknown")
                print(f"People {wording} the age of 20 using {brand} have a review: {verdict}")

    # ------------------------------------------------------------------
    # Extras
    # ------------------------------------------------------------------
    def data_type_cast(self):
        """Down-cast 'Age', 'Satisfaction Percentage' and the numeric reviews to int32.

        A column that still contains missing values is left unchanged,
        because NaN cannot be stored in an int32 column.
        """
        self._ensure_numeric_reviews()
        for column in ('Age', 'Satisfaction Percentage', 'Product Reviews_numeric'):
            values = self.data[column]
            if values.isna().any():
                continue
            self.data[column] = values.astype(np.int32)

    def get_transformed_data(self):
        """Print the first ten rows of the dataset and return the full frame."""
        print(self.data.head(10))
        return self.data

## This is our menu-driven program. I hope you like it (❁´◡`❁)

In [None]:
# Menu-driven front end: load the review dataset once, then keep asking the
# user what to do until option 8 (Exit) is chosen.

# The dataset loaded as 'data'
data = pd.read_csv('Product_reviews.csv')

# Instantiate the CryptoData class
crypto_data = CryptoData(data)

# Sub-menu dispatch tables: the user's choice -> the method to run.
analysis_actions = {
    '1': crypto_data.mean_median_std_Amazon,
    '2': crypto_data.mean_median_std_daraz,
    '3': crypto_data.Customer_satisfaction_Amazon,
    '4': crypto_data.Customer_satisfaction_Daraz,
    '5': crypto_data.Correlation_Daraz_Amazon,
}
visualization_actions = {
    '1': crypto_data.Bar_sentimental_values,
    '2': crypto_data.Line_sentimental_values,
    '3': crypto_data.Pie_sentimental_,
}

while True:
    print("\n--- Main Menu ---")
    print("1. Handle Missing Values")
    print("2. Calculate Rolling Averages")
    print("3. Perform Data Analysis")
    print("4. Generate Visualizations")
    print("5. Export Data to CSV")
    print("6. Show Data Summary by Age")
    print("7. Display Dataset ")
    print("8. Exit")

    choice = input("Enter your choice (1-8): ")

    if choice == '1':
        crypto_data.filling_values()
        print("Missing values handled successfully!")

    elif choice == '2':
        crypto_data.calculate_rolling_average()
        print("Rolling averages for column 'Product Reviews' calculated successfully!")

    elif choice == '3':
        print("\n--- Data Analysis Menu ---")
        print("1. Mean, Median, and Standard Deviation for Amazon")
        print("2. Mean, Median, and Standard Deviation for Daraz")
        print("3. Most Positive and Negative Sentiments for Amazon")
        print("4. Most Positive and Negative Sentiments for Daraz")
        print("5. Correlate Satisfaction Trends between Amazon and Daraz")

        analysis_choice = input("Enter your choice (1-5): ")

        analysis_action = analysis_actions.get(analysis_choice)
        if analysis_action is None:
            print("Invalid choice. Please try again.")
        else:
            analysis_action()

    elif choice == '4':
        print("\n--- Visualization Menu ---")
        print("1. Bar Chart of Sentiment Trends")
        print("2. Line Chart of Sentiment Trends")
        print("3. Pie Chart of Sentiment Distribution")

        vis_choice = input("Enter your choice (1-3): ")

        visualization_action = visualization_actions.get(vis_choice)
        if visualization_action is None:
            print("Invalid choice. Please try again.")
        else:
            visualization_action()

    elif choice == '5':
        # Only report success when the file was really written; the export
        # target may not exist on the machine running the notebook.
        try:
            crypto_data.export_CSV()
        except OSError as error:
            print(f"Could not export data to CSV: {error}")
        else:
            print("Data exported to CSV successfully!")

    elif choice == '6':
        crypto_data.conclusion_of_review_by_Age()

    elif choice == '7':
        print("\n--- Dataset Overview ---")
        print("1. Display first few rows of the dataset")
        print("2. Display dataset information")

        overview_choice = input("Enter your choice (1-2): ")

        if overview_choice == '1':
            crypto_data.get_transformed_data()
        elif overview_choice == '2':
            # DataFrame.info() prints its report itself and returns None, so
            # wrapping it in print() would add a stray "None" line.
            data.info()
        else:
            print("Invalid choice. Please try again.")

    elif choice == '8':
        print("Exiting the program. Goodbye!")
        break

    else:
        print("Invalid choice. Please try again.")



--- Main Menu ---
1. Handle Missing Values
2. Calculate Rolling Averages
3. Perform Data Analysis
4. Generate Visualizations
5. Export Data to CSV
6. Show Data Summary by Age
7. Display Dataset 
8. Exit


In [None]:
# Print a summary of the loaded DataFrame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Show the first 25 rows of the dataset.
data.head(25)