In [None]:
!pip install ibm-watson-machine-learning
!pip install wordcloud
print("All required libraries are installed.")

In [None]:
import pandas as pd
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from tqdm.auto import tqdm
from google.colab import userdata

# --- 1. SETUP Credentials ---
# Read the secrets through Colab's `userdata` helper so they never appear in the notebook.
my_api_key = userdata.get('IBM_API_KEY')
my_project_id = userdata.get('IBM_PROJECT_ID')

# Pass the secret *values* to the SDK. The previous version passed the string
# literals "my_api_key" / "my_project_id", so the fetched secrets were never used.
credentials = {
    "url": "https://jp-tok.ml.cloud.ibm.com",
    "apikey": my_api_key
}
project_id = my_project_id

# --- 2. DATA CLEANING & PREPARATION ---
print("--- Starting Data Cleaning and Preparation ---")

# Defaults so that later cells always find these names, even when loading fails
# (and never pick up stale values left over from an earlier run of this cell).
sample_size = 0
high_discount_sample = pd.DataFrame()
low_discount_sample = pd.DataFrame()

try:
    df = pd.read_csv('amazon.csv')

    # Strip the '%' sign and convert to a number; anything unparsable becomes NaN.
    # `.astype(str)` keeps this working if pandas already parsed the column as
    # numeric, and `regex=False` makes the literal replacement explicit.
    df['discount_percentage_numeric'] = pd.to_numeric(
        df['discount_percentage'].astype(str).str.replace('%', '', regex=False),
        errors='coerce',
    )
    # Keep only rows that have a usable discount, review text and rating.
    df = df.dropna(subset=['discount_percentage_numeric', 'review_content', 'rating'])

    high_discount_df = df[df['discount_percentage_numeric'] > 50]
    low_discount_df = df[df['discount_percentage_numeric'] < 10]

    # Use the size of the smaller group for both so the comparison is balanced.
    sample_size = min(len(high_discount_df), len(low_discount_df))
    print(f"A balanced sample of {sample_size} will be used for each group.")

    # Fixed seed so the same reviews are drawn on every run.
    high_discount_sample = high_discount_df.sample(n=sample_size, random_state=42)
    low_discount_sample = low_discount_df.sample(n=sample_size, random_state=42)
    print("Data preparation complete.")
except FileNotFoundError:
    print("Error: 'amazon.csv' not found. Please upload the file.")
    df = pd.DataFrame()

# --- 3. AI ANALYSIS WITH IBM GRANITE ---
# Only proceed when data was loaded and both groups have at least one review.
if not (df.empty or sample_size <= 0):
    print("\n--- Starting AI Analysis with IBM Granite ---")

    model_id = "ibm/granite-13b-instruct-v2"

    # Generation settings: greedy decoding, between 1 and 20 new tokens per response.
    params = {}
    params[GenParams.DECODING_METHOD] = "greedy"
    params[GenParams.MIN_NEW_TOKENS] = 1
    params[GenParams.MAX_NEW_TOKENS] = 20

    # `granite_model` stays None when the client cannot be created, which the
    # code further down uses to skip the analysis.
    granite_model = None
    try:
        granite_model = Model(
            model_id=model_id,
            params=params,
            credentials=credentials,
            project_id=project_id,
        )
    except Exception as e:
        print(f"Error initializing IBM Model: {e}")
    else:
        print(f"Successfully initialized model: {model_id}")

    def analyze_quality_complaints(df_sample, sample_name):
        results = []
        print(f"\nAnalyzing reviews for the '{sample_name}' group...")
        for review_text in tqdm(df_sample['review_content']):
            prompt = f"""Analyze the following customer review. If it contains a complaint about product quality
            (e.g., 'broken', 'defective', 'poor material', 'damaged', 'different from description', 'stopped working', 'poor quality', 'bad packaging', 'not durable', 'fake'),
            summarize the specific quality complaint in one short phrase. If there is no quality complaint, respond with 'No Quality Complaint'.

Review: "{str(review_text)[:1000]}"

Complaint Summary:"""
            try:
                generated_response = granite_model.generate_text(prompt=prompt)
                results.append(generated_response)
            except Exception as e:
                results.append("Analysis Error")
        return results

    if granite_model:
        # Analyse both groups with the same prompt, high-discount group first.
        high_discount_results = analyze_quality_complaints(high_discount_sample, "High Discount")
        low_discount_results = analyze_quality_complaints(low_discount_sample, "Low Discount")

        # --- 4. SAVE ANALYSIS RESULTS TO FILE ---
        print("\nSaving analysis results to a new CSV file...")
        summaries_by_group = {
            'high_discount_complaints_summary': high_discount_results,
            'low_discount_complaints_summary': low_discount_results,
        }
        # Build one row per group, then transpose so each group ends up as a column.
        results_by_row = pd.DataFrame.from_dict(summaries_by_group, orient='index')
        results_df = results_by_row.T
        results_df.to_csv('analysis_results.csv', index=False)
        print("Analysis results successfully saved to 'analysis_results.csv'")

In [None]:
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Lower-cased markers used to classify the model's responses.
NO_COMPLAINT_MARKER = 'no quality complaint'
ANALYSIS_ERROR_MARKER = 'analysis error'


def is_usable_result(result):
    """Return True when `result` is a real model answer.

    Blank responses and the "Analysis Error" placeholder (written when a request
    failed) carry no information about the review and are treated as unusable.
    """
    text = str(result).strip().lower()
    return bool(text) and text != ANALYSIS_ERROR_MARKER


def is_quality_complaint(result):
    """Return True when `result` is a usable answer that reports a quality complaint."""
    return is_usable_result(result) and NO_COMPLAINT_MARKER not in str(result).lower()


def summarize_complaints(results):
    """Summarise one group's responses.

    Returns:
        (complaint_count, analyzed_count, complaint_percentage), where
        `analyzed_count` excludes unusable responses and the percentage is 0.0
        when nothing could be analyzed (avoids a division by zero).
    """
    analyzed_count = sum(1 for res in results if is_usable_result(res))
    complaint_count = sum(1 for res in results if is_quality_complaint(res))
    complaint_percentage = (complaint_count / analyzed_count) * 100 if analyzed_count else 0.0
    return complaint_count, analyzed_count, complaint_percentage


def create_wordcloud(results, title):
    """Show a word cloud built from the complaint summaries in `results`.

    Prints a short message instead when there is nothing to visualize.
    """
    complaint_text = ' '.join(str(res) for res in results if is_quality_complaint(res))
    if not complaint_text.strip():
        print(f"\nNo complaints to visualize for '{title}'")
        return
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white', collocations=False).generate(complaint_text)
    except ValueError:
        # WordCloud raises ValueError when no words remain after its own filtering.
        print(f"\nNo complaints to visualize for '{title}'")
        return
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(title, fontsize=16)
    plt.show()


if 'high_discount_results' in globals() and 'low_discount_results' in globals():
    # --- 1. Calculate Final Insight ---
    sample_size = min(len(high_discount_results), len(low_discount_results))
    high_complaints_count, high_analyzed_count, high_complaint_percentage = summarize_complaints(high_discount_results)
    low_complaints_count, low_analyzed_count, low_complaint_percentage = summarize_complaints(low_discount_results)

    print("\n--- FINAL INSIGHT ---")
    print(f"High Discount Group: {high_complaints_count} out of {high_analyzed_count} reviews had quality complaints ({high_complaint_percentage:.2f}%).")
    print(f"Low Discount Group: {low_complaints_count} out of {low_analyzed_count} reviews had quality complaints ({low_complaint_percentage:.2f}%).")

    # Failed or blank responses are excluded from both counts; say so explicitly.
    excluded_count = (len(high_discount_results) - high_analyzed_count) + (len(low_discount_results) - low_analyzed_count)
    if excluded_count:
        print(f"Note: {excluded_count} responses were excluded because the analysis failed or returned nothing.")

    # --- 2. Create Bar Chart ---
    labels = ['Low Discount (<10%)', 'High Discount (>50%)']
    values = [low_complaint_percentage, high_complaint_percentage]

    plt.figure(figsize=(8, 6))
    bars = plt.bar(labels, values, color=['lightcoral', 'skyblue'])
    plt.ylabel('Percentage of Complaints (%)')
    plt.title('Comparison of Quality Complaint Percentage by Discount Level')
    plt.ylim(0, max(values) + 10) # Dynamic y-axis limit, with headroom for the labels

    # Write each bar's value just above it.
    for bar in bars:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2.0, yval, f'{yval:.2f}%', va='bottom', ha='center')

    print("\nDisplaying Bar Chart...")
    plt.show()

    # --- 3. Create Word Clouds ---
    print("\nDisplaying Word Clouds...")
    create_wordcloud(high_discount_results, 'Common Keywords in High-Discount Complaints')
    create_wordcloud(low_discount_results, 'Common Keywords in Low-Discount Complaints')

else:
    print("\nAnalysis variables not found. Please run the analysis cell (Cell 2) first.")