# Part 3: Text Summarization
## AI-Powered Customer Feedback Summarization


In [2]:
import pandas as pd
import numpy as np
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.tokenize import sent_tokenize
import warnings
warnings.filterwarnings('ignore')


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
nltk.download('punkt', quiet=True)


NLTK data downloaded


## Load Dataset


In [4]:
df = pd.read_csv('cleaned_customer_feedback.csv')
print(f"Dataset shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")

df_with_text = df[df['Combined_Text'].notna() & (df['Combined_Text'].str.strip() != '')]
print(f"\nRecords with text: {len(df_with_text)}")
df_with_text.head()


Dataset shape: (259, 18)

Columns: ['Review Title', 'Customer name', 'Rating', 'Rating_Numeric', 'Date', 'Date_Parsed', 'Category', 'Comments', 'Useful', 'Helpful_Count', 'Review_Title_Clean', 'Comments_Clean', 'Combined_Text', 'Review_Title_Processed', 'Comments_Processed', 'Combined_Text_Processed', 'Text_Length', 'Word_Count']

Records with text: 259


Unnamed: 0,Review Title,Customer name,Rating,Rating_Numeric,Date,Date_Parsed,Category,Comments,Useful,Helpful_Count,Review_Title_Clean,Comments_Clean,Combined_Text,Review_Title_Processed,Comments_Processed,Combined_Text_Processed,Text_Length,Word_Count
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0 out of 5 stars,4.0,on 1 October 2018,2018-10-01,Display,Another Midrange killer Smartphone by Xiaomi\n...,,0,Another Midrange killer Smartphone by Xiaomi,Another Midrange killer Smartphone by Xiaomi M...,Another Midrange killer Smartphone by Xiaomi A...,another midrange killer smartphone xiaomi,another midrange killer smartphone xiaomi majo...,another midrange killer smartphone xiaomi anot...,4579,860
1,vry small size mobile,Raza ji,3.0 out of 5 stars,3.0,on 15 September 2018,2018-09-15,Others,All ok but vry small size mobile,7 people found this helpful,7,vry small size mobile,All ok but vry small size mobile,vry small size mobile All ok but vry small siz...,vry small size mobile,vry small size mobile,vry small size mobile vry small size mobile,54,11
2,Full display not working in all application.,Vaibhav Patel,3.0 out of 5 stars,3.0,on 18 September 2018,2018-09-18,Others,Quite good,7 people found this helpful,7,Full display not working in all application.,Quite good,Full display not working in all application. Q...,full display working application,quite good,full display working application quite good,55,9
3,Value for Money,Amazon Customer,5.0 out of 5 stars,5.0,on 28 September 2018,2018-09-28,Display,Redmi has always have been the the king of bud...,2 people found this helpful,2,Value for Money,Redmi has always have been the the king of bud...,Value for Money Redmi has always have been the...,value money,redmi always king budget segment.and yet anoth...,value money redmi always king budget segment.a...,857,154
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0 out of 5 stars,2.0,on 18 September 2018,2018-09-18,Others,worst product from MI. I am a hardcore fan of ...,6 people found this helpful,6,Not worth for the money,worst product from MI. I am a hardcore fan of ...,Not worth for the money worst product from MI....,worth money,worst product hardcore fan one really disappoi...,worth money worst product hardcore fan one rea...,108,21


## Method 1: Transformer-Based Summarization (T5)


In [5]:
print("Loading T5 summarization model...")
print("This may take a few minutes on first run (downloading model)")

summarizer = pipeline("summarization", model="t5-small")
print("T5 model loaded successfully")


Loading T5 summarization model...
This may take a few minutes on first run (downloading model)


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


T5 model loaded successfully


In [6]:
def summarize_with_t5(text, max_length=100, min_length=30):
    if not text or len(text.strip()) < 50:
        return text
    
    try:
        text = text[:1024]
        summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
        return summary[0]['summary_text']
    except Exception as e:
        return f"Error: {str(e)}"


## Method 2: Extractive Summarization (TF-IDF + Cosine Similarity)


In [7]:
def extractive_summarize(text, num_sentences=3):
    if not text or len(text.strip()) < 50:
        return text
    
    try:
        sentences = sent_tokenize(text)
        
        if len(sentences) <= num_sentences:
            return text
        
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(sentences)
        
        similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
        
        sentence_scores = similarity_matrix.sum(axis=1)
        
        ranked_sentences = [sentences[i] for i in sentence_scores.argsort()[-num_sentences:][::-1]]
        
        summary = ' '.join(ranked_sentences)
        return summary
    
    except Exception as e:
        return text


## Generate Summaries for Sample Reviews


In [8]:
long_reviews = df_with_text[df_with_text['Word_Count'] > 50].head(5)

print("=" * 80)
print("SAMPLE SUMMARIZATIONS")
print("=" * 80)

for idx, row in long_reviews.iterrows():
    text = row['Combined_Text']
    
    print(f"\n{'='*80}")
    print(f"Review #{idx + 1}")
    print(f"{'='*80}")
    print(f"\nOriginal Text ({row['Word_Count']} words):")
    print("-" * 80)
    print(text[:500] + "..." if len(text) > 500 else text)
    
    print(f"\n{'T5 SHORT SUMMARY (30-50 words):'}")
    print("-" * 80)
    t5_short = summarize_with_t5(text, max_length=50, min_length=30)
    print(t5_short)
    
    print(f"\n{'T5 DETAILED SUMMARY (60-100 words):'}")
    print("-" * 80)
    t5_detailed = summarize_with_t5(text, max_length=100, min_length=60)
    print(t5_detailed)
    
    print(f"\n{'EXTRACTIVE SUMMARY (Top 3 sentences):'}")
    print("-" * 80)
    extractive = extractive_summarize(text, num_sentences=3)
    print(extractive)
    
    print("\n")


SAMPLE SUMMARIZATIONS

Review #1

Original Text (860 words):
--------------------------------------------------------------------------------
Another Midrange killer Smartphone by Xiaomi Another Midrange killer Smartphone by Xiaomi Major Highlights The Redmi 6 Pro sports a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC The phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well in Redmi 6 Series you will get 3 Different Smartphones with different specs, and from all of them, Redmi 6 pro is most powerful and advance...

T5 SHORT SUMMARY (30-50 words):
--------------------------------------------------------------------------------


Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Redmi 6 Pro features a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC . the phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well .

T5 DETAILED SUMMARY (60-100 words):
--------------------------------------------------------------------------------


Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Redmi 6 Pro features a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC . the phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well .

EXTRACTIVE SUMMARY (Top 3 sentences):
--------------------------------------------------------------------------------
Battery in Redmi 6 Pro Battery life is one of the strong suits of the Redmi 6 Pro. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well in Redmi 6 Series you will get 3 Different Smartphones with different specs, and from all of them, Redmi 6 pro is most powerful and advanced as said by Redmi India in their launch event . The Redmi 6 Pro doesn t support fast charging, but the bundled 10W adapter gave us roughly a 55 percent charge in an hour, and it took us roughly 2 hours and 35 minutes to charge it completely from zero Well before buying this phone 

Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


the Redmi 6 pro is a beautifully designed,strong and durable . the camera is awesome just like the Note 5 pro,Display is crisp and loud .

T5 DETAILED SUMMARY (60-100 words):
--------------------------------------------------------------------------------


Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


the Redmi 6 pro is a beautifully designed,strong and durable . the camera is awesome just like the Note 5 pro,Display is crisp and loud .the battery is 4000mah which is crazily optimized.In heavy useage it will give 2 day and a half in normal useage .

EXTRACTIVE SUMMARY (Top 3 sentences):
--------------------------------------------------------------------------------
Value for Money Redmi has always have been the the king of budget segment.And yet another they gave us the Redmi 6 pro.It s a beautifully designed,strong and durable.And the camera is awesome just like the Note 5 pro,Display is crisp and the notch is in good shape, means it does take whole screen.Sound quality is clear and loud.But the headset experience is awesome.The Battery is 4000mah which is crazily optimized.In heavy useage it will give 2 day and a half in normal useage it give 3 days.And best thing is the inbuild Mi sercurity which loaded with features.You don t need third security apps.And the phone do charge lit

Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Redmi 6 pro 3 32 GB review after using for one week . over prised by around Rs.1000 Low light photos are bad . only one SIM support VOLTE .

T5 DETAILED SUMMARY (60-100 words):
--------------------------------------------------------------------------------


Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Redmi 6 pro 3 32 GB review after using for one week . over prised by at least around Rs.1000 Low light photos are bad . only one SIM support VOLTE . good sound, good display, smooth touch, excellent battery life management, compact, nice photos under good light .

EXTRACTIVE SUMMARY (Top 3 sentences):
--------------------------------------------------------------------------------
I have already Redmi 4 A Rs.6000 only using for more than 14 months working superbly. I have good opinion about Redmi mobiles. Redmi 6 pro 3 32 GB review after using for one week.



Review #14

Original Text (62 words):
--------------------------------------------------------------------------------
dont buy these product. these copy of the product Please don t buy these product.. I have 3 mobile of MI but i did not face these type of issue. mobile data getting on and off automatically same things are happening with wifi, hot spot and bluetooth. The handset behaving like cheap quality i think its duplicate o

Your max_length is set to 100, but your input_length is only 77. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=38)
Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


i have 3 mobile of MI but i did not face these type of issue . mobile data getting on and off automatically same things are happening with wifi, hot spot and bluetooth .

T5 DETAILED SUMMARY (60-100 words):
--------------------------------------------------------------------------------


Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


i have 3 mobile of MI but i did not face these type of issue . mobile data getting on and off automatically same things are happening with wifi, hot spot and bluetooth . the handset behaving like cheap quality i think its duplicate or copy of original handset .

EXTRACTIVE SUMMARY (Top 3 sentences):
--------------------------------------------------------------------------------
these copy of the product Please don t buy these product.. dont buy these product. I have 3 mobile of MI but i did not face these type of issue.



Review #17

Original Text (74 words):
--------------------------------------------------------------------------------
Design can be better The product is great with quite good features. I am great fan of xiaomi and I have been always using Xiaomi phones. But I am disappointed with mobile designing of Xiaomi as most of all the lower range and mid range phones have same design. Xiaomi always being giving with so many features but being realme entering in market Xiaom

Your max_length is set to 100, but your input_length is only 87. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)
Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


design can be better The product is great with quite good features . I am disappointed with mobile designing of Xiaomi as most of all the lower range and mid range phones have same design .

T5 DETAILED SUMMARY (60-100 words):
--------------------------------------------------------------------------------
design can be better The product is great with quite good features . I am disappointed with mobile designing of Xiaomi as most of all the lower range and mid range phones have same design . Xiaomi has to be more creative to attract more indian customers . xiaomi is a great fan and I have been always using Xiaomi phones .

EXTRACTIVE SUMMARY (Top 3 sentences):
--------------------------------------------------------------------------------
I am great fan of xiaomi and I have been always using Xiaomi phones. But I am disappointed with mobile designing of Xiaomi as most of all the lower range and mid range phones have same design. Xiaomi always being giving with so many features but bei

## Apply Summarization to Entire Dataset


In [9]:
print("Generating short summaries for all reviews...")
df_with_text['Short_Summary'] = df_with_text['Combined_Text'].apply(
    lambda x: summarize_with_t5(str(x), max_length=50, min_length=20) if len(str(x)) > 50 else str(x)
)

print("Generating detailed summaries for all reviews...")
df_with_text['Detailed_Summary'] = df_with_text['Combined_Text'].apply(
    lambda x: summarize_with_t5(str(x), max_length=100, min_length=40) if len(str(x)) > 50 else str(x)
)

print("Generating extractive summaries for all reviews...")
df_with_text['Extractive_Summary'] = df_with_text['Combined_Text'].apply(
    lambda x: extractive_summarize(str(x), num_sentences=2)
)

print("Summarization completed for all records!")


Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Generating short summaries for all reviews...


Your max_length is set to 50, but your input_length is only 19. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Your max_length is set to 50, but your input_length is only 14. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)
Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `ma

Generating detailed summaries for all reviews...


Your max_length is set to 100, but your input_length is only 19. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Your max_length is set to 100, but your input_length is only 14. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)
Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both

Generating extractive summaries for all reviews...
Summarization completed for all records!


In [10]:
summary_df = df_with_text[['Review Title', 'Customer name', 'Rating_Numeric', 
                            'Combined_Text', 'Short_Summary', 'Detailed_Summary', 
                            'Extractive_Summary', 'Word_Count']]

summary_df.to_csv('summarized_feedback.csv', index=False)
print("Summarized dataset saved to 'summarized_feedback.csv'")


Summarized dataset saved to 'summarized_feedback.csv'


In [11]:
df_full = pd.read_csv('cleaned_customer_feedback.csv')
df_summarized = pd.read_csv('summarized_feedback.csv')

df_merged = df_full.merge(df_summarized[['Combined_Text', 'Short_Summary', 'Detailed_Summary']], 
                          on='Combined_Text', how='left')

category_summaries = {}

for category in df_merged['Category'].unique():
    category_reviews = df_merged[df_merged['Category'] == category]['Combined_Text'].head(10).tolist()
    combined_text = ' '.join([str(text) for text in category_reviews if pd.notna(text)])
    
    if len(combined_text) > 100:
        category_summary = summarize_with_t5(combined_text[:1024], max_length=150, min_length=50)
        category_summaries[category] = category_summary

print("=" * 80)
print("CATEGORY-WISE SUMMARIES")
print("=" * 80)
for category, summary in category_summaries.items():
    print(f"\n{category.upper()}:")
    print("-" * 80)
    print(summary)
    print()


Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


CATEGORY-WISE SUMMARIES

DISPLAY:
--------------------------------------------------------------------------------
Redmi 6 Pro features a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC . the phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well .


OTHERS:
--------------------------------------------------------------------------------
vry small size mobile Full display not working in all application . good Wooo All youtubers are paid Realme is sub brand of oppo . but this one really disappointing. i am a hardcore fan of MI . I have 3 mobile of MI but i did not face these type of issue .


CAMERA:
--------------------------------------------------------------------------------
camera clarity is not good and average mobile I purchased first time mi mobile but not good compare to expect Bad camera Bad camera quality and heating problem . front camera is poor only 5m

## Sentiment-based Summarization


In [12]:
def create_sentiment_label(rating):
    if rating >= 4.0:
        return 'Positive'
    elif rating >= 3.0:
        return 'Neutral'
    else:
        return 'Negative'

df_merged['Sentiment'] = df_merged['Rating_Numeric'].apply(create_sentiment_label)

sentiment_summaries = {}

for sentiment in ['Positive', 'Negative', 'Neutral']:
    sentiment_reviews = df_merged[df_merged['Sentiment'] == sentiment]['Combined_Text'].head(10).tolist()
    combined_text = ' '.join([str(text) for text in sentiment_reviews if pd.notna(text)])
    
    if len(combined_text) > 100:
        sentiment_summary = summarize_with_t5(combined_text[:1024], max_length=150, min_length=50)
        sentiment_summaries[sentiment] = sentiment_summary

print("=" * 80)
print("SENTIMENT-BASED SUMMARIES")
print("=" * 80)
for sentiment, summary in sentiment_summaries.items():
    print(f"\n{sentiment.upper()} REVIEWS:")
    print("-" * 80)
    print(summary)
    print()


Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


SENTIMENT-BASED SUMMARIES

POSITIVE REVIEWS:
--------------------------------------------------------------------------------
Redmi 6 Pro features a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC . the phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well .


NEGATIVE REVIEWS:
--------------------------------------------------------------------------------
i have 3 mobile of MI but i did not face these type of issue . mobile data getting on and off automatically same things are happening with wifi, hot spot and bluetooth . these copy of the product Please don t buy these product .


NEUTRAL REVIEWS:
--------------------------------------------------------------------------------
vry small size mobile Full display not working in all application . good sound, good display, smooth touch, excellent battery life management, compact, nice photos and videos under good li

## Custom Summarization Function


In [13]:
def smart_summarize(text, method='t5', length='short'):
    if method == 't5':
        if length == 'short':
            return summarize_with_t5(text, max_length=50, min_length=20)
        else:
            return summarize_with_t5(text, max_length=100, min_length=50)
    elif method == 'extractive':
        num_sentences = 2 if length == 'short' else 4
        return extractive_summarize(text, num_sentences=num_sentences)
    else:
        return text


## Test Custom Function with Examples


In [14]:
test_review = df_with_text.iloc[0]['Combined_Text']

print("=" * 80)
print("CUSTOM SUMMARIZATION FUNCTION DEMO")
print("=" * 80)
print(f"\nOriginal Review:")
print("-" * 80)
print(test_review[:300] + "..." if len(test_review) > 300 else test_review)

print(f"\n\nT5 Short Summary:")
print("-" * 80)
print(smart_summarize(test_review, method='t5', length='short'))

print(f"\n\nT5 Detailed Summary:")
print("-" * 80)
print(smart_summarize(test_review, method='t5', length='detailed'))

print(f"\n\nExtractive Short Summary:")
print("-" * 80)
print(smart_summarize(test_review, method='extractive', length='short'))

print(f"\n\nExtractive Detailed Summary:")
print("-" * 80)
print(smart_summarize(test_review, method='extractive', length='detailed'))


Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


CUSTOM SUMMARIZATION FUNCTION DEMO

Original Review:
--------------------------------------------------------------------------------
Another Midrange killer Smartphone by Xiaomi Another Midrange killer Smartphone by Xiaomi Major Highlights The Redmi 6 Pro sports a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC The phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 ...


T5 Short Summary:
--------------------------------------------------------------------------------


Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Redmi 6 Pro features a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC . the phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well .


T5 Detailed Summary:
--------------------------------------------------------------------------------
Redmi 6 Pro features a 5.84 inch full HD display with a notch Powered by the Qualcomm Snapdragon 625 SoC . the phone is priced at Rs. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well .


Extractive Short Summary:
--------------------------------------------------------------------------------
Battery in Redmi 6 Pro Battery life is one of the strong suits of the Redmi 6 Pro. 10,999 for the 3GB RAM variant Start Point Battery of 4000 mAh and its Durability The Tripple Slots Dual AI Camera Well in Redmi 6 Series you will get 3 Different Smartphones with different sp

## Summary Statistics


In [15]:
summary_stats = pd.read_csv('summarized_feedback.csv')

summary_stats['Original_Length'] = summary_stats['Combined_Text'].str.len()
summary_stats['Short_Summary_Length'] = summary_stats['Short_Summary'].str.len()
summary_stats['Detailed_Summary_Length'] = summary_stats['Detailed_Summary'].str.len()
summary_stats['Compression_Ratio_Short'] = (summary_stats['Short_Summary_Length'] / summary_stats['Original_Length'] * 100).round(2)
summary_stats['Compression_Ratio_Detailed'] = (summary_stats['Detailed_Summary_Length'] / summary_stats['Original_Length'] * 100).round(2)

print("=" * 80)
print("SUMMARIZATION STATISTICS")
print("=" * 80)
print(f"\nTotal reviews summarized: {len(summary_stats)}")
print(f"\nAverage original text length: {summary_stats['Original_Length'].mean():.0f} characters")
print(f"Average short summary length: {summary_stats['Short_Summary_Length'].mean():.0f} characters")
print(f"Average detailed summary length: {summary_stats['Detailed_Summary_Length'].mean():.0f} characters")
print(f"\nAverage compression ratio (short): {summary_stats['Compression_Ratio_Short'].mean():.1f}%")
print(f"Average compression ratio (detailed): {summary_stats['Compression_Ratio_Detailed'].mean():.1f}%")
print("=" * 80)


SUMMARIZATION STATISTICS

Total reviews summarized: 259

Average original text length: 153 characters
Average short summary length: 72 characters
Average detailed summary length: 100 characters

Average compression ratio (short): 96.0%
Average compression ratio (detailed): 124.4%


## Save All Results


In [16]:
import json

final_results = {
    'category_summaries': category_summaries,
    'sentiment_summaries': sentiment_summaries,
    'statistics': {
        'total_reviews': len(summary_stats),
        'avg_original_length': float(summary_stats['Original_Length'].mean()),
        'avg_short_summary_length': float(summary_stats['Short_Summary_Length'].mean()),
        'avg_detailed_summary_length': float(summary_stats['Detailed_Summary_Length'].mean()),
        'avg_compression_ratio_short': float(summary_stats['Compression_Ratio_Short'].mean()),
        'avg_compression_ratio_detailed': float(summary_stats['Compression_Ratio_Detailed'].mean())
    }
}

with open('summarization_results.json', 'w') as f:
    json.dump(final_results, f, indent=4)

print("All summarization results saved to 'summarization_results.json'")


All summarization results saved to 'summarization_results.json'


In [17]:
print("\n" + "=" * 80)
print("PART 3: TEXT SUMMARIZATION - COMPLETED")
print("=" * 80)
print("\nDeliverables:")
print("1. summarized_feedback.csv - All reviews with summaries")
print("2. summarization_results.json - Category and sentiment summaries")
print("\nMethods Implemented:")
print("- T5 Transformer-based summarization (short & detailed)")
print("- Extractive summarization using TF-IDF + Cosine Similarity")
print("- Category-wise aggregated summaries")
print("- Sentiment-based summaries")
print("=" * 80)



PART 3: TEXT SUMMARIZATION - COMPLETED

Deliverables:
1. summarized_feedback.csv - All reviews with summaries
2. summarization_results.json - Category and sentiment summaries

Methods Implemented:
- T5 Transformer-based summarization (short & detailed)
- Extractive summarization using TF-IDF + Cosine Similarity
- Category-wise aggregated summaries
- Sentiment-based summaries
