In [2]:
import pandas as pd 
import numpy as np 
import tensorflow as tf 
from textblob import TextBlob
from transformers import T5Tokenizer, T5ForConditionalGeneration
import language_tool_python


  from .autonotebook import tqdm as notebook_tqdm


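## Data preparation

Load the review dataset and define the helpers that collect, summarise, and grammar-correct each business's reviews.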

In [3]:
# Load the review dataset; the helpers below read its 'name' and 'text' columns.
df5 = pd.read_csv('codefest_dataset.csv')

In [4]:
def get_business_reviews(business_name, reviews_df=None):
    """Return every review for one business joined into a single string.

    Parameters
    ----------
    business_name : str
        Value matched exactly against the ``name`` column.
    reviews_df : pandas.DataFrame, optional
        Frame with ``name`` and ``text`` columns. When omitted, the
        notebook-level ``df5`` is used.

    Returns
    -------
    str
        The matching ``text`` values separated by single spaces, or an empty
        string when no row matches.
    """
    if reviews_df is None:
        reviews_df = df5
    matches = reviews_df.loc[reviews_df['name'] == business_name, 'text']
    # A missing review is NaN (a float), which would make str.join raise
    # TypeError; drop those and coerce the remainder to str before joining.
    return " ".join(matches.dropna().astype(str))

# Loaded (tokenizer, model) pairs keyed by checkpoint name, so the weights are
# read once per kernel session instead of on every call.
_T5_CACHE = {}


def _load_t5(checkpoint="google-t5/t5-base"):
    """Return the cached ``(tokenizer, model)`` pair, loading it on first use."""
    if checkpoint not in _T5_CACHE:
        _T5_CACHE[checkpoint] = (
            T5Tokenizer.from_pretrained(checkpoint),
            T5ForConditionalGeneration.from_pretrained(checkpoint),
        )
    return _T5_CACHE[checkpoint]


def generate_summary(text, bullet_points):
    """Summarise review text into at most ``bullet_points`` bullet strings.

    Parameters
    ----------
    text : str
        Concatenated review text to summarise.
    bullet_points : int
        Maximum number of bullets to return; also inserted into the prompt.

    Returns
    -------
    list of str
        Each entry is ``"- "`` followed by one fragment of the generated
        summary (split on ``". "``). Fewer than ``bullet_points`` entries are
        returned when the summary contains fewer fragments, and an empty list
        is returned when ``text`` is empty or only whitespace.
    """
    # With no review text the model would only "summarise" the instruction
    # itself, so skip generation entirely.
    if not text or not text.strip():
        return []

    tokenizer, model = _load_t5()
    prompt = (
        f"summarize: Summarize the following reviews into {bullet_points} distinct bullet points that are grammatically correct and provide clear context.\n\n"
        "Reviews:\n" + text
    )
    # Input beyond 512 tokens is truncated, so for long review sets only the
    # leading portion of the text reaches the model.
    input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
    output = model.generate(
        input_ids,
        max_length=500,
        min_length=50,
        num_beams=6,
        early_stopping=True
    )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    sentences = [s.strip() for s in summary.split('. ') if s.strip()]
    return ["- " + sentence for sentence in sentences[:bullet_points]]

# Shared LanguageTool instance, created on first use. Building one per call
# repeats the tool's start-up cost for every business and leaves the previous
# instances unclosed.
_GRAMMAR_TOOL = None


def correct_grammer(bullet_list):
    """Return the bullets with LanguageTool's en-US corrections applied.

    Parameters
    ----------
    bullet_list : list of str
        Bullet strings to correct.

    Returns
    -------
    list of str
        Corrected strings in the same order as the input; an empty list when
        ``bullet_list`` is empty (the tool is not started in that case).
    """
    global _GRAMMAR_TOOL
    if not bullet_list:
        return []
    if _GRAMMAR_TOOL is None:
        _GRAMMAR_TOOL = language_tool_python.LanguageTool('en-US')
    return [_GRAMMAR_TOOL.correct(point) for point in bullet_list]

def get_prediction(business_name):
    """Build grammar-corrected summary bullets for one business.

    Collects the business's reviews, summarises them into at most three
    bullets, prints the uncorrected bullets, and returns the corrected ones.
    """
    raw_bullets = generate_summary(
        get_business_reviews(business_name),
        bullet_points=3,
    )
    print("Original:", raw_bullets)
    return correct_grammer(raw_bullets)



In [7]:
# Spot-check sentiment scoring on a single business before running the full dataset.
blob = TextBlob(get_business_reviews("Eazor's Auto Salon"))

In [10]:
# Read polarity by field name, the same way subjectivity is read further down.
polarity = blob.sentiment.polarity

In [13]:
# Display the full (polarity, subjectivity) result for the sample business.
blob.sentiment

Sentiment(polarity=0.24221213248557005, subjectivity=0.5457794462481961)

In [11]:
# Linearly rescale polarity onto 0-100 (TextBlob documents polarity as lying
# in [-1, 1]). NOTE(review): this is a rescaled score, not the share of
# reviews that are positive.
positive_percent = ((polarity + 1) / 2) * 100
negative_percent = 100 - positive_percent

In [12]:
# Report both shares rounded to two decimal places.
print(f"Positive: {positive_percent:.2f}%")
print(f"Negative: {negative_percent:.2f}%")

Positive: 62.11%
Negative: 37.89%


In [14]:
# Express subjectivity as a percentage; objectivity is defined here as its complement.
subjectivity = blob.sentiment.subjectivity
subjectivity_percent = subjectivity * 100
objectivity_percent = 100 - subjectivity_percent

In [18]:
records = []
# Visit each distinct business once. Iterating over row positions with
# .loc[i] assumes a default RangeIndex and would repeat the expensive
# summarisation for every extra row that shares a name.
for business_name in df5['name'].dropna().unique():
    sentiment = TextBlob(get_business_reviews(business_name)).sentiment
    polarity = sentiment.polarity
    subjectivity = sentiment.subjectivity
    # Linear rescale of both scores onto 0-100; each "negative"/"objectivity"
    # figure is simply the complement of its counterpart.
    positive_percent = ((polarity + 1) / 2) * 100
    negative_percent = 100 - positive_percent
    subjectivity_percent = subjectivity * 100
    objectivity_percent = 100 - subjectivity_percent
    final_summary = get_prediction(business_name)
    # Pad with empty strings so all three review columns are always filled.
    review_1, review_2, review_3 = (list(final_summary) + ["", "", ""])[:3]
    records.append({
        "Business_name": business_name,
        "positive_percent": positive_percent,
        "negative_percent": negative_percent,
        "subjectivity_percent": subjectivity_percent,
        "objectivity_percent": objectivity_percent,
        "review_1": review_1,
        "review_2": review_2,
        "review_3": review_3
    })
results_df = pd.DataFrame(records)
# to_csv writes CSV text, so the file carries a .csv extension.
results_df.to_csv("codefest_results.csv", index=False)

Original: ['- i bought my car at Cochran and the salesman was excellent', "- i've taken the car back two times since for routine maintenance", "- i'm planning on getting a new car in a little over a year from now ."]
Original: ['- reviewer: "this place SUCKS', '- all they care about is the sale, not YOU!" reviewer: "i was first on the list for a test drive on a car that was a solid deal" reviewer: "they asked me to have a seat in the waiting room because someone is buying the car"']
Original: ["- reviewer's parents have been buying cars off of Donna Dunnivan for over a decade", '- "i couldn\'t have been more pleased with the transaction!" "my being in Japan was largely irrelevant" "i dropped my car off on a Wednesday morning for a diagnostic with John Salmon"']
Original: ["- cochran hyundai's check engine light came on just before a 1000 mile driving trip", '- the cause was determined to be minor, and in fact, had already been corrected', '- the service center has me HIGHLY disappointe

KeyboardInterrupt: 

In [21]:
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [22]:
# Build the results frame from the records accumulated so far and write it
# out as an Excel workbook (the .xlsx writer relies on openpyxl).
results_df = pd.DataFrame(records)
results_df.to_excel("codefest_results.xlsx", index=False)

In [23]:
# Preview the collected results; pandas elides the middle rows of a long frame.
results_df

Unnamed: 0,Business_name,positive_percent,negative_percent,subjectivity_percent,objectivity_percent,review_1,review_2,review_3
0,#1 Cochran Buick GMC of Monroeville,52.823651,47.176349,40.699183,59.300817,- I bought my car at Cochran and the salesman ...,- I've taken the car back two times since for ...,- I'm planning on getting a new car in a littl...
1,#1 Cochran Buick GMC of Robinson,57.690078,42.309922,52.252409,47.747591,"- reviewer: ""this place SUCKS","- all they care about is the sale, not YOU!"" r...",
2,#1 Cochran Cadillac - Monroeville,52.335011,47.664989,38.452671,61.547329,- reviewer's parents have been buying cars off...,"- ""I couldn't have been more pleased with the ...",
3,#1 Cochran Hyundai - Monroeville,59.957997,40.042003,46.671758,53.328242,- Cochran Hyundai's check engine light came on...,"- the cause was determined to be minor, and in...",- the service center has me HIGHLY disappointe...
4,#1 Cochran Hyundai of South Hills,55.833488,44.166512,44.979958,55.020042,- reviewer's issues with Cochran Hyundai revol...,- Cochran Hyundai of south hills gave me a quo...,- when it came time to purchase they pulled ou...
...,...,...,...,...,...,...,...,...
517,Aurochs Brewing,64.893821,35.106179,53.268395,46.731605,- aurochs is a brewery that makes all of its b...,"- all the beers tasted pretty good, and I was ...",- the staff was great and really took the time...
518,Authentic Pizzeria & Deli,47.923089,52.076911,52.874060,47.125940,- authentic pizza was voted the best in the tr...,- the pizza itself was just plain bad,- the crust was a bit chewy and had absolutely...
519,Auto Addictions,66.666667,33.333333,61.541667,38.458333,- reviews: unprofessional sloppy work,- Install was not checked for accuracy and cos...,- '03 Audi all road and they did a great job t...
520,Auto Asylum Car Care Center,55.762626,44.237374,33.752525,66.247475,"- reviewer bought a Groupon for this place, bu...",- Groupon and I have both tried contacting the...,- car looks like new! Much better job than oth...
