In [1]:

!pip install transformers textblob
!python -m textblob.download_corpora


[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


In [3]:
# --- Imports: standard library, third-party, then Colab helpers ---
import io

import pandas as pd
import torch
from textblob import TextBlob
from transformers import pipeline

from google.colab import files

# Ask the user for the input CSV through Colab's upload widget. The result is
# indexed by filename below and its values are wrapped in BytesIO for pandas.
uploaded = files.upload()
if not uploaded:
    # Fail early with a clear message (and before downloading the model)
    # instead of an opaque IndexError when the upload was cancelled.
    raise ValueError("No file was uploaded; upload the input CSV and re-run this cell.")

# Pipeline device index: 0 selects the first CUDA GPU, -1 falls back to CPU.
device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)

# Only the first uploaded file is read; any additional uploads are ignored.
csv_filename = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[csv_filename]))

def get_summary(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Only the first 1000 characters of ``text`` are sent to the model, with
    generation bounded by ``min_length=40`` / ``max_length=120`` and sampling
    disabled.

    Args:
        text: The document to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``""`` when
        summarization fails for any ordinary reason (best-effort behavior).
    """
    import warnings

    try:
        outputs = summarizer(text[:1000], max_length=120, min_length=40, do_sample=False)
        return outputs[0]['summary_text']
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        # Surface the failure instead of hiding it, but keep going.
        warnings.warn(f"Summarization failed ({exc!r}); returning an empty summary.")
        return ""

def get_sentiment(text):
    """Classify ``text`` by the sign of its TextBlob polarity score.

    Returns:
        ``"positive"`` for a score above zero, ``"negative"`` for a score
        below zero, and ``"neutral"`` otherwise.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return "positive"
    if score < 0:
        return "negative"
    return "neutral"

def get_keywords(text):
    """Return up to five noun phrases found by TextBlob, joined with ``", "``."""
    top_phrases = TextBlob(text).noun_phrases[:5]
    separator = ', '
    return separator.join(top_phrases)

# Rows whose stripped text is this short (or shorter) are skipped.
MIN_CONTENT_CHARS = 30
# Number of leading characters of the original text kept in the report.
PREVIEW_CHARS = 300
RESULT_COLUMNS = ["Original_Text", "Summary", "Sentiment", "Key_Topics"]

results = []
# Read the text column directly; a missing "EV_Content" column yields no rows.
for content in df.get("EV_Content", []):
    # Skip non-string cells (e.g. NaN) and texts too short to process.
    if not isinstance(content, str) or len(content.strip()) <= MIN_CONTENT_CHARS:
        continue

    # Only mark the preview as truncated when text was actually cut off.
    preview = content[:PREVIEW_CHARS]
    if len(content) > PREVIEW_CHARS:
        preview += "..."

    results.append({
        "Original_Text": preview,
        "Summary": get_summary(content),
        "Sentiment": get_sentiment(content),
        "Key_Topics": get_keywords(content)
    })

# Explicit columns keep the CSV header present even when no row qualified.
result_df = pd.DataFrame(results, columns=RESULT_COLUMNS)
result_df.to_csv("EV_Insights_Summary.csv", index=False)
files.download("EV_Insights_Summary.csv")

Saving team_bhp_travelogues_ev_dynamic_data.csv to team_bhp_travelogues_ev_dynamic_data (1).csv


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Device set to use cuda:0
Your max_length is set to 120, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
# NOTE: this cell repeats the upload / model setup performed by the previous
# cell, so it can also be run on its own.

# --- Imports: standard library, third-party, then Colab helpers ---
import io

import pandas as pd
import torch
from textblob import TextBlob
from transformers import pipeline

from google.colab import files

# Ask the user for the input CSV through Colab's upload widget. The result is
# indexed by filename below and its values are wrapped in BytesIO for pandas.
uploaded = files.upload()
if not uploaded:
    # Fail early with a clear message (and before loading the model) instead
    # of an opaque IndexError when the upload was cancelled.
    raise ValueError("No file was uploaded; upload the input CSV and re-run this cell.")

# Pipeline device index: 0 selects the first CUDA GPU, -1 falls back to CPU.
device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)

# Only the first uploaded file is read; any additional uploads are ignored.
csv_filename = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[csv_filename]))

def get_summary(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Only the first 1000 characters of ``text`` are sent to the model, with
    generation bounded by ``min_length=40`` / ``max_length=120`` and sampling
    disabled.

    Args:
        text: The document to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``""`` when
        summarization fails for any ordinary reason (best-effort behavior).
    """
    import warnings

    try:
        outputs = summarizer(text[:1000], max_length=120, min_length=40, do_sample=False)
        return outputs[0]['summary_text']
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        # Surface the failure instead of hiding it, but keep going.
        warnings.warn(f"Summarization failed ({exc!r}); returning an empty summary.")
        return ""

def get_sentiment(text):
    """Map the TextBlob polarity of ``text`` to a coarse sentiment label.

    Returns:
        ``"positive"`` when polarity is greater than zero, ``"negative"`` when
        it is less than zero, otherwise ``"neutral"``.
    """
    polarity_score = TextBlob(text).sentiment.polarity
    if polarity_score > 0:
        label = "positive"
    elif polarity_score < 0:
        label = "negative"
    else:
        label = "neutral"
    return label

def get_keywords(text):
    """Join the first five TextBlob noun phrases of ``text`` with ``", "``."""
    noun_phrases = TextBlob(text).noun_phrases
    leading = noun_phrases[:5]
    return ', '.join(leading)

def generate_story(summary, sentiment, keywords):
    """Compose a short templated narrative from the per-row analysis results.

    Args:
        summary: Summary text; it is lower-cased before being embedded.
        sentiment: One of ``"positive"``, ``"negative"`` or ``"neutral"``.
        keywords: Comma-separated key topics.

    Returns:
        A paragraph with an opening sentence whose tone follows ``sentiment``,
        a highlights sentence, a summary sentence and a fixed closing
        sentence. The highlights or summary sentence is left out when its
        input is empty or whitespace-only.

    Raises:
        KeyError: If ``sentiment`` is not one of the three known labels.
    """
    tone = {
        "positive": "an uplifting",
        "negative": "a concerning",
        "neutral": "a factual"
    }[sentiment]

    highlights = keywords.strip()
    journey = summary.strip().lower()

    parts = [f"This is {tone} glimpse into an EV travel experience. "]
    if highlights:
        # Avoid emitting "The main highlights include . " when no topics exist.
        parts.append(f"The main highlights include {highlights}. ")
    if journey:
        # An empty summary would otherwise leave a dangling
        # "The journey reveals" fragment.
        parts.append(f"The journey reveals {journey} ")
    parts.append("This story reflects the evolving landscape of electric mobility in India.")
    return "".join(parts)

# Rows whose stripped text is this short (or shorter) are skipped.
MIN_CONTENT_CHARS = 30
# Number of leading characters of the original text kept in the report.
PREVIEW_CHARS = 300
RESULT_COLUMNS = ["Original_Text", "Summary", "Sentiment", "Key_Topics", "Deep_Insights"]

results = []
# Read the text column directly; a missing "EV_Content" column yields no rows.
for content in df.get("EV_Content", []):
    # Skip non-string cells (e.g. NaN) and texts too short to process.
    if not isinstance(content, str) or len(content.strip()) <= MIN_CONTENT_CHARS:
        continue

    summary = get_summary(content)
    sentiment = get_sentiment(content)
    keywords = get_keywords(content)
    deep_insight = generate_story(summary, sentiment, keywords)

    # Only mark the preview as truncated when text was actually cut off.
    preview = content[:PREVIEW_CHARS]
    if len(content) > PREVIEW_CHARS:
        preview += "..."

    results.append({
        "Original_Text": preview,
        "Summary": summary,
        "Sentiment": sentiment,
        "Key_Topics": keywords,
        "Deep_Insights": deep_insight
    })

# Explicit columns keep the CSV header present even when no row qualified.
result_df = pd.DataFrame(results, columns=RESULT_COLUMNS)
result_df.to_csv("EV_Insights_Summary.csv", index=False)
files.download("EV_Insights_Summary.csv")


Saving team_bhp_travelogues_ev_dynamic_data.csv to team_bhp_travelogues_ev_dynamic_data (2).csv


Device set to use cuda:0
Your max_length is set to 120, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>