In [1]:
pip install transformers

Note: you may need to restart the kernel to use updated packages.




In [6]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from torch.utils.data import Dataset

# Read the CSV dataset into a DataFrame
file_path = 'test.csv'
df = pd.read_csv(file_path)

# The tokenizer and the seq2seq model are both loaded from the same checkpoint
checkpoint_name = "Falconsai/text_summarization"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_name)

# Encode one article as fixed-length PyTorch tensors
def preprocess_and_tokenize(article):
    """Tokenize ``article`` with the module-level ``tokenizer``.

    The text is truncated to at most 512 tokens and padded up to exactly
    512 tokens, and the encoding is returned as PyTorch tensors.
    """
    encoding_options = dict(
        truncation=True,
        padding='max_length',
        max_length=512,
        return_tensors="pt",
    )
    return tokenizer(article, **encoding_options)

# Function to generate a summary with specified emotion
def generate_summary_with_emotion(article, model, emotion):
    """Generate a summary of ``article`` using an emotion-flavoured prompt.

    Args:
        article: Text of the article to summarize.
        model: Seq2seq model exposing ``generate`` and ``device``; it is used
            together with the module-level ``tokenizer``.
        emotion: Word interpolated into the prompt (e.g. "positive").

    Returns:
        The decoded summary string with special tokens removed.
    """
    # NOTE(review): the emotion only reaches the model through this prompt text;
    # whether the checkpoint actually follows the instruction should be verified.
    prompt = f"Generate a {emotion} summary of the article: {article}"

    # A single sequence needs no padding. Padding to 512 tokens and then passing
    # bare input_ids leaves generate() without an explicit attention mask, so we
    # tokenize unpadded and forward the mask produced by the tokenizer.
    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    # Keep the inputs on the same device as the model's weights.
    encoded = encoded.to(model.device)

    summary_ids = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )
    # generate() returns a batch; there is exactly one sequence in it.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Function to apply the summarization pipeline with specified emotion to a list of articles
def summarization_pipeline_with_emotion(articles, emotion):
    """Summarize every article in ``articles`` with the requested ``emotion``.

    Each article is passed to ``generate_summary_with_emotion`` together with
    the module-level ``model``.

    Args:
        articles: Iterable of article strings.
        emotion: Word forwarded to the prompt of each summary.

    Returns:
        A list of summary strings, one per article, in input order.
    """
    # The previous version also tokenized every article up front and then
    # discarded the result; generation tokenizes its own prompt, so that
    # unused work has been removed.
    return [generate_summary_with_emotion(article, model, emotion) for article in articles]




In [7]:
# Example usage
articles_to_summarize = ['Young Derich Born’s cheekbones were not always sharp enough to peel apples. New research and conservation work on the 1533 portrait by Hans Holbein the Younger has revealed that the painter made repeated small changes, each time giving Born even more chiseled features.Young Derich Born’s cheekbones were not always sharp enough to peel apples. New research and conservation work on the 1533 portrait by Hans Holbein the Younger has revealed that the painter made repeated small changes, each time giving Born even more chiseled features.The dazzling result bears an inscription on the parapet under Born’s arm, presumably composed by Holbein. Lifted from a near-identical tribute to Albrecht Dürer, the translated statement boasts: “If you added a voice, this would be Derich his very self. You would be in doubt whether the painter or his father made him.”The painting, newly conserved after work at the J. Paul Getty Museum in Los Angeles, California, was once owned by Charles I, and bought back by Charles II as he tried to reassemble the Royal Collection scattered under the politician Oliver Cromwell. It will now star in an exhibition at the Queen’s Gallery in Buckingham Palace, opening November 10, which traces the work in England of the German-born Holbein, and his rise to become the favorite artist of the Tudor court.Born was not royal but a German merchant in the Steelyard in London, the Thames-side headquarters of the Hanseatic League — a powerful group of traders, at least seven of whom commissioned portraits from Holbein, with two now surviving in the Royal Collection, according to the exhibition catalog. Born was just 23 when he commissioned his portrait, and was evidently as arrogant as his features. 
He was expelled from England in 1541 after getting into an unwise dispute with the powerful Duke of Suffolk, and according to the show’s curator, Kate Heard, was still grumbling about it a decade later.','It’s safe to say that Harry Kane is enjoying life in Germany.The England captain made his high-profile move from boyhood club Tottenham to Bayern Munich in August and he hasn’t looked back.After netting a brace against Heidenheim on Saturday, the striker took his tally to 17 league goals this season. No player has scored more after 11 Bundesliga games, with Bayern’s former talisman Robert Lewandowski holding the previous record of 16.Kane, the league’s top scorer, also has four goals in the Champions League with Bundesliga leaders Bayern yet to drop points in the group stage.“It is impressive,” Bayern Munich’s sports director Christoph Freund said, per Reuters, after Kane’s double in the team’s 4-2 win against Heidenheim.“He is playing away from the UK for the first time and then he scores like that. The team also plays him into position. He is a phenomenon.”Kane, the Premier League’s second all-time scorer, was already considered a world-class talent before his switch from England to Germany, but it was unknown whether he could emulate his form at Tottenham.The 30-year-old has answered that question emphatically, ripping up the record books in the process.']
# Tone word that generate_summary_with_emotion interpolates into its prompt
# (e.g. "neutral", "positive", "negative").
emotion = "positive"
# Produces one summary per entry of articles_to_summarize, in the same order.
summaries = summarization_pipeline_with_emotion(articles_to_summarize, emotion)

# Print each summary under a 1-based heading that names the requested emotion.
for i, summary in enumerate(summaries):
    print(f"{emotion.capitalize()} Summary for Article {i + 1}:\n{summary}")

Positive Summary for Article 1:
New research and conservation work on the 1533 portrait by Hans Holbein the Younger has revealed that the painter made repeated small changes, each time giving Born even more chiseled features.Young Derich Born’s cheekbones were not always sharp enough to peel apples.The dazzling result bears an inscription on the parapet under Born’s arm, presumably composed by Holbein. The painting, newly conserved after work at the J. Paul Getty Museum in Los Angeles, California, was once owned
Positive Summary for Article 2:
Harry Kane made his high-profile move from Tottenham to Bayern Munich in August and he hasn’t looked back.Kane, the Premier League’s second all-time scorer, also has four goals in the Champions League with Bayern yet to drop points in the group stage.“It is impressive,” Bayern Munich’s sports director Christoph Freund said, per Reuters, after Kane’s double in the team’s 4-2 win against Heidenheim.“He is playing away from the UK for the first time

In [8]:
articles_to_summarize = ['Whatever has gone wrong? After China rejoined the world economy in 1978, it became the most spectacular growth story in history. Farm reform, industrialisation and rising incomes lifted nearly 800m people out of extreme poverty. Having produced just a tenth as much as America in 1980, China’s economy is now about three-quarters the size. Yet instead of roaring back after the government abandoned its “zero-covid” policy at the end of 2022, it is lurching from one ditch to the next.','Donald Trump is promising a second presidency that would be an aberration in American history.The former and possibly future commander in chief aspires to strongman power if he wins back the White House next year. He believes his authority would be absolute. He wants vengeance against his political enemies. He’d pose the greatest challenge to the rule of law and the Constitution in modern times, seek to crush press freedoms and gut the machinery of government.None of this is speculation. Trump is saying and showing exactly what he would do in his rallies, social media posts, interviews, lawyers’ filings and even appearances in court that he uses to stigmatize the legal system. And Trump’s ambitions should be taken seriously because one year from the election, President Joe Biden’s reelection hopes are far from secure.Take Trump’s speech in New Hampshire on Saturday, when he chose to rip at national divides rather than foster unity on Veterans Day.“We will root out the communists, Marxists, fascists and the radical left thugs that live like vermin within the confines of our country,” Trump said, using the demagogic technique of dehumanizing his opponents. He warned that “the real threat is not from the radical right. The real threat is from the radical left, and it’s growing every day.”']
# Tone word that generate_summary_with_emotion interpolates into its prompt
# (e.g. "neutral", "positive", "negative").
# NOTE(review): the output recorded for this cell reads like a plain extractive
# summary rather than a sarcastic one — confirm the model honours the emotion.
emotion = "sarcastic"
# Overwrites the `summaries` produced by the previous example cell.
summaries = summarization_pipeline_with_emotion(articles_to_summarize, emotion)

# Print each summary under a 1-based heading that names the requested emotion.
for i, summary in enumerate(summaries):
    print(f"{emotion.capitalize()} Summary for Article {i + 1}:\n{summary}")

Sarcastic Summary for Article 1:
China rejoined the world economy in 1978, it became the most spectacular growth story in history. Farm reform, industrialisation and rising incomes lifted nearly 800m people out of extreme poverty. After China rejoined the world economy in 1978, it became the most spectacular growth story in history.
Sarcastic Summary for Article 2:
Trump is promising a second presidency that would be an aberration in American history.The former and possibly future commander in chief aspires to strongman power if he wins back the White House next year. He believes his authority would be absolute. He wants vengeance against his political enemies.


In [9]:
# Display the DataFrame read from test.csv; the recorded output shows columns
# id, article and highlights. (df.head() would give a shorter preview.)
df

Unnamed: 0,id,article,highlights
0,92c514c913c0bdfe25341af9fd72b29db544099b,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,caabf9cbdf96eb1410295a673e953d304391bfbb,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."
...,...,...,...
11485,ed8674cc15b29a87d8df8de1efee353d71122272,Our young Earth may have collided with a body ...,Oxford scientists say a Mercury-like body stru...
11486,2f58d1a99e9c47914e4b1c31613e3a041cd9011e,A man facing trial for helping his former love...,Man accused of helping former lover kill woman...
11487,411f6d57825161c3a037b4742baccd6cd227c0c3,A dozen or more metal implements are arranged ...,Marianne Power tried the tuning fork facial at...
11488,b5683ef8342056b17b068e0d59bdbe87e3fe44ea,Brook Lopez dominated twin brother Robin with ...,Brooklyn Nets beat the Portland Trail Blazers ...


In [10]:
articles_to_summarize = ["Ever noticed how plane seats appear to be getting smaller and smaller? With increasing numbers of people taking to the skies, some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable - it's putting our health and safety in danger. More than squabbling over the arm rest, shrinking space on planes putting our health and safety in danger? This week, a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes, it doesn't stipulate a minimum amount of space for humans. 'In a world where animals have more rights to space and food than humans,' said Charlie Leocha, consumer representative on the committee.Â 'It is time that the DOT and FAA take a stand for humane treatment of passengers.' But could crowding on planes lead to more serious issues than fighting for space in the overhead lockers, crashing elbows and seat back kicking? Tests conducted by the FAA use planes with a 31 inch pitch, a standard which on some airlines has decreased . Many economy seats on United Airlines have 30 inches of room, while some airlines offer as little as 28 inches . Cynthia Corbertt, a human factors researcher with the Federal Aviation Administration, that it conducts tests on how quickly passengers can leave a plane. But these tests are conducted using planes with 31 inches between each row of seats, a standard which on some airlines has decreased, reported the Detroit News. The distance between two seats from one point on a seat to the same point on the seat behind it is known as the pitch. While most airlines stick to a pitch of 31 inches or above, some fall below this. While United Airlines has 30 inches of space, Gulf Air economy seats have between 29 and 32 inches, Air Asia offers 29 inches and Spirit Airlines offers just 28 inches. 
British Airways has a seat pitch of 31 inches, while easyJet has 29 inches, Thomson's short haul seat pitch is 28 inches, and Virgin Atlantic's is 30-31.","A drunk teenage boy had to be rescued by security after jumping into a lions' enclosure at a zoo in western India. Rahul Kumar, 17, clambered over the enclosure fence at theÂ Kamla Nehru Zoological Park in Ahmedabad, and began running towards the animals, shouting he would 'kill them'. Mr Kumar explained afterwards that he was drunk and 'thought I'd stand a good chance' against the predators. Next level drunk: Intoxicated Rahul Kumar, 17, climbed into the lions' enclosure at a zoo in Ahmedabad and began running towards the animals shouting 'Today I kill a lion!' Mr Kumar had been sitting near the enclosure when he suddenly made a dash for the lions, surprising zoo security. The intoxicated teenager ran towards the lions, shouting: 'Today I kill a lion or a lion kills me!' A zoo spokesman said: 'Guards had earlier spotted him close to the enclosure but had no idea he was planing to enter it. 'Fortunately, there are eight moats to cross before getting to where the lions usually are and he fell into the second one, allowing guards to catch up with him and take him out. 'We then handed him over to the police.' Brave fool: Fortunately, Mr Kumar  fell into a moat as he ran towards the lions and could be rescued by zoo security staff before reaching the animals (stock image) Kumar later explained: 'I don't really know why I did it. 'I was drunk and thought I'd stand a good chance.' A police spokesman said: 'He has been cautioned and will be sent for psychiatric evaluation. 'Fortunately for him, the lions were asleep and the zoo guards acted quickly enough to prevent a tragedy similar to that in Delhi.' Last year a 20-year-old man was mauled to death by a tiger in the Indian capital after climbing into its enclosure at the city zoo."]
# Tone word that generate_summary_with_emotion interpolates into its prompt.
emotion = 'positive'
# Overwrites `summaries`; the TF-IDF similarity cell further down iterates over
# this variable, so it sees whichever example cell ran last.
summaries = summarization_pipeline_with_emotion(articles_to_summarize, emotion)

# Print each summary under a 1-based heading that names the requested emotion.
for i, summary in enumerate(summaries):
    print(f"{emotion.capitalize()} Summary for Article {i + 1}:\n{summary}")

Positive Summary for Article 1:
'The shrinking space on planes is not only uncomfortable - it's putting our health and safety in danger,' said Consumer Representative on the committee. 'It is time that the DOT and FAA take a stand for humane treatment of passengers'
Positive Summary for Article 2:
Rahul Kumar, 17, climbed into a lions' enclosure at a zoo in Ahmedabad. He ran towards the animals shouting: 'Today I kill a lion or a lion kills me!' Mr Kumar had been sitting near the enclosure when he suddenly made a dash for the lions. He was drunk and 'thought I'd stand a good chance' against the predators.


In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# `df` is the DataFrame read from test.csv earlier in the notebook; this cell
# uses its 'highlights' column (the reference summaries).

# Collect the reference highlights as a plain Python list, in row order.
highlights_from_dataset = df['highlights'].tolist()

# Fit a TF-IDF vocabulary on the highlights and vectorize them. The k-th entry
# of highlights_from_dataset is the k-th document passed to the vectorizer, which
# is what lets find_most_similar_article map a best-match position back to df.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(highlights_from_dataset)

# Cosine similarity of one generated summary against every dataset highlight
def calculate_similarity(summary, tfidf_matrix, vectorizer):
    """Score ``summary`` against all rows of ``tfidf_matrix``.

    ``summary`` is vectorized with the given ``vectorizer`` as a one-element
    batch, and the cosine similarity between that vector and ``tfidf_matrix``
    is returned.
    """
    return cosine_similarity(vectorizer.transform([summary]), tfidf_matrix)

# Function to find the most similar article based on highlights
def find_most_similar_article(summary, df, tfidf_matrix, vectorizer):
    """Return the article whose highlights best match ``summary``.

    Args:
        summary: Generated summary text to look up.
        df: DataFrame with an 'article' column. Its rows are assumed to be in
            the same order as the rows of ``tfidf_matrix`` — verify at the
            call site if ``df`` has been filtered or re-sorted.
        tfidf_matrix: TF-IDF matrix of the dataset highlights.
        vectorizer: Fitted vectorizer used to build ``tfidf_matrix``.

    Returns:
        A ``(most_similar_article, similarity_score)`` tuple.
    """
    similarity_scores = calculate_similarity(summary, tfidf_matrix, vectorizer)
    # Only one summary is scored, so the result has a single row and the flat
    # argmax is the column position of the best-matching highlight.
    most_similar_index = int(similarity_scores.argmax())
    # argmax yields a position, not an index label, so look the row up with
    # .iloc; .loc would pick the wrong row (or raise) for a non-default index.
    most_similar_article = df['article'].iloc[most_similar_index]
    similarity_score = similarity_scores[0, most_similar_index]
    return most_similar_article, similarity_score

# Example usage: for each entry of `summaries` (left in the kernel by the most
# recently run summarization cell), find the dataset article whose highlights
# have the highest TF-IDF cosine similarity to it.
for i, summary in enumerate(summaries):
    most_similar_article, similarity_score = find_most_similar_article(summary, df, tfidf_matrix, vectorizer)
    # Print the summary, the full text of the matched article, and the score.
    print(f"Summary for Article {i + 1}:\n{summary}")
    print(f"Most Similar Article:\n{most_similar_article}")
    print(f"Cosine Similarity Score: {similarity_score}\n")


Summary for Article 1:
'The shrinking space on planes is not only uncomfortable - it's putting our health and safety in danger,' said Consumer Representative on the committee. 'It is time that the DOT and FAA take a stand for humane treatment of passengers'
Most Similar Article:
Ever noticed how plane seats appear to be getting smaller and smaller? With increasing numbers of people taking to the skies, some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable - it's putting our health and safety in danger. More than squabbling over the arm rest, shrinking space on planes putting our health and safety in danger? This week, a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes, it doesn't stipulate a minimum amount of space for humans. 'In a world where anim