In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import string
from heapq import nlargest

# Load the text
with open('spaceX_DP.txt', 'r') as file:
    text = file.read()

# Preprocess text
nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
punctuation = set(string.punctuation)

def preprocess(text):
    words = word_tokenize(text.lower())
    words = [word for word in words if word not in stop_words and word not in punctuation]
    return words

words = preprocess(text)

# Build word frequency
word_frequencies = nltk.FreqDist(words)
maximum_frequency = max(word_frequencies.values())
for word in word_frequencies:
    word_frequencies[word] /= maximum_frequency

# Tokenize sentences and score them
sentences = sent_tokenize(text)
sentence_scores = {}

for sent in sentences:
    sentence_words = preprocess(sent)
    score = sum(word_frequencies.get(word, 0) for word in sentence_words)
    sentence_scores[sent] = score

#Get summary
summary_sentences = nlargest(5, sentence_scores, key=sentence_scores.get)
summary = ' '.join(summary_sentences)

#Output summary and compare lengths
original_length = len(text.split())
summary_length = len(summary.split())

print(f"Summary:\n{summary}")
print(f"\nOriginal Length: {original_length} words")
print(f"Summary Length: {summary_length} words")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Summary:
Given the fact that Boca Chica is much closer to communities than other major spaceports, and that three massive LNG export terminals are planned only 5 miles away, the groups express concern about the FAA’s objectivity when it comes to safety evaluations.Maria Galasso of Save RGV said, “The FAA has already demonstrated that they are willing to rubber-stamp SpaceX’s plans and look the other way when the company does not meet its commitments, how are we supposed to trust them when they say launching massive rockets just 5 miles away from South Padre Island and Port Isabel and the LNG export terminals is safe?” After months of delays and testing setbacks, a prototype of SpaceX’s Starship next-generation reusable launch vehicle made a brief “hop” flight Aug. 4 at the company’s South Texas test site.The Starship SN5 vehicle, powered by a single Raptor engine, lifted off shortly before 8 p.m. Eastern at SpaceX’s site in Boca Chica, Texas. It urged the FAA to “develop a new or suppl

In [3]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [5]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Load the text for sentiment analysis (from previous exercise)
with open('SpaceX.txt', 'r') as file:
    text = file.read()

# Step 1: Initialize the sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Step 2: Get sentiment scores
sentiment = analyzer.polarity_scores(text)

# Step 3: Output sentiment results
print(f"Compound: {sentiment['compound']}")
print(f"Positive: {sentiment['pos']}")
print(f"Neutral: {sentiment['neu']}")
print(f"Negative: {sentiment['neg']}")


Compound: 0.5999
Positive: 0.076
Neutral: 0.863
Negative: 0.061


###Whether the text is more subjective or objective

In [6]:
from textblob import TextBlob

#Analyze subjectivity
blob = TextBlob(text)
subjectivity = blob.sentiment.subjectivity


print(f"Subjectivity: {subjectivity}")
if subjectivity > 0.5:
    print("The text is more subjective.")
else:
    print("The text is more objective.")


Subjectivity: 0.30897435897435904
The text is more objective.
