## 1. Import Libraries:

In [7]:
from functools import lru_cache

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split


## 2. Load the Dataset:

In [8]:
# Location of the airline tweets CSV; change this path if your copy lives elsewhere
csv_path = '/content/Tweets.csv'
df = pd.read_csv(csv_path)

# Preview the leading rows to see which columns the dataset provides
print(df.head())


             tweet_id airline_sentiment  airline_sentiment_confidence  \
0  570306133677760513           neutral                        1.0000   
1  570301130888122368          positive                        0.3486   
2  570301083672813571           neutral                        0.6837   
3  570301031407624196          negative                        1.0000   
4  570300817074462722          negative                        1.0000   

  negativereason  negativereason_confidence         airline  \
0            NaN                        NaN  Virgin America   
1            NaN                     0.0000  Virgin America   
2            NaN                        NaN  Virgin America   
3     Bad Flight                     0.7033  Virgin America   
4     Can't Tell                     1.0000  Virgin America   

  airline_sentiment_gold        name negativereason_gold  retweet_count  \
0                    NaN     cairdin                 NaN              0   
1                    NaN    jnar

## 3. Explore the Data:

In [9]:
# Count the missing entries in every column
missing_per_column = df.isna().sum()
print(missing_per_column)

# Distribution of the target labels stored in 'airline_sentiment'
label_counts = df['airline_sentiment'].value_counts()
print(label_counts)


tweet_id                            0
airline_sentiment                   0
airline_sentiment_confidence        0
negativereason                   5462
negativereason_confidence        4118
airline                             0
airline_sentiment_gold          14600
name                                0
negativereason_gold             14608
retweet_count                       0
text                                0
tweet_coord                     13621
tweet_created                       0
tweet_location                   4733
user_timezone                    4820
dtype: int64
negative    9178
neutral     3099
positive    2363
Name: airline_sentiment, dtype: int64


## 4. Text Preprocessing:

In [10]:
# Download the NLTK resources the preprocessing step relies on.
# 'stopwords' backs stopwords.words('english'). 'punkt' is the tokenizer model
# that word_tokenize loads on older NLTK releases, while NLTK >= 3.9 looks for
# 'punkt_tab' instead, so both are requested to keep this notebook runnable on
# a fresh kernel regardless of the installed NLTK version.
for resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource)

# Text preprocessing helpers
@lru_cache(maxsize=None)
def _stop_words_for(language):
    """Return NLTK's stop-word list for ``language`` as a frozenset.

    The result is cached, so the corpus is read once per language instead of
    once per preprocessed text.
    """
    return frozenset(stopwords.words(language))


def preprocess_text(text):
    """Normalize one piece of text.

    The text is lowercased, tokenized with ``word_tokenize``, stripped of
    English stop words and of every token that is not purely alphanumeric
    (which drops punctuation), and re-joined with single spaces.

    Args:
        text: Raw text. Non-string values (e.g. the float NaN pandas uses for
            missing cells) are treated as empty instead of raising.

    Returns:
        str: The surviving tokens joined by spaces; ``''`` when none survive.
    """
    # Missing cells reach .apply() as NaN floats, which have no .lower()
    if not isinstance(text, str):
        return ''

    # Loaded lazily on first use so the 'stopwords' download has already run
    stop_words = _stop_words_for('english')

    tokens = word_tokenize(text.lower())
    return ' '.join(
        token for token in tokens
        if token.isalnum() and token not in stop_words
    )

# Clean every tweet in 'text' and store the result in a new 'preprocessed_text' column
df['preprocessed_text'] = df['text'].map(preprocess_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


## 5. Split the Dataset:

In [11]:
# Hold out 20% of the rows for testing; the fixed random_state makes the shuffle repeatable
train_data, test_data, train_labels, test_labels = train_test_split(
    df['preprocessed_text'],
    df['airline_sentiment'],
    test_size=0.2,
    random_state=42,
)

# Report how many samples landed in each partition
print(f"Train Data Shape: {train_data.shape}")
print(f"Test Data Shape: {test_data.shape}")


Train Data Shape: (11712,)
Test Data Shape: (2928,)


## Import Libraries for VADER Sentiment Analysis:


In [12]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer


### Download VADER lexicon if not already downloaded

In [13]:
# Fetch the 'vader_lexicon' NLTK resource. As the last expression in the cell,
# the return value of nltk.download() is shown as the cell output.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

## Initialize the VADER sentiment analyzer

In [14]:
# Create one analyzer instance; the scoring cells below call sia.polarity_scores() on it
sia = SentimentIntensityAnalyzer()

## Sample customer feedback

In [15]:
# Example feedback strings; each one is scored individually and then tallied in the cells below
customer_feedback = [
    "I love the competitor's product. It's amazing!",
    "The competitor's product is good, but it could be better.",
    "I dislike their customer support. It's terrible.",
    "The competitor's product is too expensive.",
]


## Analyze and generate insights from the feedback

In [16]:
for feedback in customer_feedback:
    sentiment_scores = sia.polarity_scores(feedback)
    compound = sentiment_scores['compound']

    # Bucket the compound score: <= -0.05 is negative, >= 0.05 is positive,
    # anything in between is neutral
    if compound <= -0.05:
        sentiment = "Negative"
    elif compound >= 0.05:
        sentiment = "Positive"
    else:
        sentiment = "Neutral"

    # Emit the feedback, its label and the raw scores, followed by a blank line
    print(
        f"Feedback: {feedback}",
        f"Sentiment: {sentiment}",
        f"Sentiment Scores: {sentiment_scores}",
        "",
        sep="\n",
    )


Feedback: I love the competitor's product. It's amazing!
Sentiment: Positive
Sentiment Scores: {'neg': 0.0, 'neu': 0.325, 'pos': 0.675, 'compound': 0.8516}

Feedback: The competitor's product is good, but it could be better.
Sentiment: Positive
Sentiment Scores: {'neg': 0.0, 'neu': 0.58, 'pos': 0.42, 'compound': 0.7003}

Feedback: I dislike their customer support. It's terrible.
Sentiment: Negative
Sentiment Scores: {'neg': 0.5, 'neu': 0.263, 'pos': 0.237, 'compound': -0.4588}

Feedback: The competitor's product is too expensive.
Sentiment: Neutral
Sentiment Scores: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}



In [17]:
# Generate overall insights
# Score each feedback exactly once and reuse the compound values for both tallies
# (same thresholds as the per-feedback labels: >= 0.05 positive, <= -0.05 negative).
compound_scores = [sia.polarity_scores(feedback)['compound'] for feedback in customer_feedback]
total_feedback = len(compound_scores)

positive_feedback = sum(1 for score in compound_scores if score >= 0.05)
negative_feedback = sum(1 for score in compound_scores if score <= -0.05)
# Whatever is neither positive nor negative counts as neutral
neutral_feedback = total_feedback - positive_feedback - negative_feedback

print("Overall Insights:")
print(f"Positive Feedback: {positive_feedback} out of {total_feedback}")
print(f"Negative Feedback: {negative_feedback} out of {total_feedback}")
print(f"Neutral Feedback: {neutral_feedback} out of {total_feedback}")


Overall Insights:
Positive Feedback: 2 out of 4
Negative Feedback: 1 out of 4
Neutral Feedback: 1 out of 4
