In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Download the VADER lexicon only if it is not already installed.
# nltk.data.find raises LookupError when the resource is absent from the NLTK
# data path; in that case (and only then) the download is triggered, so
# repeated runs do not call the downloader again.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')


In [None]:
# Read the semicolon-delimited review file into `df`
# (swap 'music_album_reviews.csv' for the actual location of your data).
df = pd.read_csv(filepath_or_buffer='music_album_reviews.csv', delimiter=';')

# Preview the leading five rows to sanity-check the parse.
print(df.head(n=5))


In [3]:
# Clean the 'Review' column: missing entries are swapped for empty strings
# and the filled column is written back onto the DataFrame.
review_text = df['Review'].fillna(value='')
df['Review'] = review_text


In [None]:
# Histogram of the 'Rating' column, split into 20 bins, drawn through the
# explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['Rating'], bins=20, edgecolor='k', alpha=0.7)
ax.set_xlabel('Rating')
ax.set_ylabel('Count')
ax.set_title('Distribution of Ratings')
plt.show()


In [None]:
# Tally how often each distinct rating occurs, then print the tally under a
# heading (one print call; the newline separator reproduces the two-line layout).
rating_counts = df.loc[:, 'Rating'].value_counts()
print("\nCount of Ratings:", rating_counts, sep="\n")


In [6]:
# Initialize VADER sentiment analyzer.
# A single module-level instance is created here; get_vader_sentiment reads it
# as the global `sid` each time it scores a piece of text.
sid = SentimentIntensityAnalyzer()


In [7]:
# Define function to get VADER sentiment
def get_vader_sentiment(text, threshold=0.0):
    """Label a piece of text 'Positive', 'Negative' or 'Neutral' via VADER.

    The label is derived from the 'compound' entry of
    ``sid.polarity_scores(text)``, where ``sid`` is the module-level analyzer.

    Args:
        text: The text to score (a review string).
        threshold: Non-negative half-width of the neutral band. Scores
            strictly above ``threshold`` are 'Positive', scores strictly below
            ``-threshold`` are 'Negative', everything else is 'Neutral'.
            The default of 0.0 keeps the original sign-only classification.
            NOTE(review): the VADER project README suggests 0.05 as a typical
            cutoff -- confirm whether that is wanted for this analysis.

    Returns:
        One of the strings 'Positive', 'Negative' or 'Neutral'.

    Raises:
        ValueError: If ``threshold`` is negative (the positive and negative
            bands would overlap).
    """
    # Validate outside the try block so a bad threshold is never swallowed by
    # the best-effort handler below.
    if threshold < 0:
        raise ValueError(f"threshold must be non-negative, got {threshold}")

    try:
        sentiment_scores = sid.polarity_scores(text)
        compound_score = sentiment_scores['compound']
        if compound_score > threshold:
            return 'Positive'
        elif compound_score < -threshold:
            return 'Negative'
        else:
            return 'Neutral'
    except Exception as e:
        # Deliberate best-effort: a value the analyzer cannot score is reported
        # and treated as neutral so one bad row does not abort a whole column.
        print(f"Error processing text: {text}. Error: {e}")
        return 'Neutral'


In [None]:
# Score every review with get_vader_sentiment and store the labels in a new
# 'Sentiment' column.
review_sentiments = df['Review'].apply(get_vader_sentiment)
df['Sentiment'] = review_sentiments

# Show the first rows of the DataFrame, now including the new column.
print("\nDataFrame with Sentiment Analysis:", df.head(n=5), sep="\n")


In [None]:
# Count how many reviews carry each sentiment label and print the totals
# under a heading (the chart itself is drawn separately from these counts).
sentiment_counts = df.loc[:, 'Sentiment'].value_counts()
print("\nSentiment Counts:", sentiment_counts, sep="\n")


In [None]:
# Convert sentiment_counts to lists for plotting
sentiments = sentiment_counts.index.tolist()
counts = sentiment_counts.values.tolist()

# Pick each bar's colour from its label rather than its position.
# value_counts() orders labels by frequency, so a fixed positional colour list
# would paint whichever sentiment is most common green, and would shift
# colours whenever one of the three classes is absent.
sentiment_colors = {'Positive': 'green', 'Negative': 'red', 'Neutral': 'blue'}
bar_colors = [sentiment_colors.get(label, 'gray') for label in sentiments]

plt.figure(figsize=(10, 6))
plt.bar(sentiments, counts, color=bar_colors, alpha=0.7)
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.title('Distribution of Positive, Negative, and Neutral Reviews')
plt.show()
