In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Location of the raw tweets CSV, relative to the notebook's working directory.
tweets_csv_path = '../data/Tweets.csv'

# Read the whole file into a DataFrame and preview its first rows.
data = pd.read_csv(tweets_csv_path)
data.head()

In [None]:
# Print a structural summary of the frame: columns, non-null counts and dtypes.
data.info()

In [None]:
# Show summary statistics (count, mean, std, quartiles, ...) using pandas' default column selection.
data.describe()

In [None]:
# Count the missing (null) values in each column.
data.isnull().sum()

In [None]:
# Dimensions of the frame as a (rows, columns) tuple.
data.shape

In [None]:
# List the column labels available in the frame.
data.columns

In [None]:
# Preview the first few entries of the "text" column.
data["text"].head()

In [None]:
# Preview the "text" column lower-cased; the result is not assigned, so `data` is unchanged.
data["text"].str.lower().head()

In [None]:
# Number of rows for each distinct value in the "airline" column.
data["airline"].value_counts()

In [None]:
# Tally how many rows carry each "airline_sentiment" label.
sentiment_counts = data['airline_sentiment'].value_counts()

# Draw the tallies as a bar chart on an explicit 8x5 figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 5))
sentiment_counts.plot(kind='bar', ax=ax, rot=0)  # rot=0 keeps the tick labels horizontal
ax.set_xlabel('Airline Sentiment')
ax.set_ylabel('Number of Tweets')
ax.set_title('Distribution of Airline Sentiments')
fig.tight_layout()

# Save the chart to disk first, then display it inline.
fig.savefig('airline_sentiment_distribution_bar_chart.png')
plt.show()
print("Distribution of Airline Sentiments chart is saved as airline_sentiment_distribution_bar_chart.png")


In [None]:
# Count rows per (airline, sentiment) pair and spread the sentiment labels
# across columns; pairs that never occur are filled with 0. This wide table
# (one row per airline) is what the stacked bar chart is drawn from.
# It is built directly here: melting the table and pivoting it back again
# would only reproduce the same frame.
pivot_df = data.groupby('airline')['airline_sentiment'].value_counts().unstack(fill_value=0)

# Long-form view of the same counts (one row per airline/sentiment pair),
# kept under its existing name in case other cells rely on it.
sentiment_by_airline = pivot_df.reset_index().melt(
    id_vars='airline', var_name='airline_sentiment', value_name='count'
)

# Stacked bars: one bar per airline, one segment per sentiment label.
ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6))
ax.set_xlabel('Airline')
ax.set_ylabel('Number of Tweets')
ax.set_title('Sentiment Distribution by Airline')
ax.legend(title='Airline Sentiment')
ax.figure.tight_layout()

# Save the chart to disk first, then display it inline.
ax.figure.savefig('sentiment_distribution_by_airline_stacked_bar_chart_matplotlib.png')
plt.show()
print("Sentiment Distribution by Airline chart is saved as sentiment_distribution_by_airline_stacked_bar_chart_matplotlib.png")

In [None]:
# Keep only the rows whose "airline_sentiment" is 'negative'.
negative_tweets = data[data['airline_sentiment'] == 'negative']

# Count the negative rows for every (airline, negativereason) pair, in long form.
reason_by_airline = negative_tweets.groupby(['airline', 'negativereason']).size().reset_index(name='count')

# Reshape to one row per reason and one column per airline for grouped bars.
# A pair that never occurs has no row in the long table, so pivot() leaves it
# as NaN (and turns the column into floats); such a gap means "zero rows",
# so fill it with 0 and restore integer counts.
pivot_reason = (
    reason_by_airline
    .pivot(index='negativereason', columns='airline', values='count')
    .fillna(0)
    .astype(int)
)

# Grouped bars: one cluster per reason, one bar per airline.
ax = pivot_reason.plot(kind='bar', figsize=(14, 7))
ax.set_xlabel('Reason for Negative Sentiment')
ax.set_ylabel('Number of Negative Tweets')
ax.set_title('Reasons for Negative Sentiment by Airline')
ax.legend(title='Airline')
ax.figure.tight_layout()

# Save the chart to disk first, then display it inline.
ax.figure.savefig('negative_sentiment_reasons_by_airline_grouped_bar_chart_matplotlib.png')
plt.show()
print("Reasons for Negative Sentiment by Airline chart is saved as negative_sentiment_reasons_by_airline_grouped_bar_chart_matplotlib.png")


In [None]:
# Report how many distinct Python types occur among the values of the "text"
# and "airline_sentiment" columns (a result of 1 means every value shares one type).
text_value_types = data['text'].apply(type)
sentiment_value_types = data['airline_sentiment'].apply(type)

num_text_types = text_value_types.nunique()
num_sentiment_types = sentiment_value_types.nunique()

print(f"Number of datatypes in 'text': {num_text_types}")
print(f"Number of datatypes in 'airline_sentiment': {num_sentiment_types}")