In [None]:
pip install tweepy pandas

In [None]:
pip install numpy==1.21.0

In [None]:
pip install pandas==1.3.0

In [None]:
pip install --upgrade tweepy

In [None]:
import os

import pandas as pd
import tweepy

# Twitter API credentials
# SECURITY: a bearer token used to be kept here in a commented-out line. It has
# been removed from the notebook; treat that token as compromised and
# revoke/regenerate it in the Twitter developer portal.
#
# Credentials are read from environment variables so that no secret is stored
# in the notebook. A missing variable raises KeyError naming that variable.
consumer_key = os.environ['TWITTER_CONSUMER_KEY']
consumer_secret = os.environ['TWITTER_CONSUMER_SECRET']
access_token = os.environ['TWITTER_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']

# Authenticate with the Twitter API using the four OAuth 1 user credentials.
auth = tweepy.OAuth1UserHandler(consumer_key, consumer_secret, access_token, access_token_secret)
# wait_on_rate_limit=True asks Tweepy to wait for the rate limit to replenish
# rather than raising when the limit is hit.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Define a function to extract tweets
def get_tweets(keyword, count):
    """Return the text of up to ``count`` English tweets matching ``keyword``.

    The search is issued through the module-level ``api`` client and paged
    with ``tweepy.Cursor``.

    Args:
        keyword: Search query string passed as ``q``.
        count: Maximum number of tweets to collect.

    Returns:
        list[str]: One entry per tweet, in the order the cursor yields them.
    """
    # tweet_mode="extended" requests the untruncated body; in that mode the
    # Status object carries the text in `full_text` instead of `text`.
    # NOTE(review): for retweets `full_text` may still end in an ellipsis; the
    # complete text then lives on `retweeted_status.full_text` -- confirm
    # whether retweets need special handling here.
    cursor = tweepy.Cursor(
        api.search_tweets,
        q=keyword,
        lang="en",
        tweet_mode="extended",
    )
    return [status.full_text for status in cursor.items(count)]

# Search parameters: the query string and the maximum number of tweets to pull.
keyword = "your_keyword"
tweet_count = 1000
tweets = get_tweets(keyword=keyword, count=tweet_count)

# Put the tweet texts in a single-column frame and write it to a local CSV
# without the index column.
df = pd.DataFrame(data=tweets, columns=['tweet'])
df.to_csv('tweets.csv', index=False)

In [None]:
#store the data in Hadoop's HDFS
hadoop fs -put tweets.csv /path/to/hdfs/tweets.csv

In [None]:
#data processing with Spark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf
from pyspark.sql.types import StringType
import re

# Initialize Spark session (reuses an existing session if one is active)
spark = (
    SparkSession.builder
    .appName("TwitterSentimentAnalysis")
    .getOrCreate()
)

# Load the data.
# The file was written by pandas' to_csv, which wraps fields containing
# newlines, commas or quotes in double quotes and escapes an embedded quote by
# doubling it. multiLine=True lets a quoted tweet span several physical lines,
# and escape='"' makes Spark read "" as a literal quote (its default escape
# character is a backslash). Without these, multi-line tweets are split into
# several broken rows.
df = spark.read.csv(
    '/path/to/hdfs/tweets.csv',
    header=True,
    multiLine=True,
    escape='"',
)

# Define a UDF for text cleaning
def clean_tweet(tweet):
    """Normalise raw tweet text for downstream analysis.

    Steps, in order: drop URLs, drop @mentions, drop '#' characters (the tag
    word is kept), drop the standalone retweet marker ``RT``, replace every
    non-word character with a space, and lower-case the result. Runs of
    spaces are not collapsed.

    Args:
        tweet: Raw tweet text, or None (Spark passes None for null cells).

    Returns:
        The cleaned, lower-cased string, or None when ``tweet`` is None.
    """
    # Null rows reach a Spark UDF as None; propagate the null instead of
    # letting re.sub raise TypeError and fail the whole job.
    if tweet is None:
        return None

    tweet = re.sub(r'http\S+', '', tweet)
    tweet = re.sub(r'@\w+', '', tweet)
    tweet = re.sub(r'#', '', tweet)
    # Word boundaries keep the marker removal from eating the letters "RT"
    # inside ordinary words such as "SMART" or "START".
    tweet = re.sub(r'\bRT\b', '', tweet)
    tweet = re.sub(r'\W', ' ', tweet)
    return tweet.lower()

# Expose clean_tweet to Spark as a UDF that returns a string column.
clean_tweet_udf = udf(clean_tweet, StringType())

# Add the cleaned text next to the raw tweet and preview the first rows.
df = df.withColumn(
    'cleaned_tweet',
    clean_tweet_udf(col('tweet')),
)
df.show(n=5)


In [None]:
#perform sentiment analysis using a pre-trained model like VADER
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize the VADER sentiment analyzer once at module level; the
# analyze_sentiment function below reads this shared instance on every call.
analyzer = SentimentIntensityAnalyzer()

# Define a UDF for sentiment analysis
def analyze_sentiment(tweet):
    """Label text as 'positive', 'negative' or 'neutral'.

    The label is derived from the ``compound`` value returned by the
    module-level ``analyzer.polarity_scores``: >= 0.05 is positive,
    <= -0.05 is negative, anything in between is neutral.

    Args:
        tweet: Text to score, or None (Spark passes None for null cells).

    Returns:
        One of 'positive', 'negative', 'neutral', or None when ``tweet``
        is None.
    """
    # Propagate nulls rather than handing None to the analyzer, which would
    # raise inside the UDF and fail the Spark job.
    if tweet is None:
        return None

    compound = analyzer.polarity_scores(tweet)['compound']
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

# Expose analyze_sentiment to Spark as a UDF that returns a string column.
analyze_sentiment_udf = udf(analyze_sentiment, StringType())

# Score the cleaned text, store the label in 'sentiment', and preview the rows.
df = df.withColumn(
    'sentiment',
    analyze_sentiment_udf(col('cleaned_tweet')),
)
df.show(n=5)


In [None]:
#visualize sentiment distribution

import matplotlib.pyplot as plt

# Convert to Pandas DataFrame for visualization
pandas_df = df.toPandas()

# Count tweets per sentiment label. value_counts orders the labels by
# frequency, so the bar order changes from one dataset to the next.
sentiment_counts = pandas_df['sentiment'].value_counts()

# Pick each bar's colour from its label rather than its position, so a given
# sentiment always gets the same colour regardless of which label is most
# frequent. Labels outside the mapping fall back to gray.
sentiment_colors = {'positive': 'green', 'neutral': 'blue', 'negative': 'red'}
bar_colors = [sentiment_colors.get(label, 'gray') for label in sentiment_counts.index]

# Plot the sentiment distribution on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sentiment_counts.plot(kind='bar', color=bar_colors, ax=ax)
ax.set_title('Sentiment Analysis of Tweets')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Number of Tweets')
plt.show()


Next step: deploy the analysis behind a web framework such as Flask or Django to allow real-time sentiment analysis.