# INSTALLING REQUIRED LIBRARIES

##### TWITTER SCRAPING

In [None]:
!pip3 install snscrape

##### TEXT SUMMARIZER

In [None]:
!pip install transformers

In [None]:
!pip install torch==1.9

In [None]:
!pip install sentencepiece

##### SENTIMENT ANALYSIS WITH FLAIR

In [None]:
!pip install flair

##### SENTIMENT ANALYSIS WITH VADER

In [None]:
!pip install vaderSentiment

# IMPORTING REQUIRED LIBRARIES



##### TWITTER SCRAPING

In [None]:
import snscrape.modules.twitter as sntwitter
import pandas as pd
from time import sleep
from tqdm import tqdm

##### TEXT SUMMARIZATION

In [None]:
import torch
import tensorflow as tf
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config, AutoTokenizer

In [None]:
# Display the installed torch version (the install cell above pins torch==1.9).
torch.__version__

##### SENTIMENT ANALYSIS USING FLAIR

In [None]:
from flair.models import TextClassifier
from flair.data import Sentence

In [None]:
import re

In [None]:
#Visualization 
import matplotlib.pyplot as plt
import numpy as np

##### SENTIMENT ANALYSIS WITH VADER

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# PROCESSING

##### TWITTER SCRAPING

In [None]:
def TwitterScraper(searchQuery, number=50):
  """Scrape tweets matching a search query and save them to a CSV file.

  Args:
    searchQuery: Twitter search string. It is also used as the stem of the
      CSV file name.
    number: Maximum number of tweets to collect (default 50).

  Returns:
    pandas Series containing the tweet texts (the 'Tweets' column).

  Side effects:
    Writes '<searchQuery>.csv' (columns Date, Tweets, Username, Url) to the
    current working directory.
  """
  tweet_data = []
  scraper = sntwitter.TwitterSearchScraper(str(searchQuery))
  for i, tweet in enumerate(scraper.get_items()):
    # enumerate() is zero-based, so `i >= number` stops after exactly
    # `number` tweets (the previous `i > number` let one extra through).
    if i >= number:
      break
    tweet_data.append([tweet.date, tweet.content, tweet.user.username, tweet.url])
  df = pd.DataFrame(tweet_data, columns=['Date', 'Tweets', 'Username', 'Url'])
  df.to_csv(f'{searchQuery}.csv', index=False, encoding='utf-8')
  return df['Tweets']

##### SENTIMENT ANALYSIS WITH FLAIR

In [None]:
def sentiment_flair(query):
  """Classify each text with Flair's pretrained 'en-sentiment' model.

  Args:
    query: The texts to classify. Either an object exposing `.tolist()`
      (e.g. the pandas Series returned by TwitterScraper) or any plain
      iterable of strings such as a list.

  Returns:
    A list with one entry per input text; each entry is the `labels`
    attribute of the Flair Sentence after prediction.
  """
  # Accept both a Series and an ordinary list: calling .tolist()
  # unconditionally raised AttributeError for list input.
  texts = query.tolist() if hasattr(query, 'tolist') else list(query)
  classifier = TextClassifier.load('en-sentiment')
  outsflair = []

  for text in texts:
    sentence = Sentence(text)
    classifier.predict(sentence)  # attaches the predicted label(s) in place
    outsflair.append(sentence.labels)

  return outsflair

In [None]:
def visualizeSentAna(tit, neg, pos):
  """Draw a pie chart of negative versus positive review counts.

  Args:
    tit: Text appended to the chart title (the product name).
    neg: Number of negative reviews.
    pos: Number of positive reviews.

  Returns:
    None. The chart is shown with plt.show(); when both counts are zero a
    notice is printed instead and nothing is drawn.
  """
  # A pie chart is normalised by the sum of its values, so an all-zero
  # input cannot be drawn meaningfully; bail out before touching matplotlib.
  if neg + pos == 0:
    print("No sentiment results to visualize.")
    return

  y = np.array([neg, pos])
  mylabels = ["Negative Reviews", "Positive Reviews"]
  mycolors = ["#FF495C", "#3DDC97"]

  plt.pie(y, labels=mylabels,
          colors=mycolors,
          autopct='%1.2f%%',
          wedgeprops={"edgecolor": "black",
                      'linewidth': 2,
                      'antialiased': True})
  plt.title("Product Demand Analysis - Visualization\n" + tit, bbox={'facecolor': '#C4DCE9', 'pad': 5})
  plt.show()

In [None]:
def flair_sentiment_probs(inpQuery):
  """Count negative and positive Flair predictions for a batch of texts.

  Args:
    inpQuery: Texts to classify; passed straight to sentiment_flair().

  Returns:
    A two-element list `[negative_count, positive_count]`. Despite the
    function name these are counts, not probabilities.
  """
  negCnt, posCnt = 0, 0
  for labels in sentiment_flair(inpQuery):
    if not labels:
      # No label was attached to this sentence; nothing to count.
      continue
    # Read the predicted class from the Label object itself. Parsing
    # str(label) around the "→ " separator depended on the exact text of the
    # label's repr and raised IndexError whenever the arrow was missing.
    verdict = labels[0].value
    if verdict == 'NEGATIVE':
      negCnt += 1
    elif verdict == 'POSITIVE':
      posCnt += 1
  return [negCnt, posCnt]

##### TEXT SUMMARIZER

In [None]:
def remove_emojis(data):
    """Return `data` with emoji and related symbol code points removed.

    Every run of characters that falls inside the character class below is
    deleted. Note that the class is deliberately broad: the U+24C2..U+1F251
    span and the U+10000..U+10FFFF span also cover CJK text and every
    supplementary-plane character, so those are stripped as well.
    """
    symbol_ranges = (
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # miscellaneous symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
        "\U00002500-\U00002BEF"  # box drawing through misc symbols & arrows
        "\U00002702-\U000027B0"  # dingbats
        "\U00002702-\U000027B0"  # dingbats (repeated in the original set)
        "\U000024C2-\U0001F251"  # wide span; includes CJK ideographs
        "\U0001f926-\U0001f937"  # gesture / people emoji
        "\U00010000-\U0010ffff"  # all supplementary planes
        "\u2640-\u2642"          # gender signs
        "\u2600-\u2B55"          # miscellaneous symbols
        "\u200d"                 # zero-width joiner
        "\u23cf"                 # eject symbol
        "\u23e9"                 # fast-forward symbol
        "\u231a"                 # watch
        "\ufe0f"                 # variation selector-16
        "\u3030"                 # wavy dash
    )
    stripper = re.compile("[" + symbol_ranges + "]+", re.UNICODE)
    return stripper.sub('', data)

In [None]:
def TextSummaryPreProcessing(inpDF):
  """Merge tweets into one cleaned string ready for summarization.

  Args:
    inpDF: Iterable of tweet strings (e.g. the 'Tweets' Series).

  Returns:
    A single string with URLs, @mentions, #hashtags, the characters
    '@', '#', ':' and '-', and emoji removed.
  """
  # Join with a space. Concatenating with no separator fused the last word of
  # one tweet onto the first word of the next, so a trailing URL, mention or
  # hashtag swallowed the next tweet's first word when the \S+ patterns below
  # ran. str.join also avoids repeated string concatenation in a loop.
  tweetSummary = ' '.join(inpDF)

  # Applied in this order: URLs, mentions, stray '@', hashtags, stray '#',
  # colons, hyphens.
  for pattern in (r'http\S+', r'@\S+', r'@', r'#\S+', r'#', r':', r'-'):
    tweetSummary = re.sub(pattern, '', tweetSummary)

  return remove_emojis(tweetSummary)

In [None]:
def text_summerizer(InpData):
  """Summarize a collection of tweets with the pretrained 't5-small' model.

  Args:
    InpData: Iterable of tweet strings; cleaned via TextSummaryPreProcessing().

  Returns:
    The generated summary as a string, or '' when nothing is left to
    summarize after cleaning.
  """
  iptext = TextSummaryPreProcessing(InpData)

  # Collapse every whitespace run (including newlines) to a single space.
  # Deleting '\n' outright glued together the words on either side of it.
  preprocessed_text = ' '.join(iptext.split())
  if not preprocessed_text:
    # Skip the model load entirely when there is no text.
    return ''

  model = T5ForConditionalGeneration.from_pretrained('t5-small')
  tokenizer = AutoTokenizer.from_pretrained('t5-small')
  device = torch.device('cpu')
  model.to(device)

  # T5 checkpoints use the lowercase task prefix "summarize: ".
  t5_input_text = 'summarize: ' + preprocessed_text

  # truncation=True makes the 512-token cap explicit; max_length on its own
  # does not request truncation.
  tokenized_text = tokenizer.encode(
      t5_input_text, return_tensors='pt', max_length=512, truncation=True
  ).to(device)

  summary_ids = model.generate(tokenized_text, min_length=120, max_length=512)
  return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

##### FINAL OUTPUT

In [None]:
def ProductAnalysisFunc():
  """Interactive end-to-end product analysis.

  Prompts for a product name, scrapes matching tweets, counts negative and
  positive sentiment, summarizes the tweets, then shows the pie chart and
  prints the counts and the summary.

  Returns:
    None. Returns early with a printed notice when no product name is
    entered or no tweets are found.
  """
  sQuery = input("Enter the product name : ").strip()
  if not sQuery:
    print("No product name entered.")
    return

  query = TwitterScraper(sQuery)
  if len(query) == 0:
    # Nothing to classify, summarize or plot.
    print(f"No tweets found for '{sQuery}'.")
    return

  sentAn = flair_sentiment_probs(query)  # [negative_count, positive_count]
  textSum = text_summerizer(query)
  print("\n\n\n\n\n")
  visualizeSentAna(sQuery, sentAn[0], sentAn[1])
  print(f"\nNegative reviews count : {sentAn[0]} \nPositive reviews count : {sentAn[1]}")
  print(f"\n\nSummary of Public Opinion : \n{textSum}")

# OUTPUT

In [None]:
# Run the pipeline: prompts for a product name, then scrapes tweets,
# analyses sentiment, summarizes, and displays the results.
ProductAnalysisFunc()