# Libraries to use

In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.support.ui import Select

import requests
from bs4 import BeautifulSoup
import re as re
import time
import pandas as pd
import numpy as np

import pymysql
import sqlalchemy as alch
from sqlalchemy import create_engine
from getpass import getpass

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords

from wordcloud import WordCloud
from langdetect import detect
from textblob import TextBlob


# Import the Al Jazeera module and create the DataFrame

In [2]:
import src.aljazeera as aj

In [3]:
# Collect all the links to the Ukraine news articles.
lst_aj = aj.get_aljazeera()

In [4]:
# Build a dictionary holding the links and their content.
dict_aj = aj.create_dict_aj(lst_aj)

In [5]:
# Per-article descriptives: counts of the words 'ukraine', 'war', 'russia',
# 'putin', 'zelensky' plus sentiment analysis.
df_aj_descr = aj.descriptives(dict_aj)

In [6]:
# DataFrame with the links and articles; the helper transposes the data and
# creates a new index.
df_aj = aj.create_df_aj(dict_aj)

In [7]:
# Join both DataFrames: articles on the left, descriptives on the right.
df_aj_all = df_aj.join(other=df_aj_descr)

In [8]:
# Save the combined Al Jazeera data into a file.
df_aj_all.to_csv(path_or_buf='data/aljazeera.csv')

In [9]:
# Preview one randomly chosen row of the combined frame.
df_aj_all.sample(n=1)

Unnamed: 0,link,article,polarity,subjectivity,length,ukraine,war,russia,putin,zelensky
22,https://www.aljazeera.com/news/2023/2/24/un-co...,The United Nations General Assembly (UNGA) has...,0.04028,0.31566,4394,10,6,13,2,1


# Import the BBC module and create the DataFrame

In [10]:
import src.bbc as bbc

In [11]:
# Collect all the links to the Ukraine news articles.
lst_bbc_ukr = bbc.get_bbc()

In [12]:
# Build a dictionary holding the links and their content.
dict_article_bbc = bbc.bbc_list(lst_bbc_ukr)

In [13]:
# Per-article descriptives: counts of the words 'ukraine', 'war', 'russia',
# 'putin', 'zelensky' plus sentiment analysis.
df_descriptive = bbc.descriptives(dict_article_bbc)

In [14]:
# DataFrame with the links and articles; the helper transposes the data and
# creates a new index.
df_bbc = bbc.create_df_bbc(dict_article_bbc)

In [15]:
# Join both DataFrames: articles on the left, descriptives on the right.
df_bbc_together = df_bbc.join(other=df_descriptive)

# Save the combined BBC data into a file, then preview one random row.
df_bbc_together.to_csv(path_or_buf='data/bbc.csv')
df_bbc_together.sample(n=1)

Unnamed: 0,link,article,polarity,subjectivity,length,ukraine,war,russia,putin,zelensky
120,https://www.bbc.co.uk/news/world-europe-61263856,The children surviving Ukraine’s war without t...,0.02778,0.18056,3341,11,13,2,0,0


# SQL 

## Connecting to the MySQL server and saving the data in tables

In [16]:
import src.sql_queries as sql

In [17]:
# Start the SQL engine; the cell output shows a "MySQL password:" prompt.
engine = sql.sql_connection()

MySQL password: ········


In [19]:
# Add the BBC and Al Jazeera data into SQL. Intended to be run once only.
# NOTE(review): presumably a re-run writes the data again - confirm how the
# helper treats existing tables before using "Run All".
sql.include_into_sql(df_bbc_together, df_aj_all, engine)

## Queries

### BBC queries

In [20]:
# Read every row of the `bbc` table into a DataFrame.
df_bbc_sql = pd.read_sql_query(sql="""SELECT * FROM bbc """, con=engine)


In [21]:
# Hand the BBC table and the engine to the project's query helper.
# The result rebinds `df_bbc_sql`, so re-running this cell on its own feeds
# the already-processed frame back into the helper.
df_bbc_sql = sql.df_bbc_sql_query(df_bbc_sql, engine)

In [22]:
# Apply the project's "clearance" helper (presumably a clean-up step - confirm
# in src/sql_queries); the result rebinds `df_bbc_sql` once more.
df_bbc_sql = sql.bbc_df_clearance(df_bbc_sql)

In [23]:
# Preview one randomly chosen row of the processed BBC frame.
df_bbc_sql.sample(n=1)

Unnamed: 0_level_0,polarity,subjectivity,(ukraine:length)*100,(war:length)*100,(russia:length)*100,(putin:length)*100,(zelensky:length)*100,link
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BBC,0.0725,0.36108,0.2593,0.124,0.1353,0.0,0.0789,https://www.bbc.co.uk/news/world-europe-64341337


### Al Jazeera queries

In [24]:
# Read every row of the `aljazeera` table into a DataFrame.
df_aj_sql = pd.read_sql_query(sql="""SELECT * FROM aljazeera """, con=engine)

In [25]:
# Hand the Al Jazeera table and the engine to the project's query helper.
# The result rebinds `df_aj_sql`, so re-running this cell on its own feeds
# the already-processed frame back into the helper.
df_aj_sql = sql.df_aj_sql_query(df_aj_sql, engine)

In [26]:
# Apply the project's "clearance" helper (presumably a clean-up step - confirm
# in src/sql_queries); the result rebinds `df_aj_sql` once more.
df_aj_sql = sql.aj_df_clearance(df_aj_sql)

In [27]:
# Preview one randomly chosen row of the processed Al Jazeera frame.
df_aj_sql.sample(n=1)

Unnamed: 0_level_0,polarity,subjectivity,(ukraine:length)*100,(war:length)*100,(russia:length)*100,(putin:length)*100,(zelensky:length)*100,link
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Aljazeera,0.00658,0.26092,0.2749,0.1718,0.3093,0.0344,0.0,https://www.aljazeera.com/news/2023/2/21/russi...
